#!/usr/bin/perl -w use strict; use warnings; ## general comments: ## this script is indirectly called by run_casp.pl, see comments therein ## it calls several functions and mainly functions as a "gluing" script ## its behavious is mainly controled by the config file read at the beginning ## in general it does the following: ## 1) build MSA from sequence via hhblits ## 2) build hhr-template-list via hhsearch ## 3) (pre-)select several templates via template selection strategy ## 4) filter preselected templates and query ## 5) rank templates with single template NN ## 6) select final templates ## 7) generate final (artifical) hhr file with selected templates ## 8) optionally replace distance restraints ## 9) call MODELLER and build final model ## this final model in saved at outdir and will then be sent to ## the CASP organizers via the run_casp.pl script ## more details: ## all temporary files are written to workingDir ## temporary files are eg. *.a3m, *.hhm, *.tab, *.hhr ## amd are named workingDir/queryName.* ## final results must be copied back to "dirbasename", e.g. outbase.pdb to dirbasename.pdb ## ## if filtering is on the filtered files are put in another temporary directory, ## see filterAlignments.pm ## the final pdb-file must be in outbase.pdb so that run_casp.pl can find this file and ## attach it to the reply-e-mail ## use File::Basename; use File::Spec; BEGIN { my $dirname = dirname(File::Spec->rel2abs(__FILE__)); unshift @INC, $dirname . "/lib"; } use File::Temp; use config; use utilities; use Template; use TemplateList; use TemplateStruct; use TemplateListStruct; use selectTemplatesHeuristic; use filterAlignments; use predTMscore; use checkTemplates; use modeller; use QualityAssessModel; ## called via run_casp.pl with these parameters my $server = "hhpred"; my $outFile = ""; my $configFile = undef; my $queryEnding = ""; my $queryName = ""; my $queryFile = ""; ############################# ## parse command line options my $options = ""; for (my $i=0; $i<@ARGV; $i++) { $options .= " $ARGV[$i] "; } if ($options =~ s/-i\s+(\S+)/ /) { $queryFile = $1; my $fpath; ($queryName, $fpath, $queryEnding) = fileparse("$queryFile", qr/\.[^.]*/); } if ($options =~ s/-o\s+(\S+)/ /) { $outFile = $1; } if ($options =~ s/-config\s+(\S+)/ /) { $configFile = $1; } ############################## print "\n"; print "==========================================================\n"; print "| HHPRED structure predictor |\n"; print "==========================================================\n"; print "\n"; ## set default values #my $queryFile = "$dirbasename" . "$queryEnding"; my $workingDir = "/tmp/$$"; ## set configuration my $config = HHpredConfig->instance($configFile); my $preselectedTemplatesFile = ""; my $allRankedTemplates = TemplateList->new(); if ($queryFile eq "" or $outFile eq "" or $queryName eq "" or ! -e $queryFile) { print "usage: $0 -i -o [-c ]!\n"; print " .a3m or .seq file\n"; print " resulting pdb file\n\n"; exit 1; } ## create working directory if (! -e "$workingDir") { &System("mkdir -p $workingDir"); } else { my $dir = File::Temp->newdir("XXXXX", DIR => "$workingDir", CLEANUP => 0); $workingDir = $dir->dirname; &System("chmod 777 $workingDir"); } if (! -e "$workingDir/$queryName") { &System("mkdir -p $workingDir/$queryName"); $workingDir = "$workingDir/$queryName"; } ############################# $config->print(); print "\n"; ############################# ## set up starting files ## all files are written to workingDir $queryName .= &getRandomString(7); # add random stuff so that queryName != template (needed for MODELLER) my $outbase = "$workingDir/$queryName"; ## either a3m or seq file if ($queryEnding ne ".a3m") { &System("cp $queryFile $outbase.seq"); } else { &System("cp $queryFile $outbase.a3m"); } ############################# ## build query alignment if necessary if (! -e "$outbase.a3m") { my $command = "HHLIB=" . $config->get_hhlib() . " " . $config->get_hhblits() . " -i $outbase.seq" . " -oalis $outbase" . " -ohhm $outbase.hhm" . " -n " . $config->get_hhblits_rounds() . " -mact " . $config->get_hhblits_mact() . " -oa3m $outbase.a3m" . " -d " . $config->get_uniprot20() . " -cpu " . $config->get_cpus(); &System($command); sleep(1); &System($config->get_addss() . " $outbase.a3m"); sleep(1); } &System($config->get_hhmake() . " -i $outbase.a3m -o $outbase.hhm"); sleep(1); ############################## ## search against database via hhsearch my $pdbdir = $config->get_pdbdir(); $pdbdir =~ s/ \/$ # trim trailing slash //x; my $pdbdb = "$pdbdir/db/pdb.hhm"; &System("HHLIB=" . $config->get_hhlib() . " " . $config->get_hhsearch() . " -i $outbase.hhm" . " -d $pdbdb" . " -mact " . $config->get_hhsearch_mact() . " -cpu " . $config->get_cpus() . " -atab $outbase.start.tab"); while (!(-e "$outbase.hhr")) { sleep(1) } &System("cp $outbase.hhr $outbase.start.hhr"); ############################## ## start handling of templates my $tList = TemplateListStruct->new(); $tList->hhr_to_TemplateList("$outbase.hhr"); my $queryLength = $tList->get_queryLength(); $tList->print(); ############################# ## preselect template (best according to similarity, sumprobs, probability ## and fill up rest with heuristic if ($config->get_preselectTemplates()) { $tList = &ChooseTemplatesScoringHeuristic($queryName, $workingDir, $queryLength, $outbase, 100, 1, $tList, $config); print "preselected templates:\n"; $tList->print(); print "====================================\n\n"; } ############################# ## filtering if ($config->get_doFiltering()) { $tList = &Filtering($queryName, $outbase, $tList, $server, $config); print "Filtered templates:\n"; $tList->print(); print "====================================\n\n"; } ############################# ## rank templates with NN if ($config->get_rankTemplates()) { my $rankingNN = TMscoreNN->new(); $rankingNN->rank_templates($tList, $queryLength, $config); $allRankedTemplates = $tList; print "TM score ranked templates\n"; $tList->print(); print "====================================\n\n"; } ############################# ## final template selection if ($config->get_multiTemplate()) { my $maxNumTemplates = $config->get_maxNumOfTemplates(); $tList = &ChooseTemplatesScoringHeuristic($queryName, $workingDir, $queryLength, $outbase, $maxNumTemplates, 2, $tList, $config); } else { $tList = &SingleTemplateSelection($tList); } ## take same template(s) as in file (instead of the ones selected before); ## keep previous template(s) if preselected ones are not in template list if ($preselectedTemplatesFile ne "") { my $preselectedTemplates = TemplateList->new(); $preselectedTemplates->read_from_file($preselectedTemplatesFile); my $newTemplates = TemplateList->new(); for (my $i=0; $i<$preselectedTemplates->size(); $i++) { for (my $j=0; $j<$allRankedTemplates->size(); $j++) { my $preTemplate = $preselectedTemplates->get($i); my $rankTemplate = $allRankedTemplates->get($j); if ($preTemplate->get_Hit() eq $rankTemplate->get_Hit()) { ## check overlap my $overlap = &min($preTemplate->get_Qend(), $rankTemplate->get_Qend()) - &max($preTemplate->get_Qstart(), $rankTemplate->get_Qstart()); my $preLen = $preTemplate->get_Qend() - $preTemplate->get_Qstart(); if ($overlap / $preLen > 0.5) { $newTemplates->check_and_add($rankTemplate); last; } } } } ## templates found if ($newTemplates->size() > 0) { print "preselected templates could be found\n"; $tList = $newTemplates; } else { ## keep old templates print "preselected templates could NOT be found - keep old templates\n"; } } print "final templates:\n"; $tList->print(); print "====================================\n\n"; $tList->write_to_file("$outbase.templates"); ############################# ## create "artificial" hhr file for input, needed by hhmakemodel to build pir alignment $tList->templateList_to_hhr($outbase); ############################# ## generate pir alignment ## the hhr file is generated/overwritten by the function call before ## the a3m is generated above using hhblast ## CheckTemplates possibly creates "corrected" pdb files and saves them ## in the "working" directory; then calls hhmakemodel to build outbase.pir print "checking templates\n"; &CheckTemplates($outbase, $workingDir, $config); print "\n====================================\n\n"; # if ($config->get_realignProbcons() && $tList->size() > 1) { # my $pirFile = PirFile->new("$outbase.pir"); # my $fastaFile = $pirFile->to_fasta(); # $fastaFile->write_to_file("$outbase.fas"); # my $probcons = $config->get_probcons(); # &System("$probcons $outbase.fas > $outbase.fas"); # sleep(1); # my $fastaRealigned = FastaFile->new("$outbase.fas"); # my $pirRealigned = $fastaRealigned->to_pir(); # $pirRealigned->write_to_file("$outbase.pir"); # } ############################# ## Modeller initialization print "Modeller initialization\n"; &ModellerRSR( queryName => $queryName, workingDir => $workingDir, outbase => $outbase, config => $config ); ############################# ## replace distance restraints ## and call Modeller if ($config->get_replaceDistanceRestraints()) { my $normalizer = &ChangeDistanceRestraints($outbase, $workingDir, $tList, $config); &ModellerNewDistance( rsrFile => "$outbase.new.rsr", queryName => $queryName, workingDir => $workingDir, outbase => $outbase, config => $config, normalizer => $normalizer); } else { &ModellerRaw( rsrFile => "$outbase.rsr", queryName => $queryName, workingDir => $workingDir, outbase => $outbase, config => $config ); } &WriteCASPpdbFile("$outbase.pdb", "$outbase.pdb", $queryName, $server, $tList); if ($config->get_assessModel()) { $tList->set_queryLength($queryLength); my $assessor = QualityAssessModel->new(tList=>$tList, outbase=>$outbase, pdbFile=>"$outbase.pdb"); $assessor->run(); } ############################# ## clean up my @tabs = <$outbase*.tab>; for (my $i=0; $i<@tabs; $i++) { system("rm $tabs[$i]"); } ############################# ## copy files of interest back to outdir , e.g. to the tempdir &System("cp $outbase.pdb $outFile"); #&System("rm -r $workingDir"); ## create a directory containing intermediate files #my $resultDir = $dirbasename . "Results"; #&System("mkdir -p $resultDir"); #&System("chmod 777 $resultDir"); #&System("cp $outbase.* $resultDir"); print "END HHpred for $queryName\n"; print "=========================\n";