123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371 |
- #!/usr/bin/perl -w
- use strict;
- use warnings;
- ## general comments:
- ## this script is indirectly called by run_casp.pl, see comments therein
- ## it calls several functions and mainly functions as a "gluing" script
- ## its behavious is mainly controled by the config file read at the beginning
- ## in general it does the following:
- ## 1) build MSA from sequence via hhblits
- ## 2) build hhr-template-list via hhsearch
- ## 3) (pre-)select several templates via template selection strategy
- ## 4) filter preselected templates and query
- ## 5) rank templates with single template NN
- ## 6) select final templates
- ## 7) generate final (artifical) hhr file with selected templates
- ## 8) optionally replace distance restraints
- ## 9) call MODELLER and build final model
- ## this final model in saved at outdir and will then be sent to
- ## the CASP organizers via the run_casp.pl script
- ## more details:
- ## all temporary files are written to workingDir
- ## temporary files are eg. *.a3m, *.hhm, *.tab, *.hhr
- ## amd are named workingDir/queryName.*
- ## final results must be copied back to "dirbasename", e.g. outbase.pdb to dirbasename.pdb
- ##
- ## if filtering is on the filtered files are put in another temporary directory,
- ## see filterAlignments.pm
- ## the final pdb-file must be in outbase.pdb so that run_casp.pl can find this file and
- ## attach it to the reply-e-mail
- ##
- use File::Basename;
- use File::Spec;
- BEGIN {
- my $dirname = dirname(File::Spec->rel2abs(__FILE__));
- unshift @INC, $dirname . "/lib";
- }
- use File::Temp;
- use config;
- use utilities;
- use Template;
- use TemplateList;
- use TemplateStruct;
- use TemplateListStruct;
- use selectTemplatesHeuristic;
- use filterAlignments;
- use predTMscore;
- use checkTemplates;
- use modeller;
- use QualityAssessModel;
- ## called via run_casp.pl with these parameters
- my $server = "hhpred";
- my $outFile = "";
- my $configFile = undef;
- my $queryEnding = "";
- my $queryName = "";
- my $queryFile = "";
- #############################
- ## parse command line options
- my $options = "";
- for (my $i=0; $i<@ARGV; $i++) {
- $options .= " $ARGV[$i] ";
- }
- if ($options =~ s/-i\s+(\S+)/ /) {
- $queryFile = $1;
- my $fpath;
- ($queryName, $fpath, $queryEnding) = fileparse("$queryFile", qr/\.[^.]*/);
- }
- if ($options =~ s/-o\s+(\S+)/ /) { $outFile = $1; }
- if ($options =~ s/-config\s+(\S+)/ /) { $configFile = $1; }
- ##############################
- print "\n";
- print "==========================================================\n";
- print "| HHPRED structure predictor |\n";
- print "==========================================================\n";
- print "\n";
- ## set default values
- #my $queryFile = "$dirbasename" . "$queryEnding";
- my $workingDir = "/tmp/$$";
- ## set configuration
- my $config = HHpredConfig->instance($configFile);
- my $preselectedTemplatesFile = "";
- my $allRankedTemplates = TemplateList->new();
- if ($queryFile eq "" or $outFile eq "" or $queryName eq "" or ! -e $queryFile) {
- print "usage: $0 -i <infile> -o <outfile> [-c <configfile>]!\n";
- print " <infile> .a3m or .seq file\n";
- print " <outfile> resulting pdb file\n\n";
- exit 1;
- }
- ## create working directory
- if (! -e "$workingDir") {
- &System("mkdir -p $workingDir");
- } else {
- my $dir = File::Temp->newdir("XXXXX", DIR => "$workingDir", CLEANUP => 0);
- $workingDir = $dir->dirname;
- &System("chmod 777 $workingDir");
- }
- if (! -e "$workingDir/$queryName") {
- &System("mkdir -p $workingDir/$queryName");
- $workingDir = "$workingDir/$queryName";
- }
- #############################
- $config->print();
- print "\n";
- #############################
- ## set up starting files
- ## all files are written to workingDir
- $queryName .= &getRandomString(7); # add random stuff so that queryName != template (needed for MODELLER)
- my $outbase = "$workingDir/$queryName";
- ## either a3m or seq file
- if ($queryEnding ne ".a3m") {
- &System("cp $queryFile $outbase.seq");
- } else {
- &System("cp $queryFile $outbase.a3m");
- }
- #############################
- ## build query alignment if necessary
- if (! -e "$outbase.a3m") {
- my $command = "HHLIB=" . $config->get_hhlib()
- . " "
- . $config->get_hhblits()
- . " -i $outbase.seq"
- . " -oalis $outbase"
- . " -ohhm $outbase.hhm"
- . " -n " . $config->get_hhblits_rounds()
- . " -mact " . $config->get_hhblits_mact()
- . " -oa3m $outbase.a3m"
- . " -d " . $config->get_uniprot20()
- . " -cpu " . $config->get_cpus();
- &System($command);
- sleep(1);
- &System($config->get_addss() . " $outbase.a3m");
- sleep(1);
- }
- &System($config->get_hhmake() . " -i $outbase.a3m -o $outbase.hhm");
- sleep(1);
- ##############################
- ## search against database via hhsearch
- my $pdbdir = $config->get_pdbdir();
- $pdbdir =~ s/
- \/$ # trim trailing slash
- //x;
- my $pdbdb = "$pdbdir/db/pdb.hhm";
- &System("HHLIB=" . $config->get_hhlib()
- . " "
- . $config->get_hhsearch()
- . " -i $outbase.hhm"
- . " -d $pdbdb"
- . " -mact " . $config->get_hhsearch_mact()
- . " -cpu " . $config->get_cpus()
- . " -atab $outbase.start.tab");
- while (!(-e "$outbase.hhr")) {
- sleep(1)
- }
- &System("cp $outbase.hhr $outbase.start.hhr");
- ##############################
- ## start handling of templates
- my $tList = TemplateListStruct->new();
- $tList->hhr_to_TemplateList("$outbase.hhr");
- my $queryLength = $tList->get_queryLength();
- $tList->print();
- #############################
- ## preselect template (best according to similarity, sumprobs, probability
- ## and fill up rest with heuristic
- if ($config->get_preselectTemplates()) {
- $tList = &ChooseTemplatesScoringHeuristic($queryName, $workingDir, $queryLength, $outbase, 100, 1, $tList, $config);
- print "preselected templates:\n";
- $tList->print();
- print "====================================\n\n";
- }
- #############################
- ## filtering
- if ($config->get_doFiltering()) {
- $tList = &Filtering($queryName, $outbase, $tList, $server, $config);
- print "Filtered templates:\n";
- $tList->print();
- print "====================================\n\n";
- }
- #############################
- ## rank templates with NN
- if ($config->get_rankTemplates()) {
- my $rankingNN = TMscoreNN->new();
- $rankingNN->rank_templates($tList, $queryLength, $config);
-
- $allRankedTemplates = $tList;
- print "TM score ranked templates\n";
- $tList->print();
- print "====================================\n\n";
- }
- #############################
- ## final template selection
- if ($config->get_multiTemplate()) {
- my $maxNumTemplates = $config->get_maxNumOfTemplates();
- $tList = &ChooseTemplatesScoringHeuristic($queryName, $workingDir, $queryLength, $outbase, $maxNumTemplates, 2, $tList, $config);
- } else {
- $tList = &SingleTemplateSelection($tList);
- }
- ## take same template(s) as in file (instead of the ones selected before);
- ## keep previous template(s) if preselected ones are not in template list
- if ($preselectedTemplatesFile ne "") {
- my $preselectedTemplates = TemplateList->new();
- $preselectedTemplates->read_from_file($preselectedTemplatesFile);
-
- my $newTemplates = TemplateList->new();
- for (my $i=0; $i<$preselectedTemplates->size(); $i++) {
- for (my $j=0; $j<$allRankedTemplates->size(); $j++) {
- my $preTemplate = $preselectedTemplates->get($i);
- my $rankTemplate = $allRankedTemplates->get($j);
- if ($preTemplate->get_Hit() eq $rankTemplate->get_Hit()) {
- ## check overlap
- my $overlap = &min($preTemplate->get_Qend(), $rankTemplate->get_Qend()) - &max($preTemplate->get_Qstart(), $rankTemplate->get_Qstart());
- my $preLen = $preTemplate->get_Qend() - $preTemplate->get_Qstart();
- if ($overlap / $preLen > 0.5) {
- $newTemplates->check_and_add($rankTemplate);
- last;
- }
- }
- }
- }
- ## templates found
- if ($newTemplates->size() > 0) {
- print "preselected templates could be found\n";
- $tList = $newTemplates;
- } else {
- ## keep old templates
- print "preselected templates could NOT be found - keep old templates\n";
- }
- }
- print "final templates:\n";
- $tList->print();
- print "====================================\n\n";
- $tList->write_to_file("$outbase.templates");
- #############################
- ## create "artificial" hhr file for input, needed by hhmakemodel to build pir alignment
- $tList->templateList_to_hhr($outbase);
- #############################
- ## generate pir alignment
- ## the hhr file is generated/overwritten by the function call before
- ## the a3m is generated above using hhblast
- ## CheckTemplates possibly creates "corrected" pdb files and saves them
- ## in the "working" directory; then calls hhmakemodel to build outbase.pir
- print "checking templates\n";
- &CheckTemplates($outbase, $workingDir, $config);
- print "\n====================================\n\n";
- # if ($config->get_realignProbcons() && $tList->size() > 1) {
- # my $pirFile = PirFile->new("$outbase.pir");
- # my $fastaFile = $pirFile->to_fasta();
- # $fastaFile->write_to_file("$outbase.fas");
- # my $probcons = $config->get_probcons();
- # &System("$probcons $outbase.fas > $outbase.fas");
- # sleep(1);
- # my $fastaRealigned = FastaFile->new("$outbase.fas");
- # my $pirRealigned = $fastaRealigned->to_pir();
- # $pirRealigned->write_to_file("$outbase.pir");
- # }
- #############################
- ## Modeller initialization
- print "Modeller initialization\n";
- &ModellerRSR(
- queryName => $queryName,
- workingDir => $workingDir,
- outbase => $outbase,
- config => $config
- );
- #############################
- ## replace distance restraints
- ## and call Modeller
- if ($config->get_replaceDistanceRestraints()) {
- my $normalizer = &ChangeDistanceRestraints($outbase, $workingDir, $tList, $config);
- &ModellerNewDistance(
- rsrFile => "$outbase.new.rsr",
- queryName => $queryName,
- workingDir => $workingDir,
- outbase => $outbase,
- config => $config,
- normalizer => $normalizer);
- } else {
- &ModellerRaw(
- rsrFile => "$outbase.rsr",
- queryName => $queryName,
- workingDir => $workingDir,
- outbase => $outbase,
- config => $config
- );
- }
- &WriteCASPpdbFile("$outbase.pdb", "$outbase.pdb", $queryName, $server, $tList);
- if ($config->get_assessModel()) {
- $tList->set_queryLength($queryLength);
- my $assessor = QualityAssessModel->new(tList=>$tList, outbase=>$outbase, pdbFile=>"$outbase.pdb");
- $assessor->run();
- }
- #############################
- ## clean up
- my @tabs = <$outbase*.tab>;
- for (my $i=0; $i<@tabs; $i++) {
- system("rm $tabs[$i]");
- }
- #############################
- ## copy files of interest back to outdir , e.g. to the tempdir
- &System("cp $outbase.pdb $outFile");
- #&System("rm -r $workingDir");
- ## create a directory containing intermediate files
- #my $resultDir = $dirbasename . "Results";
- #&System("mkdir -p $resultDir");
- #&System("chmod 777 $resultDir");
- #&System("cp $outbase.* $resultDir");
- print "END HHpred for $queryName\n";
- print "=========================\n";
|