hhpred.pl 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371
  1. #!/usr/bin/perl -w
  2. use strict;
  3. use warnings;
  4. ## general comments:
  5. ## this script is indirectly called by run_casp.pl, see comments therein
  6. ## it calls several functions and mainly functions as a "gluing" script
  7. ## its behavious is mainly controled by the config file read at the beginning
  8. ## in general it does the following:
  9. ## 1) build MSA from sequence via hhblits
  10. ## 2) build hhr-template-list via hhsearch
  11. ## 3) (pre-)select several templates via template selection strategy
  12. ## 4) filter preselected templates and query
  13. ## 5) rank templates with single template NN
  14. ## 6) select final templates
  15. ## 7) generate final (artifical) hhr file with selected templates
  16. ## 8) optionally replace distance restraints
  17. ## 9) call MODELLER and build final model
  18. ## this final model in saved at outdir and will then be sent to
  19. ## the CASP organizers via the run_casp.pl script
  20. ## more details:
  21. ## all temporary files are written to workingDir
  22. ## temporary files are eg. *.a3m, *.hhm, *.tab, *.hhr
  23. ## amd are named workingDir/queryName.*
  24. ## final results must be copied back to "dirbasename", e.g. outbase.pdb to dirbasename.pdb
  25. ##
  26. ## if filtering is on the filtered files are put in another temporary directory,
  27. ## see filterAlignments.pm
  28. ## the final pdb-file must be in outbase.pdb so that run_casp.pl can find this file and
  29. ## attach it to the reply-e-mail
  30. ##
  31. use File::Basename;
  32. use File::Spec;
  33. BEGIN {
  34. my $dirname = dirname(File::Spec->rel2abs(__FILE__));
  35. unshift @INC, $dirname . "/lib";
  36. }
  37. use File::Temp;
  38. use config;
  39. use utilities;
  40. use Template;
  41. use TemplateList;
  42. use TemplateStruct;
  43. use TemplateListStruct;
  44. use selectTemplatesHeuristic;
  45. use filterAlignments;
  46. use predTMscore;
  47. use checkTemplates;
  48. use modeller;
  49. use QualityAssessModel;
  50. ## called via run_casp.pl with these parameters
  51. my $server = "hhpred";
  52. my $outFile = "";
  53. my $configFile = undef;
  54. my $queryEnding = "";
  55. my $queryName = "";
  56. my $queryFile = "";
  57. #############################
  58. ## parse command line options
  59. my $options = "";
  60. for (my $i=0; $i<@ARGV; $i++) {
  61. $options .= " $ARGV[$i] ";
  62. }
  63. if ($options =~ s/-i\s+(\S+)/ /) {
  64. $queryFile = $1;
  65. my $fpath;
  66. ($queryName, $fpath, $queryEnding) = fileparse("$queryFile", qr/\.[^.]*/);
  67. }
  68. if ($options =~ s/-o\s+(\S+)/ /) { $outFile = $1; }
  69. if ($options =~ s/-config\s+(\S+)/ /) { $configFile = $1; }
  70. ##############################
  71. print "\n";
  72. print "==========================================================\n";
  73. print "| HHPRED structure predictor |\n";
  74. print "==========================================================\n";
  75. print "\n";
  76. ## set default values
  77. #my $queryFile = "$dirbasename" . "$queryEnding";
  78. my $workingDir = "/tmp/$$";
  79. ## set configuration
  80. my $config = HHpredConfig->instance($configFile);
  81. my $preselectedTemplatesFile = "";
  82. my $allRankedTemplates = TemplateList->new();
  83. if ($queryFile eq "" or $outFile eq "" or $queryName eq "" or ! -e $queryFile) {
  84. print "usage: $0 -i <infile> -o <outfile> [-c <configfile>]!\n";
  85. print " <infile> .a3m or .seq file\n";
  86. print " <outfile> resulting pdb file\n\n";
  87. exit 1;
  88. }
  89. ## create working directory
  90. if (! -e "$workingDir") {
  91. &System("mkdir -p $workingDir");
  92. } else {
  93. my $dir = File::Temp->newdir("XXXXX", DIR => "$workingDir", CLEANUP => 0);
  94. $workingDir = $dir->dirname;
  95. &System("chmod 777 $workingDir");
  96. }
  97. if (! -e "$workingDir/$queryName") {
  98. &System("mkdir -p $workingDir/$queryName");
  99. $workingDir = "$workingDir/$queryName";
  100. }
  101. #############################
  102. $config->print();
  103. print "\n";
  104. #############################
  105. ## set up starting files
  106. ## all files are written to workingDir
  107. $queryName .= &getRandomString(7); # add random stuff so that queryName != template (needed for MODELLER)
  108. my $outbase = "$workingDir/$queryName";
  109. ## either a3m or seq file
  110. if ($queryEnding ne ".a3m") {
  111. &System("cp $queryFile $outbase.seq");
  112. } else {
  113. &System("cp $queryFile $outbase.a3m");
  114. }
  115. #############################
  116. ## build query alignment if necessary
  117. if (! -e "$outbase.a3m") {
  118. my $command = "HHLIB=" . $config->get_hhlib()
  119. . " "
  120. . $config->get_hhblits()
  121. . " -i $outbase.seq"
  122. . " -oalis $outbase"
  123. . " -ohhm $outbase.hhm"
  124. . " -n " . $config->get_hhblits_rounds()
  125. . " -mact " . $config->get_hhblits_mact()
  126. . " -oa3m $outbase.a3m"
  127. . " -d " . $config->get_uniprot20()
  128. . " -cpu " . $config->get_cpus();
  129. &System($command);
  130. sleep(1);
  131. &System($config->get_addss() . " $outbase.a3m");
  132. sleep(1);
  133. }
  134. &System($config->get_hhmake() . " -i $outbase.a3m -o $outbase.hhm");
  135. sleep(1);
  136. ##############################
  137. ## search against database via hhsearch
  138. my $pdbdir = $config->get_pdbdir();
  139. $pdbdir =~ s/
  140. \/$ # trim trailing slash
  141. //x;
  142. my $pdbdb = "$pdbdir/db/pdb.hhm";
  143. &System("HHLIB=" . $config->get_hhlib()
  144. . " "
  145. . $config->get_hhsearch()
  146. . " -i $outbase.hhm"
  147. . " -d $pdbdb"
  148. . " -mact " . $config->get_hhsearch_mact()
  149. . " -cpu " . $config->get_cpus()
  150. . " -atab $outbase.start.tab");
  151. while (!(-e "$outbase.hhr")) {
  152. sleep(1)
  153. }
  154. &System("cp $outbase.hhr $outbase.start.hhr");
  155. ##############################
  156. ## start handling of templates
  157. my $tList = TemplateListStruct->new();
  158. $tList->hhr_to_TemplateList("$outbase.hhr");
  159. my $queryLength = $tList->get_queryLength();
  160. $tList->print();
  161. #############################
  162. ## preselect template (best according to similarity, sumprobs, probability
  163. ## and fill up rest with heuristic
  164. if ($config->get_preselectTemplates()) {
  165. $tList = &ChooseTemplatesScoringHeuristic($queryName, $workingDir, $queryLength, $outbase, 100, 1, $tList, $config);
  166. print "preselected templates:\n";
  167. $tList->print();
  168. print "====================================\n\n";
  169. }
  170. #############################
  171. ## filtering
  172. if ($config->get_doFiltering()) {
  173. $tList = &Filtering($queryName, $outbase, $tList, $server, $config);
  174. print "Filtered templates:\n";
  175. $tList->print();
  176. print "====================================\n\n";
  177. }
  178. #############################
  179. ## rank templates with NN
  180. if ($config->get_rankTemplates()) {
  181. my $rankingNN = TMscoreNN->new();
  182. $rankingNN->rank_templates($tList, $queryLength, $config);
  183. $allRankedTemplates = $tList;
  184. print "TM score ranked templates\n";
  185. $tList->print();
  186. print "====================================\n\n";
  187. }
  188. #############################
  189. ## final template selection
  190. if ($config->get_multiTemplate()) {
  191. my $maxNumTemplates = $config->get_maxNumOfTemplates();
  192. $tList = &ChooseTemplatesScoringHeuristic($queryName, $workingDir, $queryLength, $outbase, $maxNumTemplates, 2, $tList, $config);
  193. } else {
  194. $tList = &SingleTemplateSelection($tList);
  195. }
  196. ## take same template(s) as in file (instead of the ones selected before);
  197. ## keep previous template(s) if preselected ones are not in template list
  198. if ($preselectedTemplatesFile ne "") {
  199. my $preselectedTemplates = TemplateList->new();
  200. $preselectedTemplates->read_from_file($preselectedTemplatesFile);
  201. my $newTemplates = TemplateList->new();
  202. for (my $i=0; $i<$preselectedTemplates->size(); $i++) {
  203. for (my $j=0; $j<$allRankedTemplates->size(); $j++) {
  204. my $preTemplate = $preselectedTemplates->get($i);
  205. my $rankTemplate = $allRankedTemplates->get($j);
  206. if ($preTemplate->get_Hit() eq $rankTemplate->get_Hit()) {
  207. ## check overlap
  208. my $overlap = &min($preTemplate->get_Qend(), $rankTemplate->get_Qend()) - &max($preTemplate->get_Qstart(), $rankTemplate->get_Qstart());
  209. my $preLen = $preTemplate->get_Qend() - $preTemplate->get_Qstart();
  210. if ($overlap / $preLen > 0.5) {
  211. $newTemplates->check_and_add($rankTemplate);
  212. last;
  213. }
  214. }
  215. }
  216. }
  217. ## templates found
  218. if ($newTemplates->size() > 0) {
  219. print "preselected templates could be found\n";
  220. $tList = $newTemplates;
  221. } else {
  222. ## keep old templates
  223. print "preselected templates could NOT be found - keep old templates\n";
  224. }
  225. }
  226. print "final templates:\n";
  227. $tList->print();
  228. print "====================================\n\n";
  229. $tList->write_to_file("$outbase.templates");
  230. #############################
  231. ## create "artificial" hhr file for input, needed by hhmakemodel to build pir alignment
  232. $tList->templateList_to_hhr($outbase);
  233. #############################
  234. ## generate pir alignment
  235. ## the hhr file is generated/overwritten by the function call before
  236. ## the a3m is generated above using hhblast
  237. ## CheckTemplates possibly creates "corrected" pdb files and saves them
  238. ## in the "working" directory; then calls hhmakemodel to build outbase.pir
  239. print "checking templates\n";
  240. &CheckTemplates($outbase, $workingDir, $config);
  241. print "\n====================================\n\n";
  242. # if ($config->get_realignProbcons() && $tList->size() > 1) {
  243. # my $pirFile = PirFile->new("$outbase.pir");
  244. # my $fastaFile = $pirFile->to_fasta();
  245. # $fastaFile->write_to_file("$outbase.fas");
  246. # my $probcons = $config->get_probcons();
  247. # &System("$probcons $outbase.fas > $outbase.fas");
  248. # sleep(1);
  249. # my $fastaRealigned = FastaFile->new("$outbase.fas");
  250. # my $pirRealigned = $fastaRealigned->to_pir();
  251. # $pirRealigned->write_to_file("$outbase.pir");
  252. # }
  253. #############################
  254. ## Modeller initialization
  255. print "Modeller initialization\n";
  256. &ModellerRSR(
  257. queryName => $queryName,
  258. workingDir => $workingDir,
  259. outbase => $outbase,
  260. config => $config
  261. );
  262. #############################
  263. ## replace distance restraints
  264. ## and call Modeller
  265. if ($config->get_replaceDistanceRestraints()) {
  266. my $normalizer = &ChangeDistanceRestraints($outbase, $workingDir, $tList, $config);
  267. &ModellerNewDistance(
  268. rsrFile => "$outbase.new.rsr",
  269. queryName => $queryName,
  270. workingDir => $workingDir,
  271. outbase => $outbase,
  272. config => $config,
  273. normalizer => $normalizer);
  274. } else {
  275. &ModellerRaw(
  276. rsrFile => "$outbase.rsr",
  277. queryName => $queryName,
  278. workingDir => $workingDir,
  279. outbase => $outbase,
  280. config => $config
  281. );
  282. }
  283. &WriteCASPpdbFile("$outbase.pdb", "$outbase.pdb", $queryName, $server, $tList);
  284. if ($config->get_assessModel()) {
  285. $tList->set_queryLength($queryLength);
  286. my $assessor = QualityAssessModel->new(tList=>$tList, outbase=>$outbase, pdbFile=>"$outbase.pdb");
  287. $assessor->run();
  288. }
  289. #############################
  290. ## clean up
  291. my @tabs = <$outbase*.tab>;
  292. for (my $i=0; $i<@tabs; $i++) {
  293. system("rm $tabs[$i]");
  294. }
  295. #############################
  296. ## copy files of interest back to outdir , e.g. to the tempdir
  297. &System("cp $outbase.pdb $outFile");
  298. #&System("rm -r $workingDir");
  299. ## create a directory containing intermediate files
  300. #my $resultDir = $dirbasename . "Results";
  301. #&System("mkdir -p $resultDir");
  302. #&System("chmod 777 $resultDir");
  303. #&System("cp $outbase.* $resultDir");
  304. print "END HHpred for $queryName\n";
  305. print "=========================\n";