modeller.pm 70 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893
  1. ##########################################################
  2. ## this package contains functions handling modeller, i.e.
  3. ## creating restraint file
  4. ## changing restraint file
  5. ## calling modeller program
  6. ##########################################################
  7. package modeller;
  8. require(Exporter);
  9. use strict;
  10. use config;
  11. use utilities;
  12. use TemplateList;
  13. use multiTemplateMDN;
  14. use distanceTree;
  15. use FixOccupancy;
  16. our @ISA = qw(Exporter);
  17. our @EXPORT = qw(Modeller ModellerRSR ModellerRaw ModellerNewDistance ChangeDistanceRestraints WriteCASPpdbFile reweighModellerDistRestraints);
  18. ######################################################
  19. ## input: name of query
  20. ## directory of query
  21. ## inbase: basename of files read in (pir)
  22. ## outbase: basename for outputs
  23. ##
  24. ## output: indirect: py file is written and modeller
  25. ## is called generating some more files
  26. ##
  27. ## description: calls Modeller and creates restraints
  28. ## file - needed when restraints file
  29. ## is to be changed
  30. ######################################################
  31. sub ModellerRSR{
  32. my %args = @_;
  33. my $queryName = $args{queryName};
  34. my $workingDir = $args{workingDir};
  35. my $outbase = $args{outbase};
  36. my $config = $args{config};
  37. my $pdbdir = $args{pdbdir} || $config->get_pdbdir();
  38. my $tmpdir = $workingDir;
  39. my $modeller = $config->get_modeller();
  40. #Modeller constructs only restraints file
  41. my $offset;
  42. #Change sequence name to id_temp and get templates -> $templates
  43. my $templates = "";
  44. my $knowns = "";
  45. my %knowns;
  46. open (IN, "<$outbase.pir") or die ("Error: cannot open $outbase.pir: $!\n");
  47. my @lines = <IN>;
  48. close(IN);
  49. $lines[0] =~ s/^>P1;.*/>P1;$queryName/;
  50. for (my $i=0; $i<@lines; $i++) {
  51. if ($lines[$i] =~ /^sequence:.*?:\s*(\d+)\s*:/) {
  52. $offset = $1-1;
  53. $lines[$i] =~ s/^sequence:.*?:/sequence:$queryName:/; # replace sequence name with id
  54. }
  55. elsif ($lines[$i] =~ /^structureX:/) {
  56. $lines[$i-1] =~ />P1;(\S+)/;
  57. $knowns .= " '$1'," if (not exists($knowns{$1}));
  58. $templates .= " $1";
  59. $knowns{$1} = 1; # avoid double entry in knowns, Modeller cant handle
  60. }
  61. }
  62. $templates =~ s/^ //;
  63. open (OUT, "> $outbase.pir") or die ("Error: cannt write $outbase.pir: $!\n");
  64. print(OUT @lines);
  65. close(OUT);
  66. # Write the py-file
  67. open(PY,">$outbase.py") or die("Cannot open: $outbase.py");
  68. print PY ("# Homology modeling by the automodel class\n");
  69. print PY ("from modeller import * # Load standard Modeller classes\n");
  70. print PY ("from modeller.automodel import * # Load the automodel class\n");
  71. print PY ("log.verbose()\n");
  72. print PY ("env = environ()\n");
  73. print PY ("env.io.atom_files_directory = '$pdbdir:$workingDir'\n");
  74. print PY ("a = automodel(env,\n");
  75. print PY (" alnfile = '$outbase.pir', # alignment filename\n");
  76. print PY (" knowns = ($knowns), # codes of the templates\n");
  77. print PY (" sequence = '$queryName') # code of the target\n");
  78. print PY ("a.homcsr(0)\n");
  79. close (PY);
  80. #prepare for MODELLER:
  81. system("chmod 777 $outbase.pir");
  82. system("chmod 777 $outbase.py");
  83. #run MODELLER: create *.ini & *.rsr
  84. &System("cd $tmpdir; $modeller $outbase.py > $workingDir/$queryName.modeller.log");
  85. #TREAT FAILED MODELLER!
  86. return;
  87. }
  88. sub ModellerRaw {
  89. my %args = @_;
  90. my $myrsr = $args{rsrFile};
  91. my $queryName = $args{queryName};
  92. my $workingDir = $args{workingDir};
  93. my $outbase = $args{outbase};
  94. my $config = $args{config};
  95. # print "myrsr=$myrsr\n";
  96. # print "queryName=$queryName\n";
  97. # print "workingDir=$workingDir\n";
  98. # print "outbase=$outbase\n";
  99. my $NMODELS = $config->get_numberOfGeneratedModels();
  100. my $modeller = $config->get_modeller();
  101. my $pdbdir = $config->get_pdbdir();
  102. my $offset;
  103. my $templates = "";
  104. my $knowns = "";
  105. my %knowns;
  106. open (IN, "< $outbase.pir") or die ("Error: cannot open $outbase.pir: $!\n");
  107. my @lines = <IN>;
  108. close(IN);
  109. $lines[0] =~ s/^>P1;.*/>P1;$queryName/;
  110. for (my $i=0; $i<@lines; $i++) {
  111. if ($lines[$i] =~ /^sequence:.*?:\s*(\d+)\s*:/) {
  112. $offset = $1-1;
  113. $lines[$i] =~ s/^sequence:.*?:/sequence:$queryName:/; # replace sequence name with id
  114. }
  115. elsif ($lines[$i] =~ /^structureX:/) {
  116. $lines[$i-1] =~ />P1;(\S+)/;
  117. $knowns .= " '$1'," if (not exists($knowns{$1}));
  118. $templates .= " $1";
  119. $knowns{$1} = 1; # avoid double entry in knowns, Modeller cant handle
  120. }
  121. }
  122. $templates =~ s/^ //;
  123. open (OUT, ">$outbase.pir") or die ("Error: cannot open $outbase.pir: $!\n");
  124. print(OUT @lines);
  125. close(OUT);
  126. # Write the py-file
  127. open(PY, "> $outbase.py") or die("Cannot write $outbase.py");
  128. print PY ("# Homology modeling by the automodel class\n");
  129. print PY ("from modeller import * # Load standard Modeller classes\n");
  130. print PY ("from modeller.automodel import * # Load the automodel class\n");
  131. print PY ("log.verbose()\n");
  132. print PY ("env = environ()\n");
  133. print PY ("# directories for input atom files\n");
  134. print PY ("env.io.atom_files_directory = '$pdbdir:$workingDir'\n");
  135. print PY ("a = automodel(env,\n");
  136. print PY (" alnfile = '$outbase.pir', # alignment filename\n");
  137. print PY (" knowns =($knowns), # codes of the templates\n");
  138. print PY (" sequence ='$queryName', # code of the target\n");
  139. print PY (" csrfile = '$myrsr') \n");
  140. print PY ("a.starting_model= 1 # index of the first model\n");
  141. print PY ("a.ending_model = $NMODELS # index of the last model\n");
  142. print PY ("a.make() # do the actual homology modeling\n");
  143. close (PY);
  144. #prepare for MODELLER:
  145. system("chmod 777 $outbase.pir");
  146. system("chmod 777 $outbase.py");
  147. #run MODELLER: create *.ini & *.rsr
  148. &System("cd $workingDir; $modeller $outbase.py > $workingDir/$queryName.modeller.log");
  149. #TREAT FAILED MODELLER!
  150. #check model(s):
  151. my @modelsBuilt;
  152. for (my $i=1; $i<=$NMODELS; $i++) {
  153. my $modelID = "B" . (99990000 + $i);
  154. if (! -e "$outbase.$modelID.pdb") {
  155. print("WARNING: MODELLER failed for $outbase.$modelID.pdb!\nSee log file $outbase.modeller.log\n");
  156. #EXCEPTION does not work!!!!debug hhmakemodel.pl
  157. #&System("perl $hh/hhmakemodel.pl $tmpname.hhr -m $singleTemp -q $tmpname.a3m -v -ts $tmpname.mod -d $pdbdir");
  158. }
  159. else {
  160. push (@modelsBuilt, $i);
  161. }
  162. }
  163. # find model with minimal energy
  164. if (scalar(@modelsBuilt) > 0) {
  165. my $minEnergy = +1E8;
  166. my $imin = -1;
  167. my $line;
  168. my $bestModelID;
  169. ## search for best model (lowest objective function value)
  170. ## and save this model - remove all others
  171. foreach my $model (@modelsBuilt) {
  172. my $modelID = "B" . (99990000 + $model);
  173. open(IN, "<$outbase.$modelID.pdb") || die("Error: can't open $outbase.$modelID.pdb");
  174. my $energy;
  175. while ($line = <IN>) {
  176. if ($line =~ /MODELLER OBJECTIVE FUNCTION:\s*(\S+)/) {
  177. $energy = $1;
  178. last;
  179. }
  180. }
  181. if ($energy < $minEnergy) {
  182. $minEnergy = $energy;
  183. $imin = $model;
  184. }
  185. close(IN);
  186. }
  187. if ($imin != -1) {
  188. my $minModelID = "B" . (99990000 + $imin);
  189. &System("cp $outbase.$minModelID.pdb $outbase.pdb");
  190. } else {
  191. print "WARNING: MODELLER no minimal energy structure found!\n";
  192. }
  193. }
  194. &System("rm $outbase.sch $outbase.V999* $outbase.D000*");
  195. return;
  196. }
  197. sub Modeller {
  198. my %args = @_;
  199. my $queryName = $args{queryName};
  200. my $workingDir = $args{workingDir};
  201. my $outbase = $args{outbase};
  202. my $config = $args{config};
  203. my $physicalScale = 1;
  204. $physicalScale = $args{physicalScale} if (defined($args{physicalScale}));
  205. my $pdbdir = $args{pdbdir} || $config->get_pdbdir();
  206. my $NMODELS = $config->get_numberOfGeneratedModels();
  207. my $modeller = $config->get_modeller();
  208. my $offset;
  209. my $templates = "";
  210. my $knowns = "";
  211. my %knowns;
  212. open (IN, "< $outbase.pir") or die ("Error: cannot open $outbase.pir: $!\n");
  213. my @lines = <IN>;
  214. close(IN);
  215. $lines[0] =~ s/^>P1;.*/>P1;$queryName/;
  216. for (my $i=0; $i<@lines; $i++) {
  217. if ($lines[$i] =~ /^sequence:.*?:\s*(\d+)\s*:/) {
  218. $offset = $1-1;
  219. $lines[$i] =~ s/^sequence:.*?:/sequence:$queryName:/; # replace sequence name with id
  220. }
  221. elsif ($lines[$i] =~ /^structureX:/) {
  222. $lines[$i-1] =~ />P1;(\S+)/;
  223. $knowns .= " '$1'," if (not exists($knowns{$1}));
  224. $templates .= " $1";
  225. $knowns{$1} = 1; # avoid double entry in knowns, Modeller cant handle
  226. }
  227. }
  228. $templates =~ s/^ //;
  229. open (OUT, ">$outbase.pir") or die ("Error: cannot open $outbase.pir: $!\n");
  230. print(OUT @lines);
  231. close(OUT);
  232. # Write the py-file
  233. open(PY, "> $outbase.py") or die("Cannot write $outbase.py");
  234. print PY ("# Homology modeling by the automodel class\n");
  235. print PY ("from modeller import * # Load standard Modeller classes\n");
  236. print PY ("from modeller.automodel import * # Load the automodel class\n");
  237. print PY ("log.verbose()\n");
  238. print PY ("env = environ()\n");
  239. if ($physicalScale != 1) {
  240. print PY ("env.schedule_scale=physical.values(default=1.0, ca_distance=$physicalScale, n_o_distance=$physicalScale, sd_mn_distance=$physicalScale, sd_sd_distance=$physicalScale)\n");
  241. }
  242. print PY ("# directories for input atom files\n");
  243. print PY ("env.io.atom_files_directory = '$pdbdir:$workingDir'\n");
  244. print PY ("a = automodel(env,\n");
  245. print PY (" alnfile = '$outbase.pir', # alignment filename\n");
  246. print PY (" knowns = ($knowns), # codes of the templates\n");
  247. print PY (" sequence = '$queryName') # code of the target\n");
  248. print PY ("a.starting_model= 1 # index of the first model\n");
  249. print PY ("a.ending_model = $NMODELS # index of the last model\n");
  250. # print PY ("a.deviation = 1.0\n");
  251. # print PY ("a.spline_on_site = False # do not use splines for approximation\n");
  252. # print PY ("a.library_schedule = autosched.very_fast\n");
  253. # print PY ("a.max_var_iterations = 300\n");
  254. # print PY ("a.repeat_optimization = 2\n");
  255. # print PY ("a.write_intermediates = True\n");
  256. print PY ("a.make() # do the actual homology modeling\n");
  257. close (PY);
  258. #prepare for MODELLER:
  259. system("chmod 777 $outbase.pir");
  260. system("chmod 777 $outbase.py");
  261. #run MODELLER: create *.ini & *.rsr
  262. &System("cd $workingDir; $modeller $outbase.py > $workingDir/$queryName.modeller.log");
  263. my @modelsBuilt;
  264. #TREAT FAILED MODELLER!
  265. #check model(s):
  266. for (my $i=1; $i<=$NMODELS; $i++) {
  267. my $modelID = "B" . (99990000 + $i);
  268. if (! -e "$outbase.$modelID.pdb") {
  269. print("WARNING: MODELLER failed for $outbase.$modelID.pdb!\nSee log file $outbase.modeller.log\n");
  270. #EXCEPTION does not work!!!!debug hhmakemodel.pl
  271. #&System("perl $hh/hhmakemodel.pl $tmpname.hhr -m $singleTemp -q $tmpname.a3m -v -ts $tmpname.mod -d $pdbdir");
  272. }
  273. else {
  274. push (@modelsBuilt, $i);
  275. }
  276. }
  277. # find model with minimal energy
  278. if (scalar(@modelsBuilt) > 0) {
  279. my $minEnergy = +1E8;
  280. my $imin = -1;
  281. my $line;
  282. ## search for best model (lowest objective function value)
  283. ## and save this model - remove all others
  284. foreach my $model (@modelsBuilt) {
  285. my $modelID = "B" . (99990000+$model);
  286. open(IN, "<$outbase.$modelID.pdb") || die("Error: can't open $outbase.$modelID.pdb");
  287. my $energy;
  288. while ($line = <IN>) {
  289. if ($line =~ /MODELLER OBJECTIVE FUNCTION:\s*(\S+)/) {
  290. $energy = $1;
  291. last;
  292. }
  293. }
  294. if ($energy < $minEnergy) {
  295. $minEnergy = $energy;
  296. $imin = $model;
  297. }
  298. close(IN);
  299. }
  300. if ($imin != -1) {
  301. my $minModelID = "B" . (99990000+$imin);
  302. &System("cp $outbase.$minModelID.pdb $outbase.pdb");
  303. } else {
  304. print "WARNING: MODELLER no minimal energy structure found!\n";
  305. }
  306. }
  307. &System("rm $outbase.sch $outbase.V999* $outbase.D000* $outbase.ini ");
  308. return;
  309. }
  310. #############################################################
  311. ## input: myrsr: rsr file
  312. ## queryName:
  313. ## workingDir: where save (temp) files
  314. ## outbase: outbase for pir files
  315. ## config:
  316. ##
  317. ##
  318. ## output: mod file containing the best modeller built model
  319. #############################################################
  320. sub ModellerNewDistance {
  321. my %args = @_;
  322. my $myrsr = $args{rsrFile};
  323. my $queryName = $args{queryName};
  324. my $workingDir = $args{workingDir};
  325. my $outbase = $args{outbase};
  326. my $config = $args{config};
  327. my $pdbdir = $args{pdbdir} || $config->get_pdbdir();
  328. my $templWeightNormalizer = $args{normalizer};
  329. my $doTemplWeight = 1;
  330. $doTemplWeight = $args{doTemplWeightEqual} if (defined($args{doTemplWeightEqual}));
  331. my $physicalScale = 1;
  332. $physicalScale = $args{physicalScale} if (defined($args{physicalScale}));
  333. my $NMODELS = $config->get_numberOfGeneratedModels();
  334. my $modeller = $config->get_modeller();
  335. my $numTemplates = 0;
  336. #Change sequence name to id_temp and get templates -> $templates
  337. my $templates = "";
  338. my $knowns = "";
  339. my %knowns;
  340. my @allTemplates; ## $knowns does not contain duplicates
  341. my %templateWeights;
  342. open (IN, "< $outbase.pir") or die ("Error: cannot open $outbase.pir: $!\n");
  343. my @lines = <IN>;
  344. close(IN);
  345. $lines[0] =~ s/^>P1;.*/>P1;$queryName/;
  346. for (my $i=0; $i<@lines; $i++) {
  347. if ($lines[$i] =~ /^sequence:.*?:\s*(\d+)\s*:/) {
  348. $lines[$i] =~ s/^sequence:.*?:/sequence:$queryName:/; # replace sequence name with id
  349. }
  350. elsif ($lines[$i] =~ /^structureX:/) {
  351. $lines[$i-1] =~ />P1;(\S+)/;
  352. push (@allTemplates, $1);
  353. $knowns .= " '$1'," if (not exists($knowns{$1}));
  354. $templates .= " $1";
  355. $numTemplates++;
  356. $knowns{$1} = 1;
  357. }
  358. }
  359. $templates =~ s/^ //;
  360. ## weight for template based restraints
  361. ## default
  362. my $rsrWeight = 1.0/$numTemplates;
  363. $rsrWeight = sprintf("%.3f", $rsrWeight);
  364. my $templRsrWeight = 1.0/$templWeightNormalizer;
  365. ## TODO: adapt weight to new template-weighting strategy
  366. open (OUT, ">$outbase.pir") or die ("Error: cannot open $outbase.pir: $!\n");
  367. print(OUT @lines);
  368. close(OUT);
  369. # Write the py-file
  370. open(PY,"> $outbase.py") or die("Cannot open: $outbase.py");
  371. if ($config->get_templateWeightStrategy() == 1) { ## WITH phylo weight
  372. print PY ("# Homology modeling by the automodel class\n");
  373. print PY ("from modeller import * # Load standard Modeller classes\n");
  374. print PY ("from modeller.automodel import * # Load the automodel class\n");
  375. if ($config->get_doParallelModeller()) {
  376. print PY ("from modeller.parallel import *\n");
  377. }
  378. print PY ("from modeller.scripts import complete_pdb\n");
  379. print PY ("from modeller import _cuser_form54\n");
  380. print PY ("log.verbose()\n");
  381. print PY ("env = environ()\n");
  382. ## downweight template based restraints
  383. if ($doTemplWeight) {
  384. my $rsrWeight = $physicalScale;
  385. print PY ("env.schedule_scale=physical.values(default=1.0, ca_distance=$rsrWeight, n_o_distance=$rsrWeight, sd_mn_distance=$rsrWeight, sd_sd_distance=$rsrWeight)\n"); #, chi1_dihedral=$templRsrWeight, chi2_dihedral=$templRsrWeight, chi3_dihedral=$templRsrWeight, chi4_dihedral=$templRsrWeight, phi_dihedral=$templRsrWeight, psi_dihedral=$templRsrWeight, omega_dihedral=$templRsrWeight, phi_psi_dihedral=$templRsrWeight)\n");
  386. }
  387. print PY ("env.io.atom_files_directory = '$pdbdir:$workingDir'\n");
  388. print PY ("\n");
  389. print PY ("class MyGauss(forms.restraint_form):\n");
  390. print PY (" \"\"\"Multiple Gaussian\"\"\"\n");
  391. print PY (" _builtin_index = _cuser_form54.myform_create()\n");
  392. print PY (" def __init__(self, group, feature, weights, means, stdevs, phyloWeight):\n");
  393. print PY (" lv = -1\n");
  394. print PY (" for var in (weights, means, stdevs):\n");
  395. print PY (" if (lv >= 0 and lv != len(var)) \\\n");
  396. print PY (" or not isinstance(var, (tuple, list)):\n");
  397. print PY (" raise TypeError(\"weights, means and stdevs should all be \"\n");
  398. print PY (" + \"sequences of the same length\")\n");
  399. print PY (" lv = len(var)\n");
  400. print PY (" forms.restraint_form.__init__(self, group, feature, len(weights),\n");
  401. print PY (" tuple(weights) + tuple(means) + tuple(stdevs) + phyloWeight)\n");
  402. print PY ("a = automodel(env,\n");
  403. print PY (" alnfile = '$outbase.pir', # alignment filename\n");
  404. print PY (" knowns =($knowns), # codes of the templates\n");
  405. print PY (" sequence ='$queryName', # code of the target\n");
  406. print PY (" csrfile = '$myrsr') # use 'my' restraints file\n");
  407. print PY ("a.starting_model= 1 # index of the first model\n");
  408. print PY ("a.ending_model = $NMODELS # index of the last model\n");
  409. # print PY ("a.library_schedule = autosched.slow\n");
  410. # print PY ("a.max_var_iterations = 300\n");
  411. # print PY ("a.repeat_optimization = 2\n");
  412. # print PY ("a.deviation = 0.0\n");
  413. # print PY ("a.write_intermediates = True\n");
  414. if ($config->get_doParallelModeller()) {
  415. ## all slaves have to have their own initialization with new restraints...
  416. print PY ("j = job()\n");
  417. print PY ("j.slave_startup_commands.append(\"from modeller import _cuser_form54\")\n");
  418. print PY ("j.slave_startup_commands.append(\"class MyGauss(forms.restraint_form):\\n\" + \n");
  419. print PY (" \" _builtin_index = _cuser_form54.myform_create()\")\n");
  420. my $cpus = $config->get_cpus();
  421. for (my $i=0; $i < &min($NMODELS, $cpus); $i++) {
  422. print PY ("j.append(local_slave())\n");
  423. }
  424. print PY ("a.use_parallel_job(j)\n");
  425. }
  426. print PY ("a.make() # do the actual homology modeling\n");
  427. } else {
  428. ## NO PHYLO WEIGHT
  429. print PY ("# Homology modeling by the automodel class\n");
  430. print PY ("from modeller import * # Load standard Modeller classes\n");
  431. print PY ("from modeller.automodel import * # Load the automodel class\n");
  432. if ($config->get_doParallelModeller()) {
  433. print PY ("from modeller.parallel import *\n");
  434. }
  435. print PY ("from modeller.scripts import complete_pdb\n");
  436. print PY ("from modeller import _cuser_form52\n");
  437. print PY ("log.verbose()\n");
  438. print PY ("env = environ()\n");
  439. ## downweight template based restraints
  440. if ($doTemplWeight) {
  441. ## !!! TEST !!!
  442. my $rsrWeight = $physicalScale;
  443. print PY ("env.schedule_scale=physical.values(default=1.0, ca_distance=$rsrWeight, n_o_distance=$rsrWeight, sd_mn_distance=$rsrWeight, sd_sd_distance=$rsrWeight)\n"); #, chi1_dihedral=$rsrWeight, chi2_dihedral=$rsrWeight, chi3_dihedral=$rsrWeight, chi4_dihedral=$rsrWeight, phi_dihedral=$rsrWeight, psi_dihedral=$rsrWeight, omega_dihedral=$rsrWeight, phi_psi_dihedral=$rsrWeight)\n");
  444. ## !!! end TEST !!!
  445. }
  446. print PY ("env.io.atom_files_directory = '$pdbdir:$workingDir'\n");
  447. print PY ("\n");
  448. print PY ("class MyGauss(forms.restraint_form):\n");
  449. print PY (" \"\"\"Multiple Gaussian\"\"\"\n");
  450. print PY (" _builtin_index = _cuser_form52.myform_create()\n");
  451. print PY (" def __init__(self, group, feature, weights, means, stdevs):\n");
  452. print PY (" lv = -1\n");
  453. print PY (" for var in (weights, means, stdevs):\n");
  454. print PY (" if (lv >= 0 and lv != len(var)) \\\n");
  455. print PY (" or not isinstance(var, (tuple, list)):\n");
  456. print PY (" raise TypeError(\"weights, means and stdevs should all be \"\n");
  457. print PY (" + \"sequences of the same length\")\n");
  458. print PY (" lv = len(var)\n");
  459. print PY (" forms.restraint_form.__init__(self, group, feature, len(weights),\n");
  460. print PY (" tuple(weights) + tuple(means) + tuple(stdevs))\n");
  461. print PY ("a = automodel(env,\n");
  462. print PY (" alnfile = '$outbase.pir', # alignment filename\n");
  463. print PY (" knowns =($knowns), # codes of the templates\n");
  464. print PY (" sequence ='$queryName', # code of the target\n");
  465. print PY (" csrfile = '$myrsr') # use 'my' restraints file\n");
  466. print PY ("a.starting_model= 1 # index of the first model\n");
  467. print PY ("a.ending_model = $NMODELS # index of the last model\n");
  468. # print PY ("a.library_schedule = autosched.slow\n");
  469. # print PY ("a.max_var_iterations = 300\n");
  470. # print PY ("a.repeat_optimization = 2\n");
  471. # print PY ("a.deviation = 0.0\n");
  472. # print PY ("a.write_intermediates = True\n");
  473. if ($config->get_doParallelModeller()) {
  474. ## all slaves have to have their own initialization with new restraints...
  475. print PY ("j = job()\n");
  476. print PY ("j.slave_startup_commands.append(\"from modeller import _cuser_form52\")\n");
  477. print PY ("j.slave_startup_commands.append(\"class MyGauss(forms.restraint_form):\\n\" + \n");
  478. print PY (" \" _builtin_index = _cuser_form52.myform_create()\")\n");
  479. my $cpus = $config->get_cpus();
  480. for (my $i=0; $i < &min($cpus,$NMODELS); $i++) {
  481. print PY ("j.append(local_slave())\n");
  482. }
  483. print PY ("a.use_parallel_job(j)\n");
  484. }
  485. print PY ("a.make() # do the actual homology modeling\n");
  486. }
  487. close (PY);
  488. system("chmod 777 $outbase.pir");
  489. system("chmod 777 $outbase.py");
  490. #run MODELLER:
  491. if ($config->get_doParallelModeller()) {
  492. my $modellerParallel = $config->get_modellerParallel();
  493. &System("cd $workingDir; $modellerParallel $outbase.py > $workingDir/$queryName.modeller.log");
  494. } else {
  495. &System("cd $workingDir; $modeller $outbase.py > $workingDir/$queryName.modeller.log");
  496. }
  497. ## modelling results, e.g. pdb files
  498. my $resultbase = $outbase;
  499. my @modelsBuilt;
  500. #TREAT FAILED MODELLER!
  501. #check model(s):
  502. for (my $i=1; $i<=$NMODELS; $i++) {
  503. my $modelID = "B" . (99990000+$i);
  504. if (! -e "$outbase.$modelID.pdb") {
  505. print("WARNING: MODELLER failed for $outbase.$modelID.pdb!\nSee log file $outbase.modeller.log\n");
  506. #EXCEPTION does not work!!!!debug hhmakemodel.pl
  507. #&System("perl $hh/hhmakemodel.pl $tmpname.hhr -m $singleTemp -q $tmpname.a3m -v -ts $tmpname.mod -d $pdbdir");
  508. }
  509. else {
  510. push (@modelsBuilt, $i);
  511. }
  512. }
  513. # find model with minimal energy
  514. if (scalar(@modelsBuilt) > 0) {
  515. my $minEnergy = +1E8;
  516. my $imin = -1;
  517. my $line;
  518. ## search for best model (lowest objective function value)
  519. ## and save this model - remove all others
  520. foreach my $model (@modelsBuilt) {
  521. my $modelID = "B" . (99990000+$model);
  522. open(IN, "<$outbase.$modelID.pdb") || die("Error: can't open $outbase.$modelID.pdb");
  523. my $energy;
  524. while ($line = <IN>) {
  525. if ($line =~ /MODELLER OBJECTIVE FUNCTION:\s*(\S+)/) {
  526. $energy = $1;
  527. last;
  528. }
  529. }
  530. if ($energy < $minEnergy) {
  531. $minEnergy = $energy;
  532. $imin = $model;
  533. }
  534. close(IN);
  535. }
  536. if ($imin != -1) {
  537. my $minModelID = "B" . (99990000+$imin);
  538. &System("cp $outbase.$minModelID.pdb $resultbase.pdb");
  539. } else {
  540. print "WARNING: MODELLER no minimal energy structure found!\n";
  541. }
  542. }
  543. &System("rm $resultbase.sch $resultbase.V999* $resultbase.D000*");
  544. #Add END line
  545. #my $repairpdb = $config->get_repairPDB();
  546. ## repairpdb is not necessary any more in newer Modeller version
  547. # &System("$repairpdb $resultbase.mod &> /dev/null");
  548. return;
  549. }
  550. sub readTemplateWeightsFromFile {
  551. my $file = shift;
  552. my %weights;
  553. open(TW, "< $file") or die "Cant read $file: $!\n";
  554. while (my $line = <TW>) {
  555. if ($line =~ /(\S+)\s+(\S+)/) {
  556. $weights{$1} = $2;
  557. }
  558. }
  559. close(TW);
  560. return %weights;
  561. }
  562. #############################################
  563. ## ugly but faster than passing by reference
  564. ## maybe avoid it by writing a class instead
  565. my @PDBs;
  566. my @atomsForResidue;
  567. #################################################################
  568. ## input: rsrfile
  569. ## inifile
  570. ## inbase
  571. ## array of templates
  572. ## output: side effect: write out a new restraint file:
  573. ## rsrfile.new.rsr
  574. #################################################################
  575. sub ChangeDistanceRestraints {
  576. my $outbase = shift;
  577. my $workingDir = shift;
  578. my $templateList = shift;
  579. my $config = shift;
  580. my $pdbdir = shift || $config->get_pdbdir();
  581. my $queryName = &get_basename($outbase);
  582. my $weightTemplates = $config->get_templateWeightStrategy();
  583. my %templateWeight;
  584. my $templWeightNormalizer = 0;
  585. @PDBs = ();
  586. @atomsForResidue = ();
  587. if ($weightTemplates == 1) {
  588. if ($templateList->size() == 1) {
  589. $templateWeight{$templateList->get(0)->get_Hit()} = 1;
  590. $templWeightNormalizer = 1;
  591. } else {
  592. ## copy template hhm files to workingDir
  593. for (my $i=0; $i<$templateList->size(); $i++) {
  594. my $tname = $templateList->get($i)->get_Hit();
  595. system("cp $pdbdir/$tname.hhm $workingDir");
  596. }
  597. ## build upgma tree, leaves are numbered according to row-number in templateList (0=query)
  598. my @distanceMatrix = &calculatePairwiseDistances($queryName, $templateList, $workingDir, $workingDir);
  599. my $phyloTree = &upgma(@distanceMatrix);
  600. ## root tree at query (= node 0)
  601. print "PhyloTree:\n";
  602. print "==========\n";
  603. print $phyloTree->print() . "\n";
  604. my $rerootedTree = $phyloTree->reRoot(0);
  605. print "rerootedTree:\n";
  606. print "=============\n";
  607. $rerootedTree->print();
  608. print "=============\n";
  609. ## tweights containts leaf/row=>weight entries
  610. my %tweights = &iterativelyWeightTemplates($rerootedTree);
  611. ## replace indices with template names
  612. foreach my $leaf (keys %tweights) {
  613. my $tname = $templateList->get($leaf-1)->get_Hit();
  614. $templateWeight{$tname} = $tweights{$leaf};
  615. }
  616. print "ChangeDistanceRestraints template weights\n";
  617. foreach my $template (keys %templateWeight) {
  618. print "$template=>$templateWeight{$template}\n";
  619. $templWeightNormalizer += $templateWeight{$template};
  620. }
  621. print "==============================\n";
  622. }
  623. } else {
  624. $templWeightNormalizer = $templateList->size();
  625. }
  626. ## these files must have been generated before
  627. my $rsrfile = "$outbase.rsr";
  628. my $inifile = "$outbase.ini";
  629. my $pirfile = "$outbase.pir";
  630. #get query length
  631. #my $querylength = $templateList->get_queryLength();
  632. ## set up mixture density networks
  633. ## these will be needed later in order to calculate parameters of new contraints
  634. my $mdnCACA = multiTemplateMDN->new($config->get_MDNWeightsLayer1CACA(), $config->get_MDNWeightsLayer2CACA());
  635. my $mdnNO = multiTemplateMDN->new($config->get_MDNWeightsLayer1NO(), $config->get_MDNWeightsLayer2NO());
  636. my $mdnSCMC = multiTemplateMDN->new($config->get_MDNWeightsLayer1SCMC(), $config->get_MDNWeightsLayer2SCMC());
  637. my $mdnSCSC = multiTemplateMDN->new($config->get_MDNWeightsLayer1SCSC(), $config->get_MDNWeightsLayer2SCSC());
  638. #get similarity of each template and read TAB file(s)
  639. my @sim; #contains similarities of Templates
  640. my @templates;#contains PDB-Identifier of Templates
  641. my @TABprobs; #contains probability of ij to according i from *.tab file
  642. #use *.afh file (hit list with filter strength of each hit):
  643. #
  644. #HHR HIT TEMPLATE PROB SCORE SS/Length SIM SumPr/L QueryStart-End COLS TempStart-End TMSCORE
  645. #1 1 1rwz_A 98.0 56.2 0.0833 0.164 0.5697 3 225 213 5 245 0.522695
  646. #2 1 1rwz_A 97.3 56.2 0.0882 0.177 0.5412 2 225 211 9 245 0.518335
  647. #0 1 1rwz_A 98.0 56.2 0.0794 0.173 0.5697 2 225 214 5 245 0.517708
  648. #1 5 1plq 97.0 43.8 0.0877 0.094 0.5342 1 225 217 2 257 0.503807
  649. #...
  650. ## for each template ...
  651. for(my $t=0; $t<$templateList->size(); $t++) {
  652. my $template = $templateList->get($t);
  653. $templates[$t] = $template->get_Hit();
  654. my $hitnr = $template->get_No();
  655. my $hhrnr = $template->get_Filt();
  656. $sim[$t] = $template->get_Sim();
  657. #my $templatelength=$4-$3+1;
  658. my $tab = "$outbase." . $templates[$t] . ".HIT$hitnr.tab";
  659. print "HIT: $t TAB:$tab SIM:$sim[$t]\n";
  660. ## ... build TAB file, based on corresponding tab file
  661. if (! (-e "$tab")) {
  662. &BuildSingleTabFile("$outbase.$hhrnr.tab", $hitnr, $outbase);
  663. }
  664. open(TAB,"< $tab") or die("Error reading $tab: $!\n");
  665. while (my $line = <TAB>) {
  666. # i j score SS probab
  667. # 4 400 -0.64 -0.28 0.0101
  668. if ($line =~ /\s+(\d+)\s+(\d+)\s+\S+\s+\S+\s+(\S+)/) {
  669. my $i = $1;
  670. my $j = $2;
  671. my $probab = $3;
  672. if (!($TABprobs[$i])) {
  673. $TABprobs[$1] = [$probab,$t,$j];
  674. }
  675. else {
  676. push(@{$TABprobs[$i]},$probab,$t,$j);
  677. }
  678. }
  679. }
  680. close(TAB);
  681. my $seenValidLine = 0;
  682. ## ... read template PDB
  683. open(PDBTemp, "< $pdbdir/$templates[$t].pdb") or die("Error reading $pdbdir/$templates[$t].pdb: $!\n");
  684. while (my $line = <PDBTemp>){
  685. #ATOM 1 N PRO 1 -29.477 -9.021 66.175 1.00 75.79 N
  686. #ATOM 3781 CB MET 1 477 25.003 121.648 32.112 1.00 90.08 C
  687. if ($line =~ /^ATOM.{2}\s*(\d+)\s.([\S|\s]{3}).(\S{3})\s.\s*(\d+)[\s|\D]\s*(-?\d+.\d+)\s*(-?\d+.\d+)\s*(-?\d+.\d+)\s*/){
  688. $seenValidLine = 1;
  689. ## save Atomname Residuename ResidueNr x y z
  690. $PDBs[$t][$1]=[$2,$3,$4,$5,$6,$7];
  691. ## speed up: save all atoms for a given residue (in a given template); will be needed later to compute the distance between "matching" atoms
  692. if (not @{$atomsForResidue[$t][$4]}) {
  693. $atomsForResidue[$t][$4] = [$1];
  694. }
  695. else {
  696. push(@{$atomsForResidue[$t][$4]}, $1);
  697. }
  698. }
  699. }
  700. if ($seenValidLine) {
  701. for (my $r=0; $r < @{$atomsForResidue[$t]}; $r++) {
  702. my $rr = @{$atomsForResidue[$t]}[$r];
  703. next if (not defined($rr));
  704. }
  705. }
  706. # $atomsForResiue[$t] = \%atomsForResidue;
  707. close(PDBTemp);
  708. } ## end for all templates
  709. #e.g.: print"$PDBs[4][1227][0] $PDBs[4][1227][1] $PDBs[4][1227][2]\n";
  710. #$TABprobs[x][0] x=atom 0=probabilityij
  711. #$TABprobs[x][1] 1=#of tabfile
  712. #$TABprobs[x][2] 2=probabilityij(e.g. if more than one template is used!!!)
  713. #$TABprobs[x][3] 3=aligned residue number of template
  714. #$TABprobs[x][4] 4=#of tabfile
  715. #... 5=probabilityij(e.g. if more than one template is used!!!)
  716. #... ...
  717. #################################
  718. #read INIPDB file of query: *.ini
  719. #################################
  720. #INIPDB-FILE:
  721. #ATOM 1 N PRO 1 -29.477 -9.021 66.175 1.00 75.79 N
  722. #ATOM 2 CA PRO 1 -28.807 -9.573 65.023 1.00 75.45 C
  723. #...
  724. #1234567890123456789012345678901234567890...
  725. #find atom name in query ini-pdb:
  726. my @iniPDBresNr;
  727. open(iniPDB,"< $inifile") or die("Error reading file: $inifile: $!\n");
  728. while (my $line = <iniPDB>){
  729. if ($line =~ /^ATOM.{2}\s*(\d+)\s.([\S|\s]{3}).(\S{3})\s.\s*(\d+)[\s|\D]\s*(-?\d+.\d+)\s*(-?\d+.\d+)\s*(-?\d+.\d+)\s*/){
  730. ## atomNr => residueNr, atomName, residueName
  731. $iniPDBresNr[$1] = [$4,$2,$3];
  732. }
  733. }
  734. close(iniPDB);
  735. #0:
  736. #1: 1 N MET
  737. #2: 1 CA MET
  738. #3: 1 CB MET
  739. #4: 1 CG MET
  740. #...
  741. #####################
  742. #read restraints file
  743. #####################
  744. open(RSR,"< $rsrfile") or die("Error reading file: $rsrfile: $!\n");
  745. my @rsrlines = <RSR>;
  746. ## R 3 1 1 1 2 2 1 3 2 1.5380 0.0364
  747. ## R 3 1 1 26 2 2 1 1394 1422 3.8982 6.2527
  748. close(RSR);
  749. #Spline restraint
  750. # Form Modality Feature Group Numb_atoms Numb_parameters Numb_Feat Atom_indices Parameter(e.g. mean, standard deviation)
  751. #R 10 39 1 9 2 45 1 25 363 1.0000 0.0000 26.6000 0.7000 -0.6596...
  752. ############################################
  753. #change old restraints in Modeller rsr-file:
  754. ############################################
  755. ## comment: simply using Modellers mean for the template distance
  756. ## works only for single template modelling (otherwise splines are used), therefore
  757. ## one needs a subroutine to extract the distances in templates
  758. ##
  759. my @newrsrfile;
  760. ## if no distance found in template for a specific restraint, the old one can be used (remainRestr=1)
  761. ## or the restraint can be neglected (rmainRestr=0)
  762. my $remainRestr = 1;
  763. for (my $i=1; $i<@rsrlines; $i++) {
  764. #
  765. if ($i % 1000 == 0) {
  766. print "$i iterations in changeRestraints\n";
  767. }
  768. ## 9|10|23|26
  769. if ($rsrlines[$i] =~ /^R\s+(3|10)\s+\d+\s+\d+\s+(9|10|23|26)\s+2\s+\d+\s+\d+\s+(\S+)\s+(\S+)\s+(\S+)/) {
  770. #print "\nLINE:$i $rsrlines[$i]";
  771. #print "rsrType:$1 group:$2 atom1:$3 atom2:$4\n";
  772. #search i, j, delta from *.ini
  773. my $Qi;
  774. my $Qj;
  775. my $queryatomnameOne;
  776. my $queryatomnameTwo;
  777. my $queryresiduenameOne;
  778. my $queryresiduenameTwo;
  779. if ($iniPDBresNr[$3][0] && $iniPDBresNr[$4][0]){
  780. $Qi = $iniPDBresNr[$3][0]; #residue i of Query (number)
  781. $Qj = $iniPDBresNr[$4][0]; #residue j of Query (number)
  782. $queryatomnameOne = $iniPDBresNr[$3][1]; #atomname of residue i of Query
  783. $queryatomnameTwo = $iniPDBresNr[$4][1]; #atomname of residue j of Query
  784. $queryresiduenameOne = $iniPDBresNr[$3][2]; #residuename of i of Query
  785. $queryresiduenameTwo = $iniPDBresNr[$4][2]; #residuename of j of Query
  786. }
  787. else { die("Error in file: $inifile"); }
  788. my $form = $1;
  789. my $rsrType = $2;
  790. my $atom1 = $3;
  791. my $atom2 = $4;
  792. my $checkdelta = $5;
  793. ################################################################################################################
  794. #search prob & sim for i-i' and j-j'from TAB file & calculate new values for new log-normal Gaussian distribution:
  795. if ($TABprobs[$Qi][0] && $TABprobs[$Qj][0]) {
  796. #e.g.:
  797. # atom prob Template alternative probability from alternative Template
  798. # 55: 0.1119 1
  799. # 56: 0.0874 1
  800. # 57: 0.0733 1
  801. # 58: 0.3256 0 0.0454 1
  802. # 59: 0.3552 0 0.0369 1
  803. # 60: 0.4030 0 0.0274 1
  804. # 61: 0.4952 0 0.0178 1
  805. # 62: 0.5366 0
  806. # 63: 0.5986 0
  807. # 64: 0.6060 0
  808. #e.g. case2 (more than one template for i-i'): atom 59: prob i-i' 0.3552 in Template 1 and 0.0369 in Template 2
  809. # atom 63: prob j-j' 0.5986 in Template 1
  810. # so take prob i-i' 0.3552 from Template 1 (NOT from Template 2!!!)
  811. # $TABprobs[$i][+0] = probab
  812. # $TABprobs[$i][+1] = t
  813. # $TABprobs[$i][+2] = j
  814. #if there is more than one template for i-i' AND j-j'
  815. #if there is more than one template for i-i' and template for j-j'
  816. # Q |---------------------------------------------------------------|
  817. # T1 |--------------------------------|
  818. # T2 |-------------------------------|
  819. # j i
  820. if (@{$TABprobs[$Qi]}>3 && @{$TABprobs[$Qj]}>3){
  821. for (my $t=0;$t<@{$TABprobs[$Qi]}; $t=$t+3){
  822. my $probi = $TABprobs[$Qi][$t];
  823. for (my $r=0;$r<@{$TABprobs[$Qj]}; $r=$r+3){
  824. ## Qi and Qj aligned to residues in same template
  825. if ($TABprobs[$Qi][$t+1] == $TABprobs[$Qj][$r+1]){
  826. my $probj = $TABprobs[$Qj][$r]; #probability of j-j
  827. my $sim = $sim[$TABprobs[$Qj][$r+1]]; #similarity of according template
  828. my $dtemplate = &DistanceInTemplate($atom1,$atom2, $Qi,$Qj,$TABprobs[$Qi][$t+2],$TABprobs[$Qj][$r+2],$queryatomnameOne,$queryatomnameTwo,$queryresiduenameOne,$queryresiduenameTwo,$TABprobs[$Qj][$r+1], 1);
  829. if ( ($dtemplate ne "NULL")) {
  830. my $posteriori = $probi*$probj;
  831. if ($Qi - $TABprobs[$Qi][$t+2] == $Qj - $TABprobs[$Qj][$r+2]) {
  832. $posteriori = min($probi, $probj);
  833. }
  834. # next if ($posteriori < 0.2);
  835. ## skip too long distance-restraints
  836. next if (($dtemplate > 28 and $rsrType==9) or ($dtemplate > 19 and $rsrType==10) or ($dtemplate > 11 and $rsrType==23) or ($dtemplate>10 and $rsrType==26));
  837. my $newline = "";
  838. my $hit = $templateList->get($TABprobs[$Qi][$t+1])->get_Hit();
  839. if ($weightTemplates) {
  840. my $tweight = $templateWeight{$hit};
  841. $newline = &CalcRSRTweight($posteriori, $sim, $dtemplate, $rsrType, $atom1,$atom2, $mdnCACA, $mdnNO, $mdnSCMC, $mdnSCSC, $tweight);
  842. }
  843. else {
  844. $newline = &CalcRSR($posteriori, $sim, $dtemplate, $rsrType, $atom1,$atom2, $mdnCACA, $mdnNO, $mdnSCMC, $mdnSCSC);
  845. }
  846. $newline .= " $hit $posteriori $sim $dtemplate $Qi $Qj $TABprobs[$Qi][$t+2] $TABprobs[$Qj][$r+2]\n";
  847. push(@newrsrfile, $newline);
  848. }
  849. elsif ($remainRestr) { push(@newrsrfile, $rsrlines[$i]); }
  850. }
  851. }
  852. }
  853. }
  854. #if there is more than one template for i-i' and one template for j-j'
  855. # Q |---------------------------------------------------------------|
  856. # T1 |--------------------------------|
  857. # T2 |-------------------------------|
  858. # j i
  859. elsif (@{$TABprobs[$Qi]}>3 && @{$TABprobs[$Qj]}==3) {
  860. #print "more than one template for i-i' atom: $atom1 $atom2\n";
  861. my $probi;
  862. my $probj = $TABprobs[$Qj][0]; #probability of j-j'
  863. my $sim = $sim[$TABprobs[$Qj][1]]; #similarity of according template
  864. my $dtemplate = "NULL";
  865. my $posteriori = 0;
  866. for (my $r=1; $r<@{$TABprobs[$Qi]}; $r=$r+3){
  867. if ($TABprobs[$Qj][1] == $TABprobs[$Qi][$r]){
  868. $probi = $TABprobs[$Qi][$r-1]; #probability of i-i'
  869. $dtemplate = &DistanceInTemplate($atom1,$atom2, $Qi,$Qj,$TABprobs[$Qi][$r+1],$TABprobs[$Qj][2],$queryatomnameOne,$queryatomnameTwo,$queryresiduenameOne,$queryresiduenameTwo,$TABprobs[$Qj][1], 2);
  870. $posteriori = $probi*$probj;
  871. if ($Qi - $TABprobs[$Qi][$r+1] == $Qj - $TABprobs[$Qj][2]) {
  872. $posteriori = min($probi, $probj);
  873. }
  874. if ( ($dtemplate ne "NULL")) {
  875. # next if ($posteriori < 0.2);
  876. ## skip too long distance-restraints
  877. next if (($dtemplate > 28 and $rsrType==9) or ($dtemplate > 19 and $rsrType==10) or ($dtemplate > 11 and $rsrType==23) or ($dtemplate>10 and $rsrType==26));
  878. my $newline = "";
  879. my $hit = $templateList->get($TABprobs[$Qj][1])->get_Hit();
  880. if ($weightTemplates) {
  881. my $tweight = $templateWeight{$hit};
  882. $newline = &CalcRSRTweight($posteriori, $sim, $dtemplate, $rsrType, $atom1,$atom2, $mdnCACA, $mdnNO, $mdnSCMC, $mdnSCSC, $tweight);
  883. }
  884. else {
  885. $newline = &CalcRSR($posteriori, $sim, $dtemplate, $rsrType, $atom1,$atom2, $mdnCACA, $mdnNO, $mdnSCMC, $mdnSCSC);
  886. }
  887. $newline .= " $hit $posteriori $sim $dtemplate $Qi $Qj $TABprobs[$Qi][$r+1] $TABprobs[$Qj][2]\n";
  888. push(@newrsrfile,$newline);
  889. }
  890. elsif ($remainRestr) { push(@newrsrfile, $rsrlines[$i]); }
  891. }
  892. }
  893. }
  894. #if there is more than one template for j-j' and one template for i-i'
  895. # Q |---------------------------------------------------------------|
  896. # T1 |------------------------------------|
  897. # T2 |-----------------------------------|
  898. # i j
  899. elsif (@{$TABprobs[$Qi]}==3 && @{$TABprobs[$Qj]}>3){
  900. #print "more than one template for j-j' atom: $atom1 $atom2\n";
  901. my $probi = $TABprobs[$Qi][0]; #probability of i-i'
  902. my $probj;
  903. my $sim = $sim[$TABprobs[$Qi][1]]; #similarity of according template
  904. my $dtemplate = "NULL";
  905. my $posteriori = 0;
  906. for (my $r=1;$r<@{$TABprobs[$Qj]}; $r=$r+3){
  907. if ($TABprobs[$Qi][1] == $TABprobs[$Qj][$r]){
  908. $probj = $TABprobs[$Qj][$r-1]; #probability of j-j'
  909. $dtemplate = &DistanceInTemplate($atom1,$atom2, $Qi,$Qj,$TABprobs[$Qi][2],$TABprobs[$Qj][$r+1],$queryatomnameOne,$queryatomnameTwo,$queryresiduenameOne,$queryresiduenameTwo,$TABprobs[$Qi][1], 3);
  910. $posteriori = $probi*$probj;
  911. if ($Qi - $TABprobs[$Qi][2] == $Qj - $TABprobs[$Qj][$r+1]) {
  912. $posteriori = min($probi, $probj);
  913. }
  914. if ( ($dtemplate ne "NULL")) {
  915. # next if ($posteriori < 0.2);
  916. ## skip too long distance-restraints
  917. next if (($dtemplate > 28 and $rsrType==9) or ($dtemplate > 19 and $rsrType==10) or ($dtemplate > 11 and $rsrType==23) or ($dtemplate>10 and $rsrType==26));
  918. my $newline = "";
  919. my $hit = $templateList->get($TABprobs[$Qi][1])->get_Hit();
  920. if ($weightTemplates) {
  921. my $tweight = $templateWeight{$hit};
  922. $newline = &CalcRSRTweight($posteriori, $sim, $dtemplate, $rsrType,$atom1,$atom2, $mdnCACA, $mdnNO, $mdnSCMC, $mdnSCSC, $tweight);
  923. }
  924. else {
  925. $newline = &CalcRSR($posteriori, $sim, $dtemplate, $rsrType,$atom1,$atom2, $mdnCACA, $mdnNO, $mdnSCMC, $mdnSCSC);
  926. }
  927. $newline .= " $hit $posteriori $sim $dtemplate $Qi $Qj $TABprobs[$Qi][2] $TABprobs[$Qj][$r+1]\n";
  928. push(@newrsrfile, $newline);
  929. }
  930. elsif ($remainRestr) { push(@newrsrfile, $rsrlines[$i]); }
  931. }
  932. }
  933. }
  934. #only one template (per position; but it is possible that the two residues are (one-fold) covered by two
  935. # different (single covering) templates) => test if same template
  936. # Q |--------------------------------------------------------|
  937. # T1 |----------------------|
  938. # T2 |-------------------|
  939. # i j
  940. elsif (@{$TABprobs[$Qi]}==3 && @{$TABprobs[$Qj]}==3) {
  941. my $probi = $TABprobs[$Qi][0]; #probability of i-i'
  942. my $probj = $TABprobs[$Qj][0]; #probability of j-j'
  943. my $sim = $sim[$TABprobs[$Qi][1]]; #similarity of according template
  944. my $dtemplate = "NULL";
  945. # same template???
  946. if ($TABprobs[$Qi][1] == $TABprobs[$Qj][1]) {
  947. $dtemplate = &DistanceInTemplate($atom1,$atom2, $Qi,$Qj,$TABprobs[$Qi][2],$TABprobs[$Qj][2],$queryatomnameOne,$queryatomnameTwo,$queryresiduenameOne,$queryresiduenameTwo,$TABprobs[$Qi][1], 4);
  948. #if ($atom1==19) {
  949. # print "DistanceInTemplate($atom1,$atom2, $Qi,$Qj,$TABprobs[$Qi][2],$TABprobs[$Qj][2],$queryatomnameOne,$queryatomnameTwo,$queryresiduenameOne,$queryresiduenameTwo,$TABprobs[$Qi][1], 4)=$dtemplate\n";
  950. #}
  951. }
  952. if ( ($dtemplate ne "NULL")) {
  953. my $posteriori = $probi*$probj;
  954. if ($Qi - $TABprobs[$Qi][2] == $Qj - $TABprobs[$Qj][2]) {
  955. $posteriori = min($probi, $probj);
  956. }
  957. # next if ($posteriori < 0.2);
  958. ## skip too long distance-restraints
  959. next if (($dtemplate > 28 and $rsrType==9) or ($dtemplate > 19 and $rsrType==10) or ($dtemplate > 11 and $rsrType==23) or ($dtemplate>10 and $rsrType==26));
  960. my $newline = "";
  961. my $hit = $templateList->get($TABprobs[$Qi][1])->get_Hit();
  962. if ($weightTemplates) {
  963. my $tweight = $templateWeight{$hit};
  964. $newline = &CalcRSRTweight($posteriori, $sim, $dtemplate, $rsrType,$atom1,$atom2, $mdnCACA, $mdnNO, $mdnSCMC, $mdnSCSC, $tweight);
  965. }
  966. else {
  967. $newline = &CalcRSR($posteriori, $sim, $dtemplate, $rsrType,$atom1,$atom2, $mdnCACA, $mdnNO, $mdnSCMC, $mdnSCSC);
  968. }
  969. $newline .= " $hit $posteriori $sim $dtemplate $Qi $Qj $TABprobs[$Qi][2] $TABprobs[$Qj][2]\n";
  970. push(@newrsrfile,$newline);
  971. }
  972. elsif ($remainRestr) { push(@newrsrfile, $rsrlines[$i]); }
  973. }
  974. else{print "WARNING1: error in line: $rsrlines[$i] check PIR-Alignment: $outbase.pir and Tab!\n";}
  975. }
  976. else{
  977. print "WARNING2: in line: $rsrlines[$i] check tabfile(s), residues Qi=$Qi, Qj=$Qj; keep old restraint!\n";
  978. if ($remainRestr) {push(@newrsrfile, $rsrlines[$i])};
  979. }
  980. }
  981. else {push(@newrsrfile,$rsrlines[$i]);}
  982. }
  983. $rsrfile =~ s/\.rsr$//;
  984. open (OUT, ">$rsrfile.new.rsr") or die ("Error: cannot open $rsrfile.new.rsr: $!\n");
  985. print(OUT @newrsrfile);
  986. close(OUT);
  987. return $templWeightNormalizer;
  988. }
  989. ########################################################
  990. ## input: self explanatory, see arguments
  991. ## output: distance between specified atoms in specified
  992. ## residue
  993. ########################################################
  994. sub DistanceInTemplate {
  995. my $queryatomOne = $_[0]; #209
  996. my $queryatomTwo = $_[1]; #222
  997. my $queryresidueOne = $_[2]; #27
  998. my $queryresidueTwo = $_[3]; #29
  999. my $tempresidueOne = $_[4]; #4
  1000. my $tempresidueTwo = $_[5]; #6
  1001. my $queryatomnameOne = $_[6]; #CA
  1002. my $queryatomnameTwo = $_[7]; #CA
  1003. my $queryresiduenameOne = $_[8]; #ILE
  1004. my $queryresiduenameTwo = $_[9]; #LEU
  1005. my $templatenr = $_[10]; #0
  1006. my $case = $_[11];
  1007. #@PDBs; global contains: Atomname[0] Residuename[1] ResidueNr[2] x[3] y[4] z[5]
  1008. my $dtemplate;
  1009. #print"search Query:$queryatomnameOne,$queryresiduenameOne & $queryatomnameTwo,$queryresiduenameTwo Temp:$tempresidueOne $tempresidueTwo\n";
  1010. #print"OPEN: $templatename.pdb search:$tempresidueOne\n";
  1011. #find xyz in template pdb
  1012. my @coordinatesOne = ();
  1013. my @coordinatesTwo = ();
  1014. my $a = 0;
  1015. my $b = 0;
  1016. my $c = 0;
  1017. my $d = 0;
  1018. my $e = 0;
  1019. my $f = 0;
  1020. if (@{$atomsForResidue[$templatenr][$tempresidueOne]}) {
  1021. foreach my $atom (@{$atomsForResidue[$templatenr][$tempresidueOne]}) {
  1022. push(@coordinatesOne, [$PDBs[$templatenr][$atom][0],$PDBs[$templatenr][$atom][1],$PDBs[$templatenr][$atom][3],$PDBs[$templatenr][$atom][4],$PDBs[$templatenr][$atom][5]]);
  1023. }
  1024. }
  1025. if (@{$atomsForResidue[$templatenr][$tempresidueTwo]}) {
  1026. foreach my $atom (@{$atomsForResidue[$templatenr][$tempresidueTwo]}) {
  1027. push(@coordinatesTwo, [$PDBs[$templatenr][$atom][0],$PDBs[$templatenr][$atom][1],$PDBs[$templatenr][$atom][3],$PDBs[$templatenr][$atom][4],$PDBs[$templatenr][$atom][5]]);
  1028. }
  1029. }
  1030. ## find all atoms for one template belonging to the template residues given, e.g. 27 and 29
  1031. ## TODO: avoid traversing whole array by recording all atoms for each residue for each template
  1032. # for(my $i=0; $i<@{$PDBs[$templatenr]}; $i++) {
  1033. # if (($PDBs[$templatenr][$i][2]) && ($PDBs[$templatenr][$i][2] == $tempresidueOne)) {
  1034. # #print"$PDBs[$templatenr][$i][0] $PDBs[$templatenr][$i][1] $PDBs[$templatenr][$i][2] $PDBs[$templatenr][$i][3] $PDBs[$templatenr][$i][4] $PDBs[$templatenr][$i][5]\n";
  1035. # push(@coordinatesOne,[$PDBs[$templatenr][$i][0],$PDBs[$templatenr][$i][1],$PDBs[$templatenr][$i][3],$PDBs[$templatenr][$i][4],$PDBs[$templatenr][$i][5]]);
  1036. # }
  1037. # if (($PDBs[$templatenr][$i][2])&&($PDBs[$templatenr][$i][2] == $tempresidueTwo)) {
  1038. # #print"$PDBs[$templatenr][$i][0] $PDBs[$templatenr][$i][1] $PDBs[$templatenr][$i][2] $PDBs[$templatenr][$i][3] $PDBs[$templatenr][$i][4] $PDBs[$templatenr][$i][5]\n";
  1039. # push(@coordinatesTwo,[$PDBs[$templatenr][$i][0],$PDBs[$templatenr][$i][1],$PDBs[$templatenr][$i][3],$PDBs[$templatenr][$i][4],$PDBs[$templatenr][$i][5]]);
  1040. # }
  1041. # }
  1042. # foreach my $atom {$atomsForResidue[$templatenr]}{$tempresidueOne}
  1043. my $foundOne = 0;
  1044. my $foundTwo = 0;
  1045. for(my $k=0; $k<@coordinatesOne; $k++) {
  1046. #print "$coordinatesOne[$k][0] eq $queryatomnameOne?\n";
  1047. if ($coordinatesOne[$k][0] eq $queryatomnameOne) {
  1048. $a = $coordinatesOne[$k][2];
  1049. $b = $coordinatesOne[$k][3];
  1050. $c = $coordinatesOne[$k][4];
  1051. $foundOne = 1;
  1052. last;
  1053. }
  1054. }
  1055. for(my $k=0; $k<@coordinatesTwo; $k++) {
  1056. #print "$coordinatesTwo[$k][0] eq $queryatomnameTwo?\n";
  1057. if ($coordinatesTwo[$k][0] eq $queryatomnameTwo) {
  1058. $d = $coordinatesTwo[$k][2];
  1059. $e = $coordinatesTwo[$k][3];
  1060. $f = $coordinatesTwo[$k][4];
  1061. $foundTwo = 1;
  1062. last;
  1063. }
  1064. }
  1065. if($foundOne==0 || $foundTwo==0) {
  1066. if (@coordinatesOne) {
  1067. if (@coordinatesTwo) {
  1068. if($foundOne==0) {
  1069. my @alternatives = &searchAlternatives($queryresiduenameOne,$queryatomnameOne);
  1070. #print "QUERY1:$queryatomnameOne in $queryresiduenameOne TEMPLATE:\t";
  1071. #print"\nsearchAlternatives 1 zu $queryresiduenameOne $queryatomnameOne ($queryatomOne $queryatomTwo) found:\n";
  1072. for(my $k=0; $k<@coordinatesOne; $k++) {
  1073. #print "$coordinatesOne[$k][0] in $coordinatesOne[$k][1]\nALTERNATIVE?\n";
  1074. for (my $w=1; $w<@alternatives; $w++) {
  1075. #print "$alternatives[$w]\n";
  1076. if ($alternatives[$w] =~ /\s$coordinatesOne[$k][1]:$coordinatesOne[$k][0]\s/) {
  1077. #print "HIT!!!: $coordinatesOne[$k][2] $coordinatesOne[$k][3] $coordinatesOne[$k][4]";
  1078. $a += $coordinatesOne[$k][2];
  1079. $b += $coordinatesOne[$k][3];
  1080. $c += $coordinatesOne[$k][4];
  1081. $foundOne++;
  1082. }
  1083. }
  1084. }
  1085. }
  1086. if($foundTwo==0) {
  1087. my @alternatives = &searchAlternatives($queryresiduenameTwo,$queryatomnameTwo);
  1088. #print "\nQUERY2:$queryatomnameTwo in $queryresiduenameTwo TEMPLATE:\t";
  1089. #print"\nsearchAlternatives 2 zu $queryresiduenameTwo $queryatomnameTwo ($queryatomOne $queryatomTwo) found:\n";
  1090. for(my $k=0; $k<@coordinatesTwo; $k++) {
  1091. #print "$coordinatesTwo[$k][0] in $coordinatesTwo[$k][1]\nALTERNATIVE?\n";
  1092. for (my $w=1; $w<@alternatives; $w++) {
  1093. #print "$alternatives[$w]\n";
  1094. if ($alternatives[$w] =~ /\s$coordinatesTwo[$k][1]:$coordinatesTwo[$k][0]\s/){
  1095. #print "HIT!!!: $coordinatesTwo[$k][2] $coordinatesTwo[$k][3] $coordinatesTwo[$k][4]";
  1096. $d += $coordinatesTwo[$k][2];
  1097. $e += $coordinatesTwo[$k][3];
  1098. $f += $coordinatesTwo[$k][4];
  1099. $foundTwo++;
  1100. }
  1101. }
  1102. }
  1103. }
  1104. if ($foundOne>0 && $foundTwo>0) {
  1105. $a /= $foundOne;
  1106. $b /= $foundOne;
  1107. $c /= $foundOne;
  1108. $d /= $foundTwo;
  1109. $e /= $foundTwo;
  1110. $f /= $foundTwo;
  1111. $dtemplate= sqrt(($a-$d)*($a-$d) + ($b-$e)*($b-$e) + ($c-$f)*($c-$f));
  1112. print"FOUND ALTERNATIVE distance: $dtemplate\n";
  1113. }
  1114. else{$dtemplate = "NULL";}
  1115. }
  1116. else{$dtemplate = "NULL";}
  1117. }
  1118. else{$dtemplate = "NULL";}
  1119. }
  1120. else {
  1121. $dtemplate= sqrt(($a-$d)*($a-$d) + ($b-$e)*($b-$e) + ($c-$f)*($c-$f));
  1122. if ($dtemplate < 1) {
  1123. print "==> Distance < 1 : $dtemplate\n";
  1124. print "queryatomOne = $queryatomOne\n";
  1125. print "queryatomTwo = $queryatomTwo\n";
  1126. print "queryresidueOne = $queryresidueOne\n";
  1127. print "queryresidueTwo = $queryresidueTwo\n";
  1128. print "tempresidueOne = $tempresidueOne\n";
  1129. print "tempresidueTwo = $tempresidueTwo\n";
  1130. print "queryatomnameOne = $queryatomnameOne\n";
  1131. print "queryatomnameTwo = $queryatomnameTwo\n";
  1132. print "queryresidue = $queryresiduenameOne\n";
  1133. print "queryresiduenameTwo = $queryresiduenameTwo\n";
  1134. print "templatenr = $templatenr\n";
  1135. print "case = $case\n";
  1136. }
  1137. }
  1138. #print "DELTA: $dtemplate\n";
  1139. return $dtemplate;
  1140. }
  1141. ##################################################################
  1142. ## input: self explanatory, see arguments
  1143. ## output: constraint line as needed for modeller constraint file
  1144. ##
  1145. ## description:
  1146. ## the parameters for the new constraint are calculated by a MDN
  1147. ## parameters with index 2 are background parameters
  1148. ## the new restraint is generated and returned
  1149. ##################################################################
  1150. sub CalcRSR{
  1151. my $posteriori = shift;
  1152. my $sim = shift;
  1153. my $dtemplate = shift;
  1154. my $rsrType = shift;
  1155. my $atom1 = shift;
  1156. my $atom2 = shift;
  1157. my $mdnCACA = shift;
  1158. my $mdnNO = shift;
  1159. my $mdnSCMC = shift;
  1160. my $mdnSCSC = shift;
  1161. my @mdnOutput;
  1162. my @mdnInput = ($dtemplate, $posteriori, $sim);
  1163. if ($rsrType == 9) {
  1164. @mdnOutput = $mdnCACA->getMixtureParametersFromMDN(@mdnInput);
  1165. }
  1166. elsif ($rsrType == 10) {
  1167. @mdnOutput = $mdnNO->getMixtureParametersFromMDN(@mdnInput);
  1168. }
  1169. elsif ($rsrType == 23) {
  1170. @mdnOutput = $mdnSCMC->getMixtureParametersFromMDN(@mdnInput);
  1171. }
  1172. elsif ($rsrType == 26) {
  1173. @mdnOutput = $mdnSCSC->getMixtureParametersFromMDN(@mdnInput);
  1174. }
  1175. my $p1 = $mdnOutput[0];
  1176. my $p2 = $mdnOutput[1];
  1177. my $mu1 = log($dtemplate) + $mdnOutput[2];
  1178. my $mu2 = log($dtemplate) + $mdnOutput[3];
  1179. my $sigma1 = $mdnOutput[4];
  1180. my $sigma2 = $mdnOutput[5];
  1181. # Form Modality Feature Group Numb_atoms Numb_parameters Numb_Feat Atom_indices Parameter(e.g. mean, standard deviation)
  1182. #R 3 1 1 1 2 2 1 3 2 1.5000 0.0364
  1183. #R 50 1 1 9|10|23|26 2 6 1 .. .. ... ... ... ... ... ...
  1184. my $newline = sprintf("R 50 1 1 %3.0f 2 6 1 %5.0f %5.0f %8.4f %8.4f %8.4f %8.4f %8.4f %8.4f",$rsrType,$atom1,$atom2,$p1,$mu1,$sigma1,$p2,$mu2,$sigma2);
  1185. return $newline;
  1186. }
  1187. ##################################################################
  1188. ## input: self explanatory, see arguments
  1189. ## output: constraint line as needed for modeller constraint file
  1190. ##
  1191. ## description:
  1192. ## the parameters for the new constraint, are calculated by a MDN
  1193. ## parameters with index 2 are background parameters
  1194. ##################################################################
  1195. sub CalcRSRTweight{
  1196. my $posteriori = shift;
  1197. my $sim = shift;
  1198. my $dtemplate = shift;
  1199. my $rsrType = shift;
  1200. my $atom1 = shift;
  1201. my $atom2 = shift;
  1202. my $mdnCACA = shift;
  1203. my $mdnNO = shift;
  1204. my $mdnSCMC = shift;
  1205. my $mdnSCSC = shift;
  1206. my $tweight = shift;
  1207. my @mdnOutput;
  1208. my @mdnInput = ($dtemplate, $posteriori, $sim);
  1209. if ($rsrType == 9) {
  1210. @mdnOutput = $mdnCACA->getMixtureParametersFromMDN(@mdnInput);
  1211. }
  1212. elsif ($rsrType == 10) {
  1213. @mdnOutput = $mdnNO->getMixtureParametersFromMDN(@mdnInput);
  1214. }
  1215. elsif ($rsrType == 23) {
  1216. @mdnOutput = $mdnSCMC->getMixtureParametersFromMDN(@mdnInput);
  1217. }
  1218. elsif ($rsrType == 26) {
  1219. @mdnOutput = $mdnSCSC->getMixtureParametersFromMDN(@mdnInput);
  1220. }
  1221. my $p1 = $mdnOutput[0];
  1222. my $p2 = $mdnOutput[1];
  1223. my $mu1 = log($dtemplate) + $mdnOutput[2];
  1224. my $mu2 = log($dtemplate) + $mdnOutput[3];
  1225. my $sigma1 = $mdnOutput[4];
  1226. my $sigma2 = $mdnOutput[5];
  1227. # Form Modality Feature Group Numb_atoms Numb_parameters Numb_Feat Atom_indices Parameter(e.g. mean, standard deviation)
  1228. #R 3 1 1 1 2 2 1 3 2 1.5000 0.0364
  1229. #R 50 1 1 9|10|23|26 2 6 1 .. .. ... ... ... ... ... ...
  1230. my $newline = sprintf("R 50 1 1 %3.0f 2 7 1 %5.0f %5.0f %8.4f %8.4f %8.4f %8.4f %8.4f %8.4f %8.4f",$rsrType,$atom1,$atom2,$p1,$mu1,$sigma1,$p2,$mu2,$sigma2, $tweight);
  1231. return $newline;
  1232. }
  1233. ##################################################################
  1234. ## input: self explanatory, see arguments
  1235. ## output: constraint line as needed for modeller constraint file
  1236. ##
  1237. ## description:
  1238. ## the parameters for the new constraint, are calculated by a MDN
  1239. ## instead of writing the parameters mu, mu_BG, sigma, sigma_BG,
  1240. ## w, w_BG into the restraints file, use precalculated parameters
  1241. ## a,b,c in order to save computing steps when Modeller optimization
  1242. ## runs
  1243. ## parameters with index 2 are background parameters
  1244. ##################################################################
  1245. sub CalcRSRefficient {
  1246. my $posteriori = shift;
  1247. my $sim = shift;
  1248. my $dtemplate = shift;
  1249. my $rsrType = shift;
  1250. my $atom1 = shift;
  1251. my $atom2 = shift;
  1252. my $mdnCACA = shift;
  1253. my $mdnNO = shift;
  1254. my $mdnSCMC = shift;
  1255. my $mdnSCSC = shift;
  1256. my @mdnOutput;
  1257. my @mdnInput = ($dtemplate, $posteriori, $sim);
  1258. if ($rsrType == 9) {
  1259. @mdnOutput = $mdnCACA->getMixtureParametersFromMDN(@mdnInput);
  1260. }
  1261. elsif ($rsrType == 10) {
  1262. @mdnOutput = $mdnNO->getMixtureParametersFromMDN(@mdnInput);
  1263. }
  1264. elsif ($rsrType == 23) {
  1265. @mdnOutput = $mdnSCMC->getMixtureParametersFromMDN(@mdnInput);
  1266. }
  1267. elsif ($rsrType == 26) {
  1268. @mdnOutput = $mdnSCSC->getMixtureParametersFromMDN(@mdnInput);
  1269. }
  1270. my $p1 = $mdnOutput[0];
  1271. my $p2 = $mdnOutput[1];
  1272. my $mu1 = log($dtemplate) + $mdnOutput[2];
  1273. my $mu2 = log($dtemplate) + $mdnOutput[3];
  1274. my $sigma1 = $mdnOutput[4];
  1275. my $sigma2 = $mdnOutput[5];
  1276. # Form Modality Feature Group Numb_atoms Numb_parameters Numb_Feat Atom_indices Parameter(e.g. mean, standard deviation)
  1277. #R 3 1 1 1 2 2 1 3 2 1.5000 0.0364
  1278. #R 50 1 1 9|10|23|26 2 6 1 .. .. ... ... ... ... ... ...
  1279. my $sigma1sq = $sigma1*$sigma1;
  1280. my $sigma2sq = $sigma2*$sigma2;
  1281. my $factorOne = 2*$sigma1sq*$sigma2sq;
  1282. my $a = $p1/$p2 * $sigma2/$sigma1;
  1283. my $b = ($sigma2sq - $sigma1sq)/$factorOne;
  1284. my $factorTwo = ($sigma2sq*$mu1 - $sigma1sq*$mu2);
  1285. my $c = $factorTwo/($sigma2sq - $sigma1sq);
  1286. my $d = $factorTwo*$factorTwo/($factorOne*($sigma2sq - $sigma1sq)) - $factorTwo/$factorOne;
  1287. my $newline = sprintf("R 50 1 1 %3.0f 2 6 1 %5.0f %5.0f %8.4f %8.4f %8.4f %8.4f\n",$rsrType,$atom1,$atom2,$a, $b, $c, $d);
  1288. return $newline;
  1289. }
  1290. sub CalcRSRPrev{
  1291. my $probi = $_[0];
  1292. my $probj = $_[1];
  1293. my $sim = $_[2];
  1294. my $dtemplate = $_[3];
  1295. my $rsrType = $_[4];
  1296. my $atom1 = $_[5];
  1297. my $atom2 = $_[6];
  1298. #distance Template < cutoff
  1299. # avPI avSIM avDELTA
  1300. # CACAlog 0.78950 0.47877 2.29587
  1301. # NOlog 0.79391 0.47610 2.04427
  1302. # SCMClog 0.81810 0.50638 1.50130
  1303. # SCSClog 0.87671 0.59417 1.46639
  1304. my @Avec; #Parametervalues: a0 a1 a2 a3 b0 b1 b2 b3 b4 b5 b6 b7 b8 b9 c0 c1 c2 c3 c4 c5 d0 d1 d2 d3 d4 d5
  1305. my $PI;
  1306. my $Sim;
  1307. my $deltatemplate;
  1308. if ($rsrType==9){
  1309. @Avec = (-2.6961,-0.7186,0.1326 ,-0.6153,0.6712 ,0.7634 ,0.1174 ,0.0353 ,0.3422 ,-0.2807,0.2447 ,0.0463 ,-0.0171,0.1796 ,-1.0388,-0.1192,-0.3310,0.0900 ,0.2468 ,-0.2058,0.1296 ,-0.1788,-0.1989,0.0080 ,0.3942 ,0.1421);
  1310. $PI= $probi*$probj - 0.7895;
  1311. $Sim = $sim - 0.4788;
  1312. $deltatemplate = log($dtemplate)- 2.29587;
  1313. }
  1314. elsif ($rsrType==10){
  1315. @Avec = (-2.6514,-0.5396,-0.0368,-0.5819,0.6323 ,0.8551 ,0.0992 ,-0.0011,0.4606 ,0.0202 ,0.2238 ,0.1218 ,-0.0396,0.0448 ,-0.9667,-0.2518,-0.4197,0.0653 ,0.2507 ,-0.2392,0.1500 ,-0.0833,-0.2595,-0.0224,0.3706 ,0.1158);
  1316. $PI= $probi*$probj - 0.7940;
  1317. $Sim = $sim - 0.4763;
  1318. $deltatemplate = log($dtemplate)- 2.04427;
  1319. }
  1320. elsif($rsrType==23){
  1321. @Avec = (-3.4835,-0.1144,-0.6024,-0.4093,0.5841 ,0.4945 ,0.0102 ,-0.4040,0.3905 ,0.1852 ,0.0844 ,0.0213 ,0.0668 ,-0.3501,-1.6170,-0.8439,-0.2171,0.0006 ,-0.1970,-0.9967,0.0843 ,-0.2868,-0.0493,-0.0108,0.0096 ,0.3162);
  1322. $PI= $probi*$probj - 0.8893;
  1323. $Sim = $sim - 0.6281;
  1324. $deltatemplate = log($dtemplate)- 1.50130;
  1325. }
  1326. elsif($rsrType==26){
  1327. @Avec = (-2.3377,-0.3537,1.9132 ,-0.1555,0.5179 ,0.6930 ,0.1961 ,0.9687 ,0.3331 ,-0.0203,0.6672 ,-0.1298,-0.0680,1.0575 ,-0.9797,0.2624 ,-0.5030,0.1753 ,1.0932 ,1.6404 ,0.3804 ,-0.2516,-0.5884,-0.0289,0.5928 ,0.3932);
  1328. $PI= $probi*$probj - 0.9373;
  1329. $Sim = $sim - 0.7220;
  1330. $deltatemplate = log($dtemplate)- 1.46639;
  1331. }
  1332. my $A = $Avec[0] + $Avec[1] *$Sim + $Avec[2] *$deltatemplate;
  1333. my $B = $Avec[3];
  1334. my $logsigma = $A + $B*$PI;
  1335. my $p = $Avec[4]+$Avec[5]*$PI+$Avec[6]*$Sim+$Avec[7]*$deltatemplate+$Avec[8]*$PI*$PI+$Avec[9]*$PI*$Sim+$Avec[10]*$PI*$deltatemplate+$Avec[11]*$Sim*$deltatemplate+$Avec[12]*$Sim*$Sim+$Avec[13]*$deltatemplate*$deltatemplate;
  1336. if ($p>1) {$p=1;}elsif ($p<0) {$p=0;}
  1337. my $logsigmaBG = $Avec[14] + $Avec[15]*$deltatemplate + $Avec[16]*$PI + $Avec[17]*$Sim + $Avec[18]*$PI*$deltatemplate + $Avec[19]*$deltatemplate*$deltatemplate;
  1338. my $logmuBG = $Avec[20] + $Avec[21]*$deltatemplate + $Avec[22]*$PI + $Avec[23]*$Sim + $Avec[24]*$PI*$deltatemplate + $Avec[25]*$deltatemplate*$deltatemplate;
  1339. #for Modeller Restraints:
  1340. my $p1 = sprintf("%.5f" ,$p);
  1341. my $p2 = sprintf("%.5f" ,(1-$p1));
  1342. my $sigma = exp($logsigma);
  1343. $sigma = sprintf("%.5f" ,$sigma);
  1344. my $sigmaBG = exp($logsigmaBG);
  1345. $sigmaBG = sprintf("%.5f" ,$sigmaBG);
  1346. my $mu = log($dtemplate);
  1347. $mu = sprintf("%.5f" ,$mu);
  1348. my $muBG = $logmuBG + $mu;
  1349. $muBG = sprintf("%.5f" ,$muBG);
  1350. # Form Modality Feature Group Numb_atoms Numb_parameters Numb_Feat Atom_indices Parameter(e.g. mean, standard deviation)
  1351. #R 3 1 1 1 2 2 1 3 2 1.5000 0.0364
  1352. #R 50 1 1 9|10|23|26 2 6 1 .. .. ... ... ... ... ... ...
  1353. my $newline = sprintf("R 50 1 1 %3.0f 2 6 1 %5.0f %5.0f %8.4f %8.4f %8.4f %8.4f %8.4f %8.4f\n",$rsrType,$atom1,$atom2,$p1,$mu,$sigma,$p2,$muBG,$sigmaBG);
  1354. return $newline;
  1355. }
  1356. sub searchAlternatives{
  1357. #alternatives from /cluster/bioprogs/modeller/modlib/atmcls-melo.lib
  1358. my $Residue = $_[0];
  1359. my $Atom = $_[1];
  1360. #print "\s$Residue:$Atom\s\n";
  1361. my @ATMGRP01 = (' ALA:CB ', ' ILE:CG2 ', ' ILE:CD1 ', ' ILE:CD ', ' LEU:CD1 ', ' LEU:CD2 ', ' THR:CG2 ', ' VAL:CG1 ', ' VAL:CG2 ');
  1362. my @ATMGRP02 = (' ILE:CB ', ' LEU:CG ', ' VAL:CB ');
  1363. my @ATMGRP03 = (' ARG:CB ', ' ARG:CG ', ' ASN:CB ', ' ASP:CB ', ' GLU:CB ', ' GLU:CG ', ' ILE:CG1 ', ' GLN:CB ', ' GLN:CG ', ' HIS:CB ', ' HSD:CB ', ' LEU:CB ', ' LYS:CB ', ' LYS:CG ', ' LYS:CD ', ' MET:CB ', ' PHE:CB ', ' PRO:CB ', ' PRO:CG ', ' TRP:CB ', ' TYR:CB ');
  1364. my @ATMGRP04 = (' PHE:CG ', ' TRP:CD2 ', ' TYR:CG ');
  1365. my @ATMGRP05 = (' PHE:CD1 ', ' PHE:CD2 ', ' PHE:CE1 ', ' PHE:CE2 ', ' PHE:CZ ', ' TRP:CE3 ', ' TRP:CZ2 ', ' TRP:CZ3 ', ' TRP:CH2 ', ' TYR:CD1 ', ' TYR:CD2 ', ' TYR:CE1 ', ' TYR:CE2 ');
  1366. my @ATMGRP06 = (' SER:OG ', ' THR:OG1 ');
  1367. my @ATMGRP07 = (' ASN:ND2 ', ' GLN:NE2 ');
  1368. my @ATMGRP08 = (' CYS:SG ', ' CSS:SG ');
  1369. my @ATMGRP09 = (' ARG:NH1 ', ' ARG:NH2 ');
  1370. my @ATMGRP10 = (' HIS:CG ', ' HSD:CG ');
  1371. my @ATMGRP11 = (' HIS:CD2 ', ' HSD:CD2 ', ' TRP:CD1 ');
  1372. my @ATMGRP12 = (' HIS:NE2 ', ' HSD:NE2 ');
  1373. my @ATMGRP13 = (' HIS:CE1 ', ' HSD:CE1 ');
  1374. my @ATMGRP14 = (' ASP:CG ', ' GLU:CD ');
  1375. my @ATMGRP15 = (' ALA:OXT ', ' ARG:OXT ', ' ASN:OXT ', ' ASP:OXT ', ' CYS:OXT ', ' CSS:OXT ', ' GLU:OXT ', ' GLN:OXT ', ' GLY:OXT ', ' HIS:OXT ', ' HSD:OXT ', ' ILE:OXT ', ' LEU:OXT ', ' LYS:OXT ', ' MET:OXT ', ' PHE:OXT ', ' PRO:OXT ', ' SER:OXT ', ' THR:OXT ', ' TRP:OXT ', ' TYR:OXT ', ' VAL:OXT ', ' ASP:OD1 ', ' ASP:OD2 ', ' ASP:OT1 ', ' ASP:OT2 ', ' GLU:OE1 ', ' GLU:OE2 ', ' GLU:OT1 ', ' GLU:OT2 ');
  1376. my @ATMGRP16 = (' CYS:CB ', ' CSS:CB ', ' MET:CG ');
  1377. my @ATMGRP17 = (' ASN:CG ', ' GLN:CD ');
  1378. my @ATMGRP18 = (' ASN:OD1 ', ' GLN:OE1 ');
  1379. my @ATMGRP19 = (' HIS:ND1 ', ' HSD:ND1 ');
  1380. my @alternatives;
  1381. if (grep(/\s$Residue:$Atom\s/,@ATMGRP01)){@alternatives=@ATMGRP01;}
  1382. elsif (grep(/\s$Residue:$Atom\s/,@ATMGRP02)){@alternatives=@ATMGRP02;}
  1383. elsif (grep(/\s$Residue:$Atom\s/,@ATMGRP03)){@alternatives=@ATMGRP03;}
  1384. elsif (grep(/\s$Residue:$Atom\s/,@ATMGRP04)){@alternatives=@ATMGRP04;}
  1385. elsif (grep(/\s$Residue:$Atom\s/,@ATMGRP05)){@alternatives=@ATMGRP05;}
  1386. elsif (grep(/\s$Residue:$Atom\s/,@ATMGRP06)){@alternatives=@ATMGRP06;}
  1387. elsif (grep(/\s$Residue:$Atom\s/,@ATMGRP07)){@alternatives=@ATMGRP07;}
  1388. elsif (grep(/\s$Residue:$Atom\s/,@ATMGRP08)){@alternatives=@ATMGRP08;}
  1389. elsif (grep(/\s$Residue:$Atom\s/,@ATMGRP09)){@alternatives=@ATMGRP09;}
  1390. elsif (grep(/\s$Residue:$Atom\s/,@ATMGRP10)){@alternatives=@ATMGRP10;}
  1391. elsif (grep(/\s$Residue:$Atom\s/,@ATMGRP11)){@alternatives=@ATMGRP11;}
  1392. elsif (grep(/\s$Residue:$Atom\s/,@ATMGRP12)){@alternatives=@ATMGRP12;}
  1393. elsif (grep(/\s$Residue:$Atom\s/,@ATMGRP13)){@alternatives=@ATMGRP13;}
  1394. elsif (grep(/\s$Residue:$Atom\s/,@ATMGRP14)){@alternatives=@ATMGRP14;}
  1395. elsif (grep(/\s$Residue:$Atom\s/,@ATMGRP15)){@alternatives=@ATMGRP15;}
  1396. elsif (grep(/\s$Residue:$Atom\s/,@ATMGRP16)){@alternatives=@ATMGRP16;}
  1397. elsif (grep(/\s$Residue:$Atom\s/,@ATMGRP17)){@alternatives=@ATMGRP17;}
  1398. elsif (grep(/\s$Residue:$Atom\s/,@ATMGRP18)){@alternatives=@ATMGRP18;}
  1399. elsif (grep(/\s$Residue:$Atom\s/,@ATMGRP19)){@alternatives=@ATMGRP19;}
  1400. return @alternatives;
  1401. }
  1402. sub CalcWnew {
  1403. my $d = shift;
  1404. my $p1 = shift;
  1405. my $mu1 = shift;
  1406. my $sigma1 = shift;
  1407. my $p2 = shift;
  1408. my $mu2 = shift;
  1409. my $sigma2 = shift;
  1410. my $correct = $p1 / $sigma1 * exp( -0.5 * (($d - $mu1)/$sigma1)*(($d - $mu1)/$sigma1) );
  1411. my $bg = $p2 / $sigma2 * exp(-0.5 * (($d - $mu2)/$sigma2)*(($d - $mu2)/$sigma2) );
  1412. return $correct/($correct + $bg);
  1413. }
  1414. ########################################################################
  1415. ## input: rsrfile: name of restraints file (full path)
  1416. ## models: pdb models generated in round before (full path)
  1417. ##
  1418. ## output: generates a new restraints file called:
  1419. ## OLDRESTRAINTSFILE.iter
  1420. ## this file is then to be used for another round of Modeller
  1421. ## model generation
  1422. ##
  1423. ## description: assume that a previous Modeller run has generated
  1424. ## e.g. 4 models. Now, one wants to use the information contained in
  1425. ## the generated models.
  1426. ## For all models, there is a single rsr file. The generated pdb-files
  1427. ## (models) are "realizations" of these contraints, e.g. take a distance
  1428. ## restraint for CACA between residues 4 and 39 in a target.
  1429. ## Each model has now a concrete value for the distance between 4 and 39
  1430. ## Interpret now w as:
  1431. ## w = P(correct_iji'j' | d_ij, d'_i'j', A_ij) = wN(log(d_ij)|..) / (wN(log(d_ij)|..) + (1-w)N(log(d'_ij)|..)
  1432. ## and calculate w for each model.
  1433. ## Take as new w the median of all these w.
  1434. ########################################################################
  1435. sub UpdateRsrFromModels {
  1436. my $rsrfile = shift;
  1437. my @models = @_;
  1438. my @pdbs;
  1439. ## copy pdb files and rename the copied files, because next modeller round will
  1440. ## generate new files named as the old ones
  1441. for (my $i=0; $i<@models; $i++) {
  1442. system("cp $models[$i] $models[$i].prev");
  1443. $models[$i] .= ".prev";
  1444. }
  1445. ## read in models
  1446. ## atomnr -> x,y,z
  1447. for (my $i=0; $i<@models; $i++) {
  1448. my %structureInfo;
  1449. if (not defined( open (MH, "< $models[$i]"))) {
  1450. print "Cant open $models[$i]. Skipping $models[$i]...\n";
  1451. next;
  1452. }
  1453. while(<MH>) {
  1454. if (/ATOM.{2}\s*(\d+)\s.([\S|\s]{3}).(\S{3})\s.\s*(\d+)[\s|\D]\s*(-?\d+.\d+)\s*(-?\d+.\d+)\s*(-?\d+.\d+)/) {
  1455. ## $atomnr x y z
  1456. $structureInfo{$1} = [$5, $6, $7];
  1457. }
  1458. }
  1459. close(MH);
  1460. ## save atomnr->[x,y,z] hash for each model
  1461. $pdbs[$i] = \%structureInfo;
  1462. }
  1463. open (RH, "< $rsrfile") or die "Cant open $rsrfile: $!\n";
  1464. my @newRestraints;
  1465. ## go through restraints and for each (CACA,NO,SCMC,SCSC distance) restraint calculate the acutal distance
  1466. ## in each model and calculate new w for the restraint r. Replace r by new restraint (new w and 1-w)
  1467. while (my $rsr = <RH>) {
  1468. ## 50 1 1 9 2 6 1 289 305 0.2070 1.8664 0.1161 0.7930 2.0669 0.4316
  1469. ## atom1 atom2 p1 mu1 sigma1 p2 mu2 sigma2
  1470. if ($rsr =~ /^R\s+50\s+\S+\s+\S+\s+(\S+)\s+\S+\s+\S+\s+\S+\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)/) {
  1471. my $rsrtype = $1;
  1472. my $atom1 = $2;
  1473. my $atom2 = $3;
  1474. my $p1 = $4;
  1475. my $mu1 = $5;
  1476. my $sigma1 = $6;
  1477. my $p2 = $7;
  1478. my $mu2 = $8;
  1479. my $sigma2 = $9;
  1480. my @allWs;
  1481. my $finalNewW;
  1482. for (my $i=0; $i<@models; $i++) {
  1483. ## coordinates of atom1 and atom2
  1484. my @coAtom1 = @{$pdbs[$i]{$atom1}};
  1485. my @coAtom2 = @{$pdbs[$i]{$atom2}};
  1486. my $distance = sqrt( ($coAtom1[0]-$coAtom2[0])*($coAtom1[0]-$coAtom2[0]) +($coAtom1[1]-$coAtom2[1])*($coAtom1[1]-$coAtom2[1]) +($coAtom1[2]-$coAtom2[2])*($coAtom1[2]-$coAtom2[2]) );
  1487. my $wUpdated = &CalcWnew(log($distance), $p1, $mu1, $sigma1, $p2, $mu2, $sigma2);
  1488. push(@allWs, $wUpdated);
  1489. }
  1490. ## calculate the median of all new w:
  1491. @allWs = sort {$a <=> $b} @allWs;
  1492. if (@allWs % 2 == 0 and @allWs >= 2) {
  1493. $finalNewW = ($allWs[@allWs/2-1] + $allWs[@allWs/2]) / 2;
  1494. }
  1495. else {
  1496. $finalNewW = $allWs[@allWs/2];
  1497. }
  1498. ## update restraint and add it to the new restraints list
  1499. my $newrsr = sprintf("R 50 1 1 %3.0f 2 6 1 %5.0f %5.0f %8.4f %8.4f %8.4f %8.4f %8.4f %8.4f\n", $rsrtype, $atom1, $atom2, $finalNewW, $mu1, $sigma1, (1-$finalNewW), $mu2, $sigma2);
  1500. push(@newRestraints, $newrsr);
  1501. }
  1502. else {
  1503. ## keep other restraints
  1504. push (@newRestraints, $rsr);
  1505. }
  1506. }
  1507. close(RH);
  1508. ## save new restraints in a new file
  1509. my $newRsrFileName = $rsrfile . ".iter";
  1510. open (IH, "> $newRsrFileName") or die ("Cant write $newRsrFileName: $!\n");
  1511. print (IH @newRestraints);
  1512. close(IH);
  1513. }
  1514. sub GenerateIterativeModels {
  1515. my $myrsr = shift;
  1516. my $queryName = shift;
  1517. my $tmpdir = shift;
  1518. my $inbase = shift;
  1519. my $outbase = shift;
  1520. &System("cp $inbase.B9999000*.pdb $tmpdir");
  1521. &System("mv $tmpdir/$queryName.B99990003.pdb $tmpdir/$queryName.B99990004.pdb");
  1522. &System("mv $tmpdir/$queryName.B99990002.pdb $tmpdir/$queryName.B99990003.pdb");
  1523. &System("mv $tmpdir/$queryName.B99990001.pdb $tmpdir/$queryName.B99990002.pdb");
  1524. &ModellerNew($myrsr, $queryName, $inbase, $outbase, $tmpdir, 1);
  1525. my @models = <$tmpdir/$queryName.B9999000*.pdb>;
  1526. &UpdateRsrFromModels($myrsr, @models);
  1527. &ModellerNew("$myrsr.iter", $queryName, $inbase, $outbase, $tmpdir, 3);
  1528. }
  1529. ##########
  1530. ## write out "final" pdb file in CASP compatible format
  1531. ##
  1532. ## outFile: filename of pdb file to write
  1533. ## srcFile: filename of already existing (Modeller generated) pdb file
  1534. ##
  1535. sub WriteCASPpdbFile {
  1536. my $outFile = shift @_;
  1537. my $srcFile = shift @_;
  1538. my $seqname = shift @_;
  1539. my $server = shift @_;
  1540. my $tList = shift @_;
  1541. my $templates = "";
  1542. my $predTMs = "";
  1543. for (my $i=0; $i<$tList->size(); $i++) {
  1544. $templates .= $tList->get($i)->get_Hit() . " ";
  1545. $predTMs .= sprintf("%.2f", $tList->get($i)->get_predTM()) . " ";
  1546. }
  1547. open(SRC, "< $srcFile") or die ("WriteCASPpdbFile: Cant open $srcFile: $!\n");
  1548. my @mod = <SRC>;
  1549. close(SRC);
  1550. my @coordinates;
  1551. ## skip all lines until first ATOM record is read
  1552. for (my $i=0; $i<@mod; $i++) {
  1553. if ($mod[$i] =~ /^ATOM/) {
  1554. push (@coordinates, $mod[$i]);
  1555. }
  1556. }
  1557. open(PDB, ">$outFile") or die ("WriteCASPpdbFile: Cant open $outFile: $!\n");
  1558. print(PDB "PFRMAT TS\n");
  1559. print(PDB "TARGET $seqname\n");
  1560. print(PDB "AUTHOR $server\n");
  1561. print(PDB "METHOD $server (http://hhpred.tuebingen.mpg.de)\n");
  1562. print(PDB "MODEL 1\n");
  1563. print(PDB "PARENT $templates\n");
  1564. print(PDB "REMARK PredTM $predTMs\n");
  1565. print(PDB @coordinates);
  1566. print(PDB "TER\n");
  1567. print(PDB "END\n");
  1568. close(PDB);
  1569. FixOccupancy::FixOccupancy($outFile);
  1570. }
  1571. 1;