123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893 |
- ##########################################################
- ## this package contains functions handling modeller, i.e.
- ## creating restraint file
- ## changing restraint file
- ## calling modeller program
- ##########################################################
- package modeller;
- require(Exporter);
- use strict;
- use config;
- use utilities;
- use TemplateList;
- use multiTemplateMDN;
- use distanceTree;
- use FixOccupancy;
- our @ISA = qw(Exporter);
- our @EXPORT = qw(Modeller ModellerRSR ModellerRaw ModellerNewDistance ChangeDistanceRestraints WriteCASPpdbFile reweighModellerDistRestraints);
- ######################################################
- ## input: name of query
- ## directory of query
- ## inbase: basename of files read in (pir)
- ## outbase: basename for outputs
- ##
- ## output: indirect: py file is written and modeller
- ## is called generating some more files
- ##
- ## description: calls Modeller and creates restraints
- ## file - needed when restraints file
- ## is to be changed
- ######################################################
- sub ModellerRSR{
- my %args = @_;
-
- my $queryName = $args{queryName};
- my $workingDir = $args{workingDir};
- my $outbase = $args{outbase};
- my $config = $args{config};
- my $pdbdir = $args{pdbdir} || $config->get_pdbdir();
- my $tmpdir = $workingDir;
- my $modeller = $config->get_modeller();
- #Modeller constructs only restraints file
- my $offset;
- #Change sequence name to id_temp and get templates -> $templates
- my $templates = "";
- my $knowns = "";
- my %knowns;
- open (IN, "<$outbase.pir") or die ("Error: cannot open $outbase.pir: $!\n");
- my @lines = <IN>;
- close(IN);
- $lines[0] =~ s/^>P1;.*/>P1;$queryName/;
- for (my $i=0; $i<@lines; $i++) {
- if ($lines[$i] =~ /^sequence:.*?:\s*(\d+)\s*:/) {
- $offset = $1-1;
- $lines[$i] =~ s/^sequence:.*?:/sequence:$queryName:/; # replace sequence name with id
- }
- elsif ($lines[$i] =~ /^structureX:/) {
- $lines[$i-1] =~ />P1;(\S+)/;
- $knowns .= " '$1'," if (not exists($knowns{$1}));
- $templates .= " $1";
- $knowns{$1} = 1; # avoid double entry in knowns, Modeller cant handle
- }
- }
- $templates =~ s/^ //;
- open (OUT, "> $outbase.pir") or die ("Error: cannt write $outbase.pir: $!\n");
- print(OUT @lines);
- close(OUT);
-
- # Write the py-file
- open(PY,">$outbase.py") or die("Cannot open: $outbase.py");
-
- print PY ("# Homology modeling by the automodel class\n");
- print PY ("from modeller import * # Load standard Modeller classes\n");
- print PY ("from modeller.automodel import * # Load the automodel class\n");
- print PY ("log.verbose()\n");
- print PY ("env = environ()\n");
- print PY ("env.io.atom_files_directory = '$pdbdir:$workingDir'\n");
- print PY ("a = automodel(env,\n");
- print PY (" alnfile = '$outbase.pir', # alignment filename\n");
- print PY (" knowns = ($knowns), # codes of the templates\n");
- print PY (" sequence = '$queryName') # code of the target\n");
- print PY ("a.homcsr(0)\n");
- close (PY);
-
- #prepare for MODELLER:
- system("chmod 777 $outbase.pir");
- system("chmod 777 $outbase.py");
-
- #run MODELLER: create *.ini & *.rsr
- &System("cd $tmpdir; $modeller $outbase.py > $workingDir/$queryName.modeller.log");
-
- #TREAT FAILED MODELLER!
- return;
- }
- sub ModellerRaw {
- my %args = @_;
-
- my $myrsr = $args{rsrFile};
- my $queryName = $args{queryName};
- my $workingDir = $args{workingDir};
- my $outbase = $args{outbase};
- my $config = $args{config};
- # print "myrsr=$myrsr\n";
- # print "queryName=$queryName\n";
- # print "workingDir=$workingDir\n";
- # print "outbase=$outbase\n";
- my $NMODELS = $config->get_numberOfGeneratedModels();
- my $modeller = $config->get_modeller();
- my $pdbdir = $config->get_pdbdir();
- my $offset;
- my $templates = "";
- my $knowns = "";
- my %knowns;
- open (IN, "< $outbase.pir") or die ("Error: cannot open $outbase.pir: $!\n");
- my @lines = <IN>;
- close(IN);
- $lines[0] =~ s/^>P1;.*/>P1;$queryName/;
- for (my $i=0; $i<@lines; $i++) {
- if ($lines[$i] =~ /^sequence:.*?:\s*(\d+)\s*:/) {
- $offset = $1-1;
- $lines[$i] =~ s/^sequence:.*?:/sequence:$queryName:/; # replace sequence name with id
- }
- elsif ($lines[$i] =~ /^structureX:/) {
- $lines[$i-1] =~ />P1;(\S+)/;
- $knowns .= " '$1'," if (not exists($knowns{$1}));
- $templates .= " $1";
- $knowns{$1} = 1; # avoid double entry in knowns, Modeller cant handle
- }
- }
- $templates =~ s/^ //;
- open (OUT, ">$outbase.pir") or die ("Error: cannot open $outbase.pir: $!\n");
- print(OUT @lines);
- close(OUT);
-
- # Write the py-file
- open(PY, "> $outbase.py") or die("Cannot write $outbase.py");
-
- print PY ("# Homology modeling by the automodel class\n");
- print PY ("from modeller import * # Load standard Modeller classes\n");
- print PY ("from modeller.automodel import * # Load the automodel class\n");
- print PY ("log.verbose()\n");
- print PY ("env = environ()\n");
- print PY ("# directories for input atom files\n");
- print PY ("env.io.atom_files_directory = '$pdbdir:$workingDir'\n");
- print PY ("a = automodel(env,\n");
- print PY (" alnfile = '$outbase.pir', # alignment filename\n");
- print PY (" knowns =($knowns), # codes of the templates\n");
- print PY (" sequence ='$queryName', # code of the target\n");
- print PY (" csrfile = '$myrsr') \n");
- print PY ("a.starting_model= 1 # index of the first model\n");
- print PY ("a.ending_model = $NMODELS # index of the last model\n");
- print PY ("a.make() # do the actual homology modeling\n");
- close (PY);
-
- #prepare for MODELLER:
- system("chmod 777 $outbase.pir");
- system("chmod 777 $outbase.py");
-
- #run MODELLER: create *.ini & *.rsr
- &System("cd $workingDir; $modeller $outbase.py > $workingDir/$queryName.modeller.log");
-
- #TREAT FAILED MODELLER!
- #check model(s):
- my @modelsBuilt;
- for (my $i=1; $i<=$NMODELS; $i++) {
- my $modelID = "B" . (99990000 + $i);
- if (! -e "$outbase.$modelID.pdb") {
- print("WARNING: MODELLER failed for $outbase.$modelID.pdb!\nSee log file $outbase.modeller.log\n");
-
- #EXCEPTION does not work!!!!debug hhmakemodel.pl
- #&System("perl $hh/hhmakemodel.pl $tmpname.hhr -m $singleTemp -q $tmpname.a3m -v -ts $tmpname.mod -d $pdbdir");
- }
- else {
- push (@modelsBuilt, $i);
- }
- }
- # find model with minimal energy
- if (scalar(@modelsBuilt) > 0) {
- my $minEnergy = +1E8;
- my $imin = -1;
- my $line;
- my $bestModelID;
- ## search for best model (lowest objective function value)
- ## and save this model - remove all others
- foreach my $model (@modelsBuilt) {
- my $modelID = "B" . (99990000 + $model);
- open(IN, "<$outbase.$modelID.pdb") || die("Error: can't open $outbase.$modelID.pdb");
- my $energy;
- while ($line = <IN>) {
- if ($line =~ /MODELLER OBJECTIVE FUNCTION:\s*(\S+)/) {
- $energy = $1;
- last;
- }
- }
- if ($energy < $minEnergy) {
- $minEnergy = $energy;
- $imin = $model;
- }
- close(IN);
- }
- if ($imin != -1) {
- my $minModelID = "B" . (99990000 + $imin);
- &System("cp $outbase.$minModelID.pdb $outbase.pdb");
- } else {
- print "WARNING: MODELLER no minimal energy structure found!\n";
- }
- }
-
- &System("rm $outbase.sch $outbase.V999* $outbase.D000*");
- return;
- }
- sub Modeller {
- my %args = @_;
-
- my $queryName = $args{queryName};
- my $workingDir = $args{workingDir};
- my $outbase = $args{outbase};
- my $config = $args{config};
- my $physicalScale = 1;
- $physicalScale = $args{physicalScale} if (defined($args{physicalScale}));
- my $pdbdir = $args{pdbdir} || $config->get_pdbdir();
-
- my $NMODELS = $config->get_numberOfGeneratedModels();
- my $modeller = $config->get_modeller();
- my $offset;
- my $templates = "";
- my $knowns = "";
- my %knowns;
- open (IN, "< $outbase.pir") or die ("Error: cannot open $outbase.pir: $!\n");
- my @lines = <IN>;
- close(IN);
- $lines[0] =~ s/^>P1;.*/>P1;$queryName/;
- for (my $i=0; $i<@lines; $i++) {
- if ($lines[$i] =~ /^sequence:.*?:\s*(\d+)\s*:/) {
- $offset = $1-1;
- $lines[$i] =~ s/^sequence:.*?:/sequence:$queryName:/; # replace sequence name with id
- }
- elsif ($lines[$i] =~ /^structureX:/) {
- $lines[$i-1] =~ />P1;(\S+)/;
- $knowns .= " '$1'," if (not exists($knowns{$1}));
- $templates .= " $1";
- $knowns{$1} = 1; # avoid double entry in knowns, Modeller cant handle
- }
- }
- $templates =~ s/^ //;
- open (OUT, ">$outbase.pir") or die ("Error: cannot open $outbase.pir: $!\n");
- print(OUT @lines);
- close(OUT);
-
- # Write the py-file
- open(PY, "> $outbase.py") or die("Cannot write $outbase.py");
-
- print PY ("# Homology modeling by the automodel class\n");
- print PY ("from modeller import * # Load standard Modeller classes\n");
- print PY ("from modeller.automodel import * # Load the automodel class\n");
- print PY ("log.verbose()\n");
- print PY ("env = environ()\n");
- if ($physicalScale != 1) {
- print PY ("env.schedule_scale=physical.values(default=1.0, ca_distance=$physicalScale, n_o_distance=$physicalScale, sd_mn_distance=$physicalScale, sd_sd_distance=$physicalScale)\n");
- }
- print PY ("# directories for input atom files\n");
- print PY ("env.io.atom_files_directory = '$pdbdir:$workingDir'\n");
- print PY ("a = automodel(env,\n");
- print PY (" alnfile = '$outbase.pir', # alignment filename\n");
- print PY (" knowns = ($knowns), # codes of the templates\n");
- print PY (" sequence = '$queryName') # code of the target\n");
- print PY ("a.starting_model= 1 # index of the first model\n");
- print PY ("a.ending_model = $NMODELS # index of the last model\n");
- # print PY ("a.deviation = 1.0\n");
- # print PY ("a.spline_on_site = False # do not use splines for approximation\n");
- # print PY ("a.library_schedule = autosched.very_fast\n");
- # print PY ("a.max_var_iterations = 300\n");
- # print PY ("a.repeat_optimization = 2\n");
- # print PY ("a.write_intermediates = True\n");
- print PY ("a.make() # do the actual homology modeling\n");
- close (PY);
-
- #prepare for MODELLER:
- system("chmod 777 $outbase.pir");
- system("chmod 777 $outbase.py");
-
- #run MODELLER: create *.ini & *.rsr
- &System("cd $workingDir; $modeller $outbase.py > $workingDir/$queryName.modeller.log");
-
- my @modelsBuilt;
- #TREAT FAILED MODELLER!
- #check model(s):
- for (my $i=1; $i<=$NMODELS; $i++) {
- my $modelID = "B" . (99990000 + $i);
- if (! -e "$outbase.$modelID.pdb") {
- print("WARNING: MODELLER failed for $outbase.$modelID.pdb!\nSee log file $outbase.modeller.log\n");
-
- #EXCEPTION does not work!!!!debug hhmakemodel.pl
- #&System("perl $hh/hhmakemodel.pl $tmpname.hhr -m $singleTemp -q $tmpname.a3m -v -ts $tmpname.mod -d $pdbdir");
- }
- else {
- push (@modelsBuilt, $i);
- }
- }
- # find model with minimal energy
- if (scalar(@modelsBuilt) > 0) {
- my $minEnergy = +1E8;
- my $imin = -1;
- my $line;
- ## search for best model (lowest objective function value)
- ## and save this model - remove all others
- foreach my $model (@modelsBuilt) {
- my $modelID = "B" . (99990000+$model);
- open(IN, "<$outbase.$modelID.pdb") || die("Error: can't open $outbase.$modelID.pdb");
- my $energy;
- while ($line = <IN>) {
- if ($line =~ /MODELLER OBJECTIVE FUNCTION:\s*(\S+)/) {
- $energy = $1;
- last;
- }
- }
- if ($energy < $minEnergy) {
- $minEnergy = $energy;
- $imin = $model;
- }
- close(IN);
- }
- if ($imin != -1) {
- my $minModelID = "B" . (99990000+$imin);
- &System("cp $outbase.$minModelID.pdb $outbase.pdb");
- } else {
- print "WARNING: MODELLER no minimal energy structure found!\n";
- }
- }
-
- &System("rm $outbase.sch $outbase.V999* $outbase.D000* $outbase.ini ");
- return;
- }
- #############################################################
- ## input: myrsr: rsr file
- ## queryName:
- ## workingDir: where save (temp) files
- ## outbase: outbase for pir files
- ## config:
- ##
- ##
- ## output: mod file containing the best modeller built model
- #############################################################
- sub ModellerNewDistance {
- my %args = @_;
-
- my $myrsr = $args{rsrFile};
- my $queryName = $args{queryName};
- my $workingDir = $args{workingDir};
- my $outbase = $args{outbase};
- my $config = $args{config};
- my $pdbdir = $args{pdbdir} || $config->get_pdbdir();
- my $templWeightNormalizer = $args{normalizer};
- my $doTemplWeight = 1;
- $doTemplWeight = $args{doTemplWeightEqual} if (defined($args{doTemplWeightEqual}));
- my $physicalScale = 1;
- $physicalScale = $args{physicalScale} if (defined($args{physicalScale}));
- my $NMODELS = $config->get_numberOfGeneratedModels();
- my $modeller = $config->get_modeller();
- my $numTemplates = 0;
- #Change sequence name to id_temp and get templates -> $templates
- my $templates = "";
- my $knowns = "";
- my %knowns;
- my @allTemplates; ## $knowns does not contain duplicates
- my %templateWeights;
- open (IN, "< $outbase.pir") or die ("Error: cannot open $outbase.pir: $!\n");
- my @lines = <IN>;
- close(IN);
- $lines[0] =~ s/^>P1;.*/>P1;$queryName/;
- for (my $i=0; $i<@lines; $i++) {
- if ($lines[$i] =~ /^sequence:.*?:\s*(\d+)\s*:/) {
- $lines[$i] =~ s/^sequence:.*?:/sequence:$queryName:/; # replace sequence name with id
- }
- elsif ($lines[$i] =~ /^structureX:/) {
- $lines[$i-1] =~ />P1;(\S+)/;
- push (@allTemplates, $1);
- $knowns .= " '$1'," if (not exists($knowns{$1}));
- $templates .= " $1";
- $numTemplates++;
- $knowns{$1} = 1;
- }
- }
- $templates =~ s/^ //;
- ## weight for template based restraints
- ## default
- my $rsrWeight = 1.0/$numTemplates;
- $rsrWeight = sprintf("%.3f", $rsrWeight);
- my $templRsrWeight = 1.0/$templWeightNormalizer;
- ## TODO: adapt weight to new template-weighting strategy
- open (OUT, ">$outbase.pir") or die ("Error: cannot open $outbase.pir: $!\n");
- print(OUT @lines);
- close(OUT);
-
- # Write the py-file
- open(PY,"> $outbase.py") or die("Cannot open: $outbase.py");
-
- if ($config->get_templateWeightStrategy() == 1) { ## WITH phylo weight
- print PY ("# Homology modeling by the automodel class\n");
- print PY ("from modeller import * # Load standard Modeller classes\n");
- print PY ("from modeller.automodel import * # Load the automodel class\n");
- if ($config->get_doParallelModeller()) {
- print PY ("from modeller.parallel import *\n");
- }
- print PY ("from modeller.scripts import complete_pdb\n");
-
- print PY ("from modeller import _cuser_form54\n");
- print PY ("log.verbose()\n");
- print PY ("env = environ()\n");
- ## downweight template based restraints
- if ($doTemplWeight) {
- my $rsrWeight = $physicalScale;
- print PY ("env.schedule_scale=physical.values(default=1.0, ca_distance=$rsrWeight, n_o_distance=$rsrWeight, sd_mn_distance=$rsrWeight, sd_sd_distance=$rsrWeight)\n"); #, chi1_dihedral=$templRsrWeight, chi2_dihedral=$templRsrWeight, chi3_dihedral=$templRsrWeight, chi4_dihedral=$templRsrWeight, phi_dihedral=$templRsrWeight, psi_dihedral=$templRsrWeight, omega_dihedral=$templRsrWeight, phi_psi_dihedral=$templRsrWeight)\n");
- }
- print PY ("env.io.atom_files_directory = '$pdbdir:$workingDir'\n");
- print PY ("\n");
-
- print PY ("class MyGauss(forms.restraint_form):\n");
- print PY (" \"\"\"Multiple Gaussian\"\"\"\n");
- print PY (" _builtin_index = _cuser_form54.myform_create()\n");
- print PY (" def __init__(self, group, feature, weights, means, stdevs, phyloWeight):\n");
- print PY (" lv = -1\n");
- print PY (" for var in (weights, means, stdevs):\n");
- print PY (" if (lv >= 0 and lv != len(var)) \\\n");
- print PY (" or not isinstance(var, (tuple, list)):\n");
- print PY (" raise TypeError(\"weights, means and stdevs should all be \"\n");
- print PY (" + \"sequences of the same length\")\n");
- print PY (" lv = len(var)\n");
- print PY (" forms.restraint_form.__init__(self, group, feature, len(weights),\n");
- print PY (" tuple(weights) + tuple(means) + tuple(stdevs) + phyloWeight)\n");
- print PY ("a = automodel(env,\n");
- print PY (" alnfile = '$outbase.pir', # alignment filename\n");
- print PY (" knowns =($knowns), # codes of the templates\n");
- print PY (" sequence ='$queryName', # code of the target\n");
- print PY (" csrfile = '$myrsr') # use 'my' restraints file\n");
- print PY ("a.starting_model= 1 # index of the first model\n");
- print PY ("a.ending_model = $NMODELS # index of the last model\n");
- # print PY ("a.library_schedule = autosched.slow\n");
- # print PY ("a.max_var_iterations = 300\n");
- # print PY ("a.repeat_optimization = 2\n");
- # print PY ("a.deviation = 0.0\n");
- # print PY ("a.write_intermediates = True\n");
- if ($config->get_doParallelModeller()) {
- ## all slaves have to have their own initialization with new restraints...
- print PY ("j = job()\n");
- print PY ("j.slave_startup_commands.append(\"from modeller import _cuser_form54\")\n");
- print PY ("j.slave_startup_commands.append(\"class MyGauss(forms.restraint_form):\\n\" + \n");
- print PY (" \" _builtin_index = _cuser_form54.myform_create()\")\n");
- my $cpus = $config->get_cpus();
- for (my $i=0; $i < &min($NMODELS, $cpus); $i++) {
- print PY ("j.append(local_slave())\n");
- }
- print PY ("a.use_parallel_job(j)\n");
- }
- print PY ("a.make() # do the actual homology modeling\n");
- } else {
- ## NO PHYLO WEIGHT
- print PY ("# Homology modeling by the automodel class\n");
- print PY ("from modeller import * # Load standard Modeller classes\n");
- print PY ("from modeller.automodel import * # Load the automodel class\n");
- if ($config->get_doParallelModeller()) {
- print PY ("from modeller.parallel import *\n");
- }
- print PY ("from modeller.scripts import complete_pdb\n");
-
- print PY ("from modeller import _cuser_form52\n");
- print PY ("log.verbose()\n");
- print PY ("env = environ()\n");
- ## downweight template based restraints
- if ($doTemplWeight) {
- ## !!! TEST !!!
- my $rsrWeight = $physicalScale;
- print PY ("env.schedule_scale=physical.values(default=1.0, ca_distance=$rsrWeight, n_o_distance=$rsrWeight, sd_mn_distance=$rsrWeight, sd_sd_distance=$rsrWeight)\n"); #, chi1_dihedral=$rsrWeight, chi2_dihedral=$rsrWeight, chi3_dihedral=$rsrWeight, chi4_dihedral=$rsrWeight, phi_dihedral=$rsrWeight, psi_dihedral=$rsrWeight, omega_dihedral=$rsrWeight, phi_psi_dihedral=$rsrWeight)\n");
- ## !!! end TEST !!!
- }
- print PY ("env.io.atom_files_directory = '$pdbdir:$workingDir'\n");
- print PY ("\n");
-
- print PY ("class MyGauss(forms.restraint_form):\n");
- print PY (" \"\"\"Multiple Gaussian\"\"\"\n");
- print PY (" _builtin_index = _cuser_form52.myform_create()\n");
- print PY (" def __init__(self, group, feature, weights, means, stdevs):\n");
- print PY (" lv = -1\n");
- print PY (" for var in (weights, means, stdevs):\n");
- print PY (" if (lv >= 0 and lv != len(var)) \\\n");
- print PY (" or not isinstance(var, (tuple, list)):\n");
- print PY (" raise TypeError(\"weights, means and stdevs should all be \"\n");
- print PY (" + \"sequences of the same length\")\n");
- print PY (" lv = len(var)\n");
- print PY (" forms.restraint_form.__init__(self, group, feature, len(weights),\n");
- print PY (" tuple(weights) + tuple(means) + tuple(stdevs))\n");
- print PY ("a = automodel(env,\n");
- print PY (" alnfile = '$outbase.pir', # alignment filename\n");
- print PY (" knowns =($knowns), # codes of the templates\n");
- print PY (" sequence ='$queryName', # code of the target\n");
- print PY (" csrfile = '$myrsr') # use 'my' restraints file\n");
- print PY ("a.starting_model= 1 # index of the first model\n");
- print PY ("a.ending_model = $NMODELS # index of the last model\n");
- # print PY ("a.library_schedule = autosched.slow\n");
- # print PY ("a.max_var_iterations = 300\n");
- # print PY ("a.repeat_optimization = 2\n");
- # print PY ("a.deviation = 0.0\n");
- # print PY ("a.write_intermediates = True\n");
- if ($config->get_doParallelModeller()) {
- ## all slaves have to have their own initialization with new restraints...
- print PY ("j = job()\n");
- print PY ("j.slave_startup_commands.append(\"from modeller import _cuser_form52\")\n");
- print PY ("j.slave_startup_commands.append(\"class MyGauss(forms.restraint_form):\\n\" + \n");
- print PY (" \" _builtin_index = _cuser_form52.myform_create()\")\n");
- my $cpus = $config->get_cpus();
- for (my $i=0; $i < &min($cpus,$NMODELS); $i++) {
- print PY ("j.append(local_slave())\n");
- }
- print PY ("a.use_parallel_job(j)\n");
- }
- print PY ("a.make() # do the actual homology modeling\n");
- }
- close (PY);
-
- system("chmod 777 $outbase.pir");
- system("chmod 777 $outbase.py");
-
- #run MODELLER:
- if ($config->get_doParallelModeller()) {
- my $modellerParallel = $config->get_modellerParallel();
- &System("cd $workingDir; $modellerParallel $outbase.py > $workingDir/$queryName.modeller.log");
- } else {
- &System("cd $workingDir; $modeller $outbase.py > $workingDir/$queryName.modeller.log");
- }
- ## modelling results, e.g. pdb files
- my $resultbase = $outbase;
- my @modelsBuilt;
- #TREAT FAILED MODELLER!
- #check model(s):
- for (my $i=1; $i<=$NMODELS; $i++) {
- my $modelID = "B" . (99990000+$i);
- if (! -e "$outbase.$modelID.pdb") {
- print("WARNING: MODELLER failed for $outbase.$modelID.pdb!\nSee log file $outbase.modeller.log\n");
-
- #EXCEPTION does not work!!!!debug hhmakemodel.pl
- #&System("perl $hh/hhmakemodel.pl $tmpname.hhr -m $singleTemp -q $tmpname.a3m -v -ts $tmpname.mod -d $pdbdir");
- }
- else {
- push (@modelsBuilt, $i);
- }
- }
- # find model with minimal energy
- if (scalar(@modelsBuilt) > 0) {
- my $minEnergy = +1E8;
- my $imin = -1;
- my $line;
- ## search for best model (lowest objective function value)
- ## and save this model - remove all others
- foreach my $model (@modelsBuilt) {
- my $modelID = "B" . (99990000+$model);
- open(IN, "<$outbase.$modelID.pdb") || die("Error: can't open $outbase.$modelID.pdb");
- my $energy;
- while ($line = <IN>) {
- if ($line =~ /MODELLER OBJECTIVE FUNCTION:\s*(\S+)/) {
- $energy = $1;
- last;
- }
- }
- if ($energy < $minEnergy) {
- $minEnergy = $energy;
- $imin = $model;
- }
- close(IN);
- }
- if ($imin != -1) {
- my $minModelID = "B" . (99990000+$imin);
- &System("cp $outbase.$minModelID.pdb $resultbase.pdb");
- } else {
- print "WARNING: MODELLER no minimal energy structure found!\n";
- }
- }
-
- &System("rm $resultbase.sch $resultbase.V999* $resultbase.D000*");
-
- #Add END line
- #my $repairpdb = $config->get_repairPDB();
- ## repairpdb is not necessary any more in newer Modeller version
- # &System("$repairpdb $resultbase.mod &> /dev/null");
-
- return;
- }
- sub readTemplateWeightsFromFile {
- my $file = shift;
- my %weights;
- open(TW, "< $file") or die "Cant read $file: $!\n";
- while (my $line = <TW>) {
- if ($line =~ /(\S+)\s+(\S+)/) {
- $weights{$1} = $2;
- }
- }
- close(TW);
- return %weights;
- }
- #############################################
- ## ugly but faster than passing by reference
- ## maybe avoid it by writing a class instead
- my @PDBs;
- my @atomsForResidue;
- #################################################################
- ## input: rsrfile
- ## inifile
- ## inbase
- ## array of templates
- ## output: side effect: write out a new restraint file:
- ## rsrfile.new.rsr
- #################################################################
- sub ChangeDistanceRestraints {
- my $outbase = shift;
- my $workingDir = shift;
- my $templateList = shift;
- my $config = shift;
- my $pdbdir = shift || $config->get_pdbdir();
- my $queryName = &get_basename($outbase);
- my $weightTemplates = $config->get_templateWeightStrategy();
- my %templateWeight;
- my $templWeightNormalizer = 0;
- @PDBs = ();
- @atomsForResidue = ();
- if ($weightTemplates == 1) {
- if ($templateList->size() == 1) {
- $templateWeight{$templateList->get(0)->get_Hit()} = 1;
- $templWeightNormalizer = 1;
- } else {
- ## copy template hhm files to workingDir
- for (my $i=0; $i<$templateList->size(); $i++) {
- my $tname = $templateList->get($i)->get_Hit();
- system("cp $pdbdir/$tname.hhm $workingDir");
- }
- ## build upgma tree, leaves are numbered according to row-number in templateList (0=query)
-
- my @distanceMatrix = &calculatePairwiseDistances($queryName, $templateList, $workingDir, $workingDir);
- my $phyloTree = &upgma(@distanceMatrix);
- ## root tree at query (= node 0)
- print "PhyloTree:\n";
- print "==========\n";
- print $phyloTree->print() . "\n";
- my $rerootedTree = $phyloTree->reRoot(0);
- print "rerootedTree:\n";
- print "=============\n";
- $rerootedTree->print();
- print "=============\n";
- ## tweights containts leaf/row=>weight entries
- my %tweights = &iterativelyWeightTemplates($rerootedTree);
-
- ## replace indices with template names
- foreach my $leaf (keys %tweights) {
- my $tname = $templateList->get($leaf-1)->get_Hit();
- $templateWeight{$tname} = $tweights{$leaf};
- }
- print "ChangeDistanceRestraints template weights\n";
- foreach my $template (keys %templateWeight) {
- print "$template=>$templateWeight{$template}\n";
- $templWeightNormalizer += $templateWeight{$template};
- }
- print "==============================\n";
- }
- } else {
- $templWeightNormalizer = $templateList->size();
- }
- ## these files must have been generated before
- my $rsrfile = "$outbase.rsr";
- my $inifile = "$outbase.ini";
- my $pirfile = "$outbase.pir";
- #get query length
- #my $querylength = $templateList->get_queryLength();
- ## set up mixture density networks
- ## these will be needed later in order to calculate parameters of new contraints
- my $mdnCACA = multiTemplateMDN->new($config->get_MDNWeightsLayer1CACA(), $config->get_MDNWeightsLayer2CACA());
- my $mdnNO = multiTemplateMDN->new($config->get_MDNWeightsLayer1NO(), $config->get_MDNWeightsLayer2NO());
- my $mdnSCMC = multiTemplateMDN->new($config->get_MDNWeightsLayer1SCMC(), $config->get_MDNWeightsLayer2SCMC());
- my $mdnSCSC = multiTemplateMDN->new($config->get_MDNWeightsLayer1SCSC(), $config->get_MDNWeightsLayer2SCSC());
-
- #get similarity of each template and read TAB file(s)
- my @sim; #contains similarities of Templates
- my @templates;#contains PDB-Identifier of Templates
- my @TABprobs; #contains probability of ij to according i from *.tab file
-
- #use *.afh file (hit list with filter strength of each hit):
- #
- #HHR HIT TEMPLATE PROB SCORE SS/Length SIM SumPr/L QueryStart-End COLS TempStart-End TMSCORE
- #1 1 1rwz_A 98.0 56.2 0.0833 0.164 0.5697 3 225 213 5 245 0.522695
- #2 1 1rwz_A 97.3 56.2 0.0882 0.177 0.5412 2 225 211 9 245 0.518335
- #0 1 1rwz_A 98.0 56.2 0.0794 0.173 0.5697 2 225 214 5 245 0.517708
- #1 5 1plq 97.0 43.8 0.0877 0.094 0.5342 1 225 217 2 257 0.503807
- #...
-
-
- ## for each template ...
- for(my $t=0; $t<$templateList->size(); $t++) {
- my $template = $templateList->get($t);
- $templates[$t] = $template->get_Hit();
- my $hitnr = $template->get_No();
- my $hhrnr = $template->get_Filt();
- $sim[$t] = $template->get_Sim();
- #my $templatelength=$4-$3+1;
- my $tab = "$outbase." . $templates[$t] . ".HIT$hitnr.tab";
- print "HIT: $t TAB:$tab SIM:$sim[$t]\n";
-
- ## ... build TAB file, based on corresponding tab file
- if (! (-e "$tab")) {
- &BuildSingleTabFile("$outbase.$hhrnr.tab", $hitnr, $outbase);
- }
- open(TAB,"< $tab") or die("Error reading $tab: $!\n");
- while (my $line = <TAB>) {
- # i j score SS probab
- # 4 400 -0.64 -0.28 0.0101
- if ($line =~ /\s+(\d+)\s+(\d+)\s+\S+\s+\S+\s+(\S+)/) {
- my $i = $1;
- my $j = $2;
- my $probab = $3;
- if (!($TABprobs[$i])) {
- $TABprobs[$1] = [$probab,$t,$j];
- }
- else {
- push(@{$TABprobs[$i]},$probab,$t,$j);
- }
- }
- }
- close(TAB);
- my $seenValidLine = 0;
- ## ... read template PDB
- open(PDBTemp, "< $pdbdir/$templates[$t].pdb") or die("Error reading $pdbdir/$templates[$t].pdb: $!\n");
- while (my $line = <PDBTemp>){
- #ATOM 1 N PRO 1 -29.477 -9.021 66.175 1.00 75.79 N
- #ATOM 3781 CB MET 1 477 25.003 121.648 32.112 1.00 90.08 C
- if ($line =~ /^ATOM.{2}\s*(\d+)\s.([\S|\s]{3}).(\S{3})\s.\s*(\d+)[\s|\D]\s*(-?\d+.\d+)\s*(-?\d+.\d+)\s*(-?\d+.\d+)\s*/){
- $seenValidLine = 1;
- ## save Atomname Residuename ResidueNr x y z
- $PDBs[$t][$1]=[$2,$3,$4,$5,$6,$7];
- ## speed up: save all atoms for a given residue (in a given template); will be needed later to compute the distance between "matching" atoms
- if (not @{$atomsForResidue[$t][$4]}) {
- $atomsForResidue[$t][$4] = [$1];
- }
- else {
- push(@{$atomsForResidue[$t][$4]}, $1);
- }
- }
- }
- if ($seenValidLine) {
- for (my $r=0; $r < @{$atomsForResidue[$t]}; $r++) {
- my $rr = @{$atomsForResidue[$t]}[$r];
- next if (not defined($rr));
- }
- }
- # $atomsForResiue[$t] = \%atomsForResidue;
- close(PDBTemp);
- } ## end for all templates
- #e.g.: print"$PDBs[4][1227][0] $PDBs[4][1227][1] $PDBs[4][1227][2]\n";
- #$TABprobs[x][0] x=atom 0=probabilityij
- #$TABprobs[x][1] 1=#of tabfile
- #$TABprobs[x][2] 2=probabilityij(e.g. if more than one template is used!!!)
- #$TABprobs[x][3] 3=aligned residue number of template
- #$TABprobs[x][4] 4=#of tabfile
- #... 5=probabilityij(e.g. if more than one template is used!!!)
- #... ...
- #################################
- #read INIPDB file of query: *.ini
- #################################
- #INIPDB-FILE:
- #ATOM 1 N PRO 1 -29.477 -9.021 66.175 1.00 75.79 N
- #ATOM 2 CA PRO 1 -28.807 -9.573 65.023 1.00 75.45 C
- #...
- #1234567890123456789012345678901234567890...
-
- #find atom name in query ini-pdb:
- my @iniPDBresNr;
- open(iniPDB,"< $inifile") or die("Error reading file: $inifile: $!\n");
- while (my $line = <iniPDB>){
- if ($line =~ /^ATOM.{2}\s*(\d+)\s.([\S|\s]{3}).(\S{3})\s.\s*(\d+)[\s|\D]\s*(-?\d+.\d+)\s*(-?\d+.\d+)\s*(-?\d+.\d+)\s*/){
- ## atomNr => residueNr, atomName, residueName
- $iniPDBresNr[$1] = [$4,$2,$3];
- }
- }
- close(iniPDB);
- #0:
- #1: 1 N MET
- #2: 1 CA MET
- #3: 1 CB MET
- #4: 1 CG MET
- #...
- #####################
- #read restraints file
- #####################
- open(RSR,"< $rsrfile") or die("Error reading file: $rsrfile: $!\n");
- my @rsrlines = <RSR>;
- ## R 3 1 1 1 2 2 1 3 2 1.5380 0.0364
- ## R 3 1 1 26 2 2 1 1394 1422 3.8982 6.2527
- close(RSR);
-
- #Spline restraint
- # Form Modality Feature Group Numb_atoms Numb_parameters Numb_Feat Atom_indices Parameter(e.g. mean, standard deviation)
- #R 10 39 1 9 2 45 1 25 363 1.0000 0.0000 26.6000 0.7000 -0.6596...
-
- ############################################
- #change old restraints in Modeller rsr-file:
- ############################################
- ## comment: simply using Modellers mean for the template distance
- ## works only for single template modelling (otherwise splines are used), therefore
- ## one needs a subroutine to extract the distances in templates
- ##
- my @newrsrfile;
- ## if no distance found in template for a specific restraint, the old one can be used (remainRestr=1)
- ## or the restraint can be neglected (rmainRestr=0)
- my $remainRestr = 1;
- for (my $i=1; $i<@rsrlines; $i++) {
- #
- if ($i % 1000 == 0) {
- print "$i iterations in changeRestraints\n";
- }
- ## 9|10|23|26
- if ($rsrlines[$i] =~ /^R\s+(3|10)\s+\d+\s+\d+\s+(9|10|23|26)\s+2\s+\d+\s+\d+\s+(\S+)\s+(\S+)\s+(\S+)/) {
- #print "\nLINE:$i $rsrlines[$i]";
- #print "rsrType:$1 group:$2 atom1:$3 atom2:$4\n";
- #search i, j, delta from *.ini
- my $Qi;
- my $Qj;
- my $queryatomnameOne;
- my $queryatomnameTwo;
- my $queryresiduenameOne;
- my $queryresiduenameTwo;
-
- if ($iniPDBresNr[$3][0] && $iniPDBresNr[$4][0]){
- $Qi = $iniPDBresNr[$3][0]; #residue i of Query (number)
- $Qj = $iniPDBresNr[$4][0]; #residue j of Query (number)
- $queryatomnameOne = $iniPDBresNr[$3][1]; #atomname of residue i of Query
- $queryatomnameTwo = $iniPDBresNr[$4][1]; #atomname of residue j of Query
- $queryresiduenameOne = $iniPDBresNr[$3][2]; #residuename of i of Query
- $queryresiduenameTwo = $iniPDBresNr[$4][2]; #residuename of j of Query
- }
- else { die("Error in file: $inifile"); }
-
- my $form = $1;
- my $rsrType = $2;
- my $atom1 = $3;
- my $atom2 = $4;
- my $checkdelta = $5;
- ################################################################################################################
- #search prob & sim for i-i' and j-j'from TAB file & calculate new values for new log-normal Gaussian distribution:
- if ($TABprobs[$Qi][0] && $TABprobs[$Qj][0]) {
- #e.g.:
- # atom prob Template alternative probability from alternative Template
- # 55: 0.1119 1
- # 56: 0.0874 1
- # 57: 0.0733 1
- # 58: 0.3256 0 0.0454 1
- # 59: 0.3552 0 0.0369 1
- # 60: 0.4030 0 0.0274 1
- # 61: 0.4952 0 0.0178 1
- # 62: 0.5366 0
- # 63: 0.5986 0
- # 64: 0.6060 0
-
- #e.g. case2 (more than one template for i-i'): atom 59: prob i-i' 0.3552 in Template 1 and 0.0369 in Template 2
- # atom 63: prob j-j' 0.5986 in Template 1
- # so take prob i-i' 0.3552 from Template 1 (NOT from Template 2!!!)
- # $TABprobs[$i][+0] = probab
- # $TABprobs[$i][+1] = t
- # $TABprobs[$i][+2] = j
-
- #if there is more than one template for i-i' AND j-j'
- #if there is more than one template for i-i' and template for j-j'
- # Q |---------------------------------------------------------------|
- # T1 |--------------------------------|
- # T2 |-------------------------------|
- # j i
- if (@{$TABprobs[$Qi]}>3 && @{$TABprobs[$Qj]}>3){
- for (my $t=0;$t<@{$TABprobs[$Qi]}; $t=$t+3){
- my $probi = $TABprobs[$Qi][$t];
- for (my $r=0;$r<@{$TABprobs[$Qj]}; $r=$r+3){
- ## Qi and Qj aligned to residues in same template
- if ($TABprobs[$Qi][$t+1] == $TABprobs[$Qj][$r+1]){
- my $probj = $TABprobs[$Qj][$r]; #probability of j-j
- my $sim = $sim[$TABprobs[$Qj][$r+1]]; #similarity of according template
- my $dtemplate = &DistanceInTemplate($atom1,$atom2, $Qi,$Qj,$TABprobs[$Qi][$t+2],$TABprobs[$Qj][$r+2],$queryatomnameOne,$queryatomnameTwo,$queryresiduenameOne,$queryresiduenameTwo,$TABprobs[$Qj][$r+1], 1);
-
- if ( ($dtemplate ne "NULL")) {
- my $posteriori = $probi*$probj;
- if ($Qi - $TABprobs[$Qi][$t+2] == $Qj - $TABprobs[$Qj][$r+2]) {
- $posteriori = min($probi, $probj);
- }
-
- # next if ($posteriori < 0.2);
- ## skip too long distance-restraints
- next if (($dtemplate > 28 and $rsrType==9) or ($dtemplate > 19 and $rsrType==10) or ($dtemplate > 11 and $rsrType==23) or ($dtemplate>10 and $rsrType==26));
- my $newline = "";
- my $hit = $templateList->get($TABprobs[$Qi][$t+1])->get_Hit();
- if ($weightTemplates) {
- my $tweight = $templateWeight{$hit};
- $newline = &CalcRSRTweight($posteriori, $sim, $dtemplate, $rsrType, $atom1,$atom2, $mdnCACA, $mdnNO, $mdnSCMC, $mdnSCSC, $tweight);
- }
- else {
- $newline = &CalcRSR($posteriori, $sim, $dtemplate, $rsrType, $atom1,$atom2, $mdnCACA, $mdnNO, $mdnSCMC, $mdnSCSC);
- }
- $newline .= " $hit $posteriori $sim $dtemplate $Qi $Qj $TABprobs[$Qi][$t+2] $TABprobs[$Qj][$r+2]\n";
- push(@newrsrfile, $newline);
- }
- elsif ($remainRestr) { push(@newrsrfile, $rsrlines[$i]); }
- }
- }
- }
- }
- #if there is more than one template for i-i' and one template for j-j'
- # Q |---------------------------------------------------------------|
- # T1 |--------------------------------|
- # T2 |-------------------------------|
- # j i
- elsif (@{$TABprobs[$Qi]}>3 && @{$TABprobs[$Qj]}==3) {
- #print "more than one template for i-i' atom: $atom1 $atom2\n";
- my $probi;
- my $probj = $TABprobs[$Qj][0]; #probability of j-j'
- my $sim = $sim[$TABprobs[$Qj][1]]; #similarity of according template
- my $dtemplate = "NULL";
- my $posteriori = 0;
- for (my $r=1; $r<@{$TABprobs[$Qi]}; $r=$r+3){
- if ($TABprobs[$Qj][1] == $TABprobs[$Qi][$r]){
- $probi = $TABprobs[$Qi][$r-1]; #probability of i-i'
- $dtemplate = &DistanceInTemplate($atom1,$atom2, $Qi,$Qj,$TABprobs[$Qi][$r+1],$TABprobs[$Qj][2],$queryatomnameOne,$queryatomnameTwo,$queryresiduenameOne,$queryresiduenameTwo,$TABprobs[$Qj][1], 2);
- $posteriori = $probi*$probj;
-
- if ($Qi - $TABprobs[$Qi][$r+1] == $Qj - $TABprobs[$Qj][2]) {
- $posteriori = min($probi, $probj);
- }
-
- if ( ($dtemplate ne "NULL")) {
- # next if ($posteriori < 0.2);
- ## skip too long distance-restraints
- next if (($dtemplate > 28 and $rsrType==9) or ($dtemplate > 19 and $rsrType==10) or ($dtemplate > 11 and $rsrType==23) or ($dtemplate>10 and $rsrType==26));
- my $newline = "";
- my $hit = $templateList->get($TABprobs[$Qj][1])->get_Hit();
- if ($weightTemplates) {
- my $tweight = $templateWeight{$hit};
- $newline = &CalcRSRTweight($posteriori, $sim, $dtemplate, $rsrType, $atom1,$atom2, $mdnCACA, $mdnNO, $mdnSCMC, $mdnSCSC, $tweight);
- }
- else {
- $newline = &CalcRSR($posteriori, $sim, $dtemplate, $rsrType, $atom1,$atom2, $mdnCACA, $mdnNO, $mdnSCMC, $mdnSCSC);
- }
- $newline .= " $hit $posteriori $sim $dtemplate $Qi $Qj $TABprobs[$Qi][$r+1] $TABprobs[$Qj][2]\n";
- push(@newrsrfile,$newline);
- }
- elsif ($remainRestr) { push(@newrsrfile, $rsrlines[$i]); }
- }
- }
- }
- #if there is more than one template for j-j' and one template for i-i'
- # Q |---------------------------------------------------------------|
- # T1 |------------------------------------|
- # T2 |-----------------------------------|
- # i j
- elsif (@{$TABprobs[$Qi]}==3 && @{$TABprobs[$Qj]}>3){
- #print "more than one template for j-j' atom: $atom1 $atom2\n";
- my $probi = $TABprobs[$Qi][0]; #probability of i-i'
- my $probj;
- my $sim = $sim[$TABprobs[$Qi][1]]; #similarity of according template
- my $dtemplate = "NULL";
- my $posteriori = 0;
- for (my $r=1;$r<@{$TABprobs[$Qj]}; $r=$r+3){
- if ($TABprobs[$Qi][1] == $TABprobs[$Qj][$r]){
- $probj = $TABprobs[$Qj][$r-1]; #probability of j-j'
- $dtemplate = &DistanceInTemplate($atom1,$atom2, $Qi,$Qj,$TABprobs[$Qi][2],$TABprobs[$Qj][$r+1],$queryatomnameOne,$queryatomnameTwo,$queryresiduenameOne,$queryresiduenameTwo,$TABprobs[$Qi][1], 3);
- $posteriori = $probi*$probj;
-
- if ($Qi - $TABprobs[$Qi][2] == $Qj - $TABprobs[$Qj][$r+1]) {
- $posteriori = min($probi, $probj);
- }
- if ( ($dtemplate ne "NULL")) {
- # next if ($posteriori < 0.2);
- ## skip too long distance-restraints
- next if (($dtemplate > 28 and $rsrType==9) or ($dtemplate > 19 and $rsrType==10) or ($dtemplate > 11 and $rsrType==23) or ($dtemplate>10 and $rsrType==26));
- my $newline = "";
- my $hit = $templateList->get($TABprobs[$Qi][1])->get_Hit();
- if ($weightTemplates) {
- my $tweight = $templateWeight{$hit};
- $newline = &CalcRSRTweight($posteriori, $sim, $dtemplate, $rsrType,$atom1,$atom2, $mdnCACA, $mdnNO, $mdnSCMC, $mdnSCSC, $tweight);
- }
- else {
- $newline = &CalcRSR($posteriori, $sim, $dtemplate, $rsrType,$atom1,$atom2, $mdnCACA, $mdnNO, $mdnSCMC, $mdnSCSC);
- }
- $newline .= " $hit $posteriori $sim $dtemplate $Qi $Qj $TABprobs[$Qi][2] $TABprobs[$Qj][$r+1]\n";
- push(@newrsrfile, $newline);
- }
- elsif ($remainRestr) { push(@newrsrfile, $rsrlines[$i]); }
- }
- }
- }
- #only one template (per position; but it is possible that the two residues are (one-fold) covered by two
- # different (single covering) templates) => test if same template
- # Q |--------------------------------------------------------|
- # T1 |----------------------|
- # T2 |-------------------|
- # i j
- elsif (@{$TABprobs[$Qi]}==3 && @{$TABprobs[$Qj]}==3) {
- my $probi = $TABprobs[$Qi][0]; #probability of i-i'
- my $probj = $TABprobs[$Qj][0]; #probability of j-j'
- my $sim = $sim[$TABprobs[$Qi][1]]; #similarity of according template
- my $dtemplate = "NULL";
- # same template???
- if ($TABprobs[$Qi][1] == $TABprobs[$Qj][1]) {
- $dtemplate = &DistanceInTemplate($atom1,$atom2, $Qi,$Qj,$TABprobs[$Qi][2],$TABprobs[$Qj][2],$queryatomnameOne,$queryatomnameTwo,$queryresiduenameOne,$queryresiduenameTwo,$TABprobs[$Qi][1], 4);
- #if ($atom1==19) {
- # print "DistanceInTemplate($atom1,$atom2, $Qi,$Qj,$TABprobs[$Qi][2],$TABprobs[$Qj][2],$queryatomnameOne,$queryatomnameTwo,$queryresiduenameOne,$queryresiduenameTwo,$TABprobs[$Qi][1], 4)=$dtemplate\n";
- #}
- }
- if ( ($dtemplate ne "NULL")) {
- my $posteriori = $probi*$probj;
-
- if ($Qi - $TABprobs[$Qi][2] == $Qj - $TABprobs[$Qj][2]) {
- $posteriori = min($probi, $probj);
- }
-
- # next if ($posteriori < 0.2);
-
- ## skip too long distance-restraints
- next if (($dtemplate > 28 and $rsrType==9) or ($dtemplate > 19 and $rsrType==10) or ($dtemplate > 11 and $rsrType==23) or ($dtemplate>10 and $rsrType==26));
- my $newline = "";
- my $hit = $templateList->get($TABprobs[$Qi][1])->get_Hit();
- if ($weightTemplates) {
- my $tweight = $templateWeight{$hit};
- $newline = &CalcRSRTweight($posteriori, $sim, $dtemplate, $rsrType,$atom1,$atom2, $mdnCACA, $mdnNO, $mdnSCMC, $mdnSCSC, $tweight);
- }
- else {
- $newline = &CalcRSR($posteriori, $sim, $dtemplate, $rsrType,$atom1,$atom2, $mdnCACA, $mdnNO, $mdnSCMC, $mdnSCSC);
- }
- $newline .= " $hit $posteriori $sim $dtemplate $Qi $Qj $TABprobs[$Qi][2] $TABprobs[$Qj][2]\n";
- push(@newrsrfile,$newline);
- }
- elsif ($remainRestr) { push(@newrsrfile, $rsrlines[$i]); }
-
- }
- else{print "WARNING1: error in line: $rsrlines[$i] check PIR-Alignment: $outbase.pir and Tab!\n";}
- }
- else{
- print "WARNING2: in line: $rsrlines[$i] check tabfile(s), residues Qi=$Qi, Qj=$Qj; keep old restraint!\n";
- if ($remainRestr) {push(@newrsrfile, $rsrlines[$i])};
- }
- }
- else {push(@newrsrfile,$rsrlines[$i]);}
- }
- $rsrfile =~ s/\.rsr$//;
- open (OUT, ">$rsrfile.new.rsr") or die ("Error: cannot open $rsrfile.new.rsr: $!\n");
- print(OUT @newrsrfile);
- close(OUT);
- return $templWeightNormalizer;
- }
- ########################################################
- ## input: self explanatory, see arguments
- ## output: distance between specified atoms in specified
- ## residue
- ########################################################
- sub DistanceInTemplate {
- my $queryatomOne = $_[0]; #209
- my $queryatomTwo = $_[1]; #222
- my $queryresidueOne = $_[2]; #27
- my $queryresidueTwo = $_[3]; #29
- my $tempresidueOne = $_[4]; #4
- my $tempresidueTwo = $_[5]; #6
- my $queryatomnameOne = $_[6]; #CA
- my $queryatomnameTwo = $_[7]; #CA
- my $queryresiduenameOne = $_[8]; #ILE
- my $queryresiduenameTwo = $_[9]; #LEU
- my $templatenr = $_[10]; #0
- my $case = $_[11];
- #@PDBs; global contains: Atomname[0] Residuename[1] ResidueNr[2] x[3] y[4] z[5]
- my $dtemplate;
-
- #print"search Query:$queryatomnameOne,$queryresiduenameOne & $queryatomnameTwo,$queryresiduenameTwo Temp:$tempresidueOne $tempresidueTwo\n";
- #print"OPEN: $templatename.pdb search:$tempresidueOne\n";
-
- #find xyz in template pdb
- my @coordinatesOne = ();
- my @coordinatesTwo = ();
- my $a = 0;
- my $b = 0;
- my $c = 0;
- my $d = 0;
- my $e = 0;
- my $f = 0;
- if (@{$atomsForResidue[$templatenr][$tempresidueOne]}) {
- foreach my $atom (@{$atomsForResidue[$templatenr][$tempresidueOne]}) {
- push(@coordinatesOne, [$PDBs[$templatenr][$atom][0],$PDBs[$templatenr][$atom][1],$PDBs[$templatenr][$atom][3],$PDBs[$templatenr][$atom][4],$PDBs[$templatenr][$atom][5]]);
- }
- }
- if (@{$atomsForResidue[$templatenr][$tempresidueTwo]}) {
- foreach my $atom (@{$atomsForResidue[$templatenr][$tempresidueTwo]}) {
- push(@coordinatesTwo, [$PDBs[$templatenr][$atom][0],$PDBs[$templatenr][$atom][1],$PDBs[$templatenr][$atom][3],$PDBs[$templatenr][$atom][4],$PDBs[$templatenr][$atom][5]]);
- }
- }
- ## find all atoms for one template belonging to the template residues given, e.g. 27 and 29
- ## TODO: avoid traversing whole array by recording all atoms for each residue for each template
- # for(my $i=0; $i<@{$PDBs[$templatenr]}; $i++) {
- # if (($PDBs[$templatenr][$i][2]) && ($PDBs[$templatenr][$i][2] == $tempresidueOne)) {
- # #print"$PDBs[$templatenr][$i][0] $PDBs[$templatenr][$i][1] $PDBs[$templatenr][$i][2] $PDBs[$templatenr][$i][3] $PDBs[$templatenr][$i][4] $PDBs[$templatenr][$i][5]\n";
- # push(@coordinatesOne,[$PDBs[$templatenr][$i][0],$PDBs[$templatenr][$i][1],$PDBs[$templatenr][$i][3],$PDBs[$templatenr][$i][4],$PDBs[$templatenr][$i][5]]);
- # }
- # if (($PDBs[$templatenr][$i][2])&&($PDBs[$templatenr][$i][2] == $tempresidueTwo)) {
- # #print"$PDBs[$templatenr][$i][0] $PDBs[$templatenr][$i][1] $PDBs[$templatenr][$i][2] $PDBs[$templatenr][$i][3] $PDBs[$templatenr][$i][4] $PDBs[$templatenr][$i][5]\n";
- # push(@coordinatesTwo,[$PDBs[$templatenr][$i][0],$PDBs[$templatenr][$i][1],$PDBs[$templatenr][$i][3],$PDBs[$templatenr][$i][4],$PDBs[$templatenr][$i][5]]);
- # }
- # }
- # foreach my $atom {$atomsForResidue[$templatenr]}{$tempresidueOne}
- my $foundOne = 0;
- my $foundTwo = 0;
- for(my $k=0; $k<@coordinatesOne; $k++) {
- #print "$coordinatesOne[$k][0] eq $queryatomnameOne?\n";
- if ($coordinatesOne[$k][0] eq $queryatomnameOne) {
- $a = $coordinatesOne[$k][2];
- $b = $coordinatesOne[$k][3];
- $c = $coordinatesOne[$k][4];
- $foundOne = 1;
- last;
- }
- }
- for(my $k=0; $k<@coordinatesTwo; $k++) {
- #print "$coordinatesTwo[$k][0] eq $queryatomnameTwo?\n";
- if ($coordinatesTwo[$k][0] eq $queryatomnameTwo) {
- $d = $coordinatesTwo[$k][2];
- $e = $coordinatesTwo[$k][3];
- $f = $coordinatesTwo[$k][4];
- $foundTwo = 1;
- last;
- }
- }
- if($foundOne==0 || $foundTwo==0) {
- if (@coordinatesOne) {
- if (@coordinatesTwo) {
- if($foundOne==0) {
- my @alternatives = &searchAlternatives($queryresiduenameOne,$queryatomnameOne);
- #print "QUERY1:$queryatomnameOne in $queryresiduenameOne TEMPLATE:\t";
- #print"\nsearchAlternatives 1 zu $queryresiduenameOne $queryatomnameOne ($queryatomOne $queryatomTwo) found:\n";
- for(my $k=0; $k<@coordinatesOne; $k++) {
- #print "$coordinatesOne[$k][0] in $coordinatesOne[$k][1]\nALTERNATIVE?\n";
- for (my $w=1; $w<@alternatives; $w++) {
- #print "$alternatives[$w]\n";
- if ($alternatives[$w] =~ /\s$coordinatesOne[$k][1]:$coordinatesOne[$k][0]\s/) {
- #print "HIT!!!: $coordinatesOne[$k][2] $coordinatesOne[$k][3] $coordinatesOne[$k][4]";
- $a += $coordinatesOne[$k][2];
- $b += $coordinatesOne[$k][3];
- $c += $coordinatesOne[$k][4];
- $foundOne++;
- }
- }
- }
- }
- if($foundTwo==0) {
- my @alternatives = &searchAlternatives($queryresiduenameTwo,$queryatomnameTwo);
- #print "\nQUERY2:$queryatomnameTwo in $queryresiduenameTwo TEMPLATE:\t";
- #print"\nsearchAlternatives 2 zu $queryresiduenameTwo $queryatomnameTwo ($queryatomOne $queryatomTwo) found:\n";
- for(my $k=0; $k<@coordinatesTwo; $k++) {
- #print "$coordinatesTwo[$k][0] in $coordinatesTwo[$k][1]\nALTERNATIVE?\n";
- for (my $w=1; $w<@alternatives; $w++) {
- #print "$alternatives[$w]\n";
- if ($alternatives[$w] =~ /\s$coordinatesTwo[$k][1]:$coordinatesTwo[$k][0]\s/){
- #print "HIT!!!: $coordinatesTwo[$k][2] $coordinatesTwo[$k][3] $coordinatesTwo[$k][4]";
- $d += $coordinatesTwo[$k][2];
- $e += $coordinatesTwo[$k][3];
- $f += $coordinatesTwo[$k][4];
- $foundTwo++;
- }
- }
- }
- }
- if ($foundOne>0 && $foundTwo>0) {
- $a /= $foundOne;
- $b /= $foundOne;
- $c /= $foundOne;
- $d /= $foundTwo;
- $e /= $foundTwo;
- $f /= $foundTwo;
- $dtemplate= sqrt(($a-$d)*($a-$d) + ($b-$e)*($b-$e) + ($c-$f)*($c-$f));
- print"FOUND ALTERNATIVE distance: $dtemplate\n";
- }
- else{$dtemplate = "NULL";}
- }
- else{$dtemplate = "NULL";}
- }
- else{$dtemplate = "NULL";}
- }
- else {
- $dtemplate= sqrt(($a-$d)*($a-$d) + ($b-$e)*($b-$e) + ($c-$f)*($c-$f));
- if ($dtemplate < 1) {
- print "==> Distance < 1 : $dtemplate\n";
- print "queryatomOne = $queryatomOne\n";
- print "queryatomTwo = $queryatomTwo\n";
- print "queryresidueOne = $queryresidueOne\n";
- print "queryresidueTwo = $queryresidueTwo\n";
- print "tempresidueOne = $tempresidueOne\n";
- print "tempresidueTwo = $tempresidueTwo\n";
- print "queryatomnameOne = $queryatomnameOne\n";
- print "queryatomnameTwo = $queryatomnameTwo\n";
- print "queryresidue = $queryresiduenameOne\n";
- print "queryresiduenameTwo = $queryresiduenameTwo\n";
- print "templatenr = $templatenr\n";
- print "case = $case\n";
- }
- }
- #print "DELTA: $dtemplate\n";
- return $dtemplate;
- }
- ##################################################################
- ## input: self explanatory, see arguments
- ## output: constraint line as needed for modeller constraint file
- ##
- ## description:
- ## the parameters for the new constraint are calculated by a MDN
- ## parameters with index 2 are background parameters
- ## the new restraint is generated and returned
- ##################################################################
- sub CalcRSR{
- my $posteriori = shift;
- my $sim = shift;
- my $dtemplate = shift;
- my $rsrType = shift;
- my $atom1 = shift;
- my $atom2 = shift;
- my $mdnCACA = shift;
- my $mdnNO = shift;
- my $mdnSCMC = shift;
- my $mdnSCSC = shift;
-
- my @mdnOutput;
- my @mdnInput = ($dtemplate, $posteriori, $sim);
- if ($rsrType == 9) {
- @mdnOutput = $mdnCACA->getMixtureParametersFromMDN(@mdnInput);
- }
- elsif ($rsrType == 10) {
- @mdnOutput = $mdnNO->getMixtureParametersFromMDN(@mdnInput);
- }
- elsif ($rsrType == 23) {
- @mdnOutput = $mdnSCMC->getMixtureParametersFromMDN(@mdnInput);
- }
- elsif ($rsrType == 26) {
- @mdnOutput = $mdnSCSC->getMixtureParametersFromMDN(@mdnInput);
- }
- my $p1 = $mdnOutput[0];
- my $p2 = $mdnOutput[1];
- my $mu1 = log($dtemplate) + $mdnOutput[2];
- my $mu2 = log($dtemplate) + $mdnOutput[3];
- my $sigma1 = $mdnOutput[4];
- my $sigma2 = $mdnOutput[5];
- # Form Modality Feature Group Numb_atoms Numb_parameters Numb_Feat Atom_indices Parameter(e.g. mean, standard deviation)
- #R 3 1 1 1 2 2 1 3 2 1.5000 0.0364
- #R 50 1 1 9|10|23|26 2 6 1 .. .. ... ... ... ... ... ...
- my $newline = sprintf("R 50 1 1 %3.0f 2 6 1 %5.0f %5.0f %8.4f %8.4f %8.4f %8.4f %8.4f %8.4f",$rsrType,$atom1,$atom2,$p1,$mu1,$sigma1,$p2,$mu2,$sigma2);
-
-
- return $newline;
- }
- ##################################################################
- ## input: self explanatory, see arguments
- ## output: constraint line as needed for modeller constraint file
- ##
- ## description:
- ## the parameters for the new constraint, are calculated by a MDN
- ## parameters with index 2 are background parameters
- ##################################################################
- sub CalcRSRTweight{
- my $posteriori = shift;
- my $sim = shift;
- my $dtemplate = shift;
- my $rsrType = shift;
- my $atom1 = shift;
- my $atom2 = shift;
- my $mdnCACA = shift;
- my $mdnNO = shift;
- my $mdnSCMC = shift;
- my $mdnSCSC = shift;
- my $tweight = shift;
-
- my @mdnOutput;
- my @mdnInput = ($dtemplate, $posteriori, $sim);
- if ($rsrType == 9) {
- @mdnOutput = $mdnCACA->getMixtureParametersFromMDN(@mdnInput);
- }
- elsif ($rsrType == 10) {
- @mdnOutput = $mdnNO->getMixtureParametersFromMDN(@mdnInput);
- }
- elsif ($rsrType == 23) {
- @mdnOutput = $mdnSCMC->getMixtureParametersFromMDN(@mdnInput);
- }
- elsif ($rsrType == 26) {
- @mdnOutput = $mdnSCSC->getMixtureParametersFromMDN(@mdnInput);
- }
- my $p1 = $mdnOutput[0];
- my $p2 = $mdnOutput[1];
- my $mu1 = log($dtemplate) + $mdnOutput[2];
- my $mu2 = log($dtemplate) + $mdnOutput[3];
- my $sigma1 = $mdnOutput[4];
- my $sigma2 = $mdnOutput[5];
- # Form Modality Feature Group Numb_atoms Numb_parameters Numb_Feat Atom_indices Parameter(e.g. mean, standard deviation)
- #R 3 1 1 1 2 2 1 3 2 1.5000 0.0364
- #R 50 1 1 9|10|23|26 2 6 1 .. .. ... ... ... ... ... ...
- my $newline = sprintf("R 50 1 1 %3.0f 2 7 1 %5.0f %5.0f %8.4f %8.4f %8.4f %8.4f %8.4f %8.4f %8.4f",$rsrType,$atom1,$atom2,$p1,$mu1,$sigma1,$p2,$mu2,$sigma2, $tweight);
-
-
- return $newline;
- }
- ##################################################################
- ## input: self explanatory, see arguments
- ## output: constraint line as needed for modeller constraint file
- ##
- ## description:
- ## the parameters for the new constraint, are calculated by a MDN
- ## instead of writing the parameters mu, mu_BG, sigma, sigma_BG,
- ## w, w_BG into the restraints file, use precalculated parameters
- ## a,b,c in order to save computing steps when Modeller optimization
- ## runs
- ## parameters with index 2 are background parameters
- ##################################################################
- sub CalcRSRefficient {
- my $posteriori = shift;
- my $sim = shift;
- my $dtemplate = shift;
- my $rsrType = shift;
- my $atom1 = shift;
- my $atom2 = shift;
- my $mdnCACA = shift;
- my $mdnNO = shift;
- my $mdnSCMC = shift;
- my $mdnSCSC = shift;
-
- my @mdnOutput;
- my @mdnInput = ($dtemplate, $posteriori, $sim);
- if ($rsrType == 9) {
- @mdnOutput = $mdnCACA->getMixtureParametersFromMDN(@mdnInput);
- }
- elsif ($rsrType == 10) {
- @mdnOutput = $mdnNO->getMixtureParametersFromMDN(@mdnInput);
- }
- elsif ($rsrType == 23) {
- @mdnOutput = $mdnSCMC->getMixtureParametersFromMDN(@mdnInput);
- }
- elsif ($rsrType == 26) {
- @mdnOutput = $mdnSCSC->getMixtureParametersFromMDN(@mdnInput);
- }
- my $p1 = $mdnOutput[0];
- my $p2 = $mdnOutput[1];
- my $mu1 = log($dtemplate) + $mdnOutput[2];
- my $mu2 = log($dtemplate) + $mdnOutput[3];
- my $sigma1 = $mdnOutput[4];
- my $sigma2 = $mdnOutput[5];
- # Form Modality Feature Group Numb_atoms Numb_parameters Numb_Feat Atom_indices Parameter(e.g. mean, standard deviation)
- #R 3 1 1 1 2 2 1 3 2 1.5000 0.0364
- #R 50 1 1 9|10|23|26 2 6 1 .. .. ... ... ... ... ... ...
- my $sigma1sq = $sigma1*$sigma1;
- my $sigma2sq = $sigma2*$sigma2;
- my $factorOne = 2*$sigma1sq*$sigma2sq;
- my $a = $p1/$p2 * $sigma2/$sigma1;
- my $b = ($sigma2sq - $sigma1sq)/$factorOne;
- my $factorTwo = ($sigma2sq*$mu1 - $sigma1sq*$mu2);
- my $c = $factorTwo/($sigma2sq - $sigma1sq);
- my $d = $factorTwo*$factorTwo/($factorOne*($sigma2sq - $sigma1sq)) - $factorTwo/$factorOne;
- my $newline = sprintf("R 50 1 1 %3.0f 2 6 1 %5.0f %5.0f %8.4f %8.4f %8.4f %8.4f\n",$rsrType,$atom1,$atom2,$a, $b, $c, $d);
-
- return $newline;
- }
- sub CalcRSRPrev{
- my $probi = $_[0];
- my $probj = $_[1];
- my $sim = $_[2];
- my $dtemplate = $_[3];
- my $rsrType = $_[4];
- my $atom1 = $_[5];
- my $atom2 = $_[6];
-
- #distance Template < cutoff
- # avPI avSIM avDELTA
- # CACAlog 0.78950 0.47877 2.29587
- # NOlog 0.79391 0.47610 2.04427
- # SCMClog 0.81810 0.50638 1.50130
- # SCSClog 0.87671 0.59417 1.46639
- my @Avec; #Parametervalues: a0 a1 a2 a3 b0 b1 b2 b3 b4 b5 b6 b7 b8 b9 c0 c1 c2 c3 c4 c5 d0 d1 d2 d3 d4 d5
- my $PI;
- my $Sim;
- my $deltatemplate;
-
- if ($rsrType==9){
- @Avec = (-2.6961,-0.7186,0.1326 ,-0.6153,0.6712 ,0.7634 ,0.1174 ,0.0353 ,0.3422 ,-0.2807,0.2447 ,0.0463 ,-0.0171,0.1796 ,-1.0388,-0.1192,-0.3310,0.0900 ,0.2468 ,-0.2058,0.1296 ,-0.1788,-0.1989,0.0080 ,0.3942 ,0.1421);
- $PI= $probi*$probj - 0.7895;
- $Sim = $sim - 0.4788;
- $deltatemplate = log($dtemplate)- 2.29587;
- }
- elsif ($rsrType==10){
- @Avec = (-2.6514,-0.5396,-0.0368,-0.5819,0.6323 ,0.8551 ,0.0992 ,-0.0011,0.4606 ,0.0202 ,0.2238 ,0.1218 ,-0.0396,0.0448 ,-0.9667,-0.2518,-0.4197,0.0653 ,0.2507 ,-0.2392,0.1500 ,-0.0833,-0.2595,-0.0224,0.3706 ,0.1158);
- $PI= $probi*$probj - 0.7940;
- $Sim = $sim - 0.4763;
- $deltatemplate = log($dtemplate)- 2.04427;
- }
- elsif($rsrType==23){
- @Avec = (-3.4835,-0.1144,-0.6024,-0.4093,0.5841 ,0.4945 ,0.0102 ,-0.4040,0.3905 ,0.1852 ,0.0844 ,0.0213 ,0.0668 ,-0.3501,-1.6170,-0.8439,-0.2171,0.0006 ,-0.1970,-0.9967,0.0843 ,-0.2868,-0.0493,-0.0108,0.0096 ,0.3162);
- $PI= $probi*$probj - 0.8893;
- $Sim = $sim - 0.6281;
- $deltatemplate = log($dtemplate)- 1.50130;
- }
- elsif($rsrType==26){
- @Avec = (-2.3377,-0.3537,1.9132 ,-0.1555,0.5179 ,0.6930 ,0.1961 ,0.9687 ,0.3331 ,-0.0203,0.6672 ,-0.1298,-0.0680,1.0575 ,-0.9797,0.2624 ,-0.5030,0.1753 ,1.0932 ,1.6404 ,0.3804 ,-0.2516,-0.5884,-0.0289,0.5928 ,0.3932);
- $PI= $probi*$probj - 0.9373;
- $Sim = $sim - 0.7220;
- $deltatemplate = log($dtemplate)- 1.46639;
- }
- my $A = $Avec[0] + $Avec[1] *$Sim + $Avec[2] *$deltatemplate;
- my $B = $Avec[3];
- my $logsigma = $A + $B*$PI;
-
- my $p = $Avec[4]+$Avec[5]*$PI+$Avec[6]*$Sim+$Avec[7]*$deltatemplate+$Avec[8]*$PI*$PI+$Avec[9]*$PI*$Sim+$Avec[10]*$PI*$deltatemplate+$Avec[11]*$Sim*$deltatemplate+$Avec[12]*$Sim*$Sim+$Avec[13]*$deltatemplate*$deltatemplate;
- if ($p>1) {$p=1;}elsif ($p<0) {$p=0;}
-
- my $logsigmaBG = $Avec[14] + $Avec[15]*$deltatemplate + $Avec[16]*$PI + $Avec[17]*$Sim + $Avec[18]*$PI*$deltatemplate + $Avec[19]*$deltatemplate*$deltatemplate;
- my $logmuBG = $Avec[20] + $Avec[21]*$deltatemplate + $Avec[22]*$PI + $Avec[23]*$Sim + $Avec[24]*$PI*$deltatemplate + $Avec[25]*$deltatemplate*$deltatemplate;
-
- #for Modeller Restraints:
- my $p1 = sprintf("%.5f" ,$p);
- my $p2 = sprintf("%.5f" ,(1-$p1));
- my $sigma = exp($logsigma);
- $sigma = sprintf("%.5f" ,$sigma);
- my $sigmaBG = exp($logsigmaBG);
- $sigmaBG = sprintf("%.5f" ,$sigmaBG);
- my $mu = log($dtemplate);
- $mu = sprintf("%.5f" ,$mu);
- my $muBG = $logmuBG + $mu;
- $muBG = sprintf("%.5f" ,$muBG);
-
- # Form Modality Feature Group Numb_atoms Numb_parameters Numb_Feat Atom_indices Parameter(e.g. mean, standard deviation)
- #R 3 1 1 1 2 2 1 3 2 1.5000 0.0364
- #R 50 1 1 9|10|23|26 2 6 1 .. .. ... ... ... ... ... ...
- my $newline = sprintf("R 50 1 1 %3.0f 2 6 1 %5.0f %5.0f %8.4f %8.4f %8.4f %8.4f %8.4f %8.4f\n",$rsrType,$atom1,$atom2,$p1,$mu,$sigma,$p2,$muBG,$sigmaBG);
-
- return $newline;
- }
- sub searchAlternatives{
- #alternatives from /cluster/bioprogs/modeller/modlib/atmcls-melo.lib
- my $Residue = $_[0];
- my $Atom = $_[1];
- #print "\s$Residue:$Atom\s\n";
- my @ATMGRP01 = (' ALA:CB ', ' ILE:CG2 ', ' ILE:CD1 ', ' ILE:CD ', ' LEU:CD1 ', ' LEU:CD2 ', ' THR:CG2 ', ' VAL:CG1 ', ' VAL:CG2 ');
- my @ATMGRP02 = (' ILE:CB ', ' LEU:CG ', ' VAL:CB ');
- my @ATMGRP03 = (' ARG:CB ', ' ARG:CG ', ' ASN:CB ', ' ASP:CB ', ' GLU:CB ', ' GLU:CG ', ' ILE:CG1 ', ' GLN:CB ', ' GLN:CG ', ' HIS:CB ', ' HSD:CB ', ' LEU:CB ', ' LYS:CB ', ' LYS:CG ', ' LYS:CD ', ' MET:CB ', ' PHE:CB ', ' PRO:CB ', ' PRO:CG ', ' TRP:CB ', ' TYR:CB ');
- my @ATMGRP04 = (' PHE:CG ', ' TRP:CD2 ', ' TYR:CG ');
- my @ATMGRP05 = (' PHE:CD1 ', ' PHE:CD2 ', ' PHE:CE1 ', ' PHE:CE2 ', ' PHE:CZ ', ' TRP:CE3 ', ' TRP:CZ2 ', ' TRP:CZ3 ', ' TRP:CH2 ', ' TYR:CD1 ', ' TYR:CD2 ', ' TYR:CE1 ', ' TYR:CE2 ');
- my @ATMGRP06 = (' SER:OG ', ' THR:OG1 ');
- my @ATMGRP07 = (' ASN:ND2 ', ' GLN:NE2 ');
- my @ATMGRP08 = (' CYS:SG ', ' CSS:SG ');
- my @ATMGRP09 = (' ARG:NH1 ', ' ARG:NH2 ');
- my @ATMGRP10 = (' HIS:CG ', ' HSD:CG ');
- my @ATMGRP11 = (' HIS:CD2 ', ' HSD:CD2 ', ' TRP:CD1 ');
- my @ATMGRP12 = (' HIS:NE2 ', ' HSD:NE2 ');
- my @ATMGRP13 = (' HIS:CE1 ', ' HSD:CE1 ');
- my @ATMGRP14 = (' ASP:CG ', ' GLU:CD ');
- my @ATMGRP15 = (' ALA:OXT ', ' ARG:OXT ', ' ASN:OXT ', ' ASP:OXT ', ' CYS:OXT ', ' CSS:OXT ', ' GLU:OXT ', ' GLN:OXT ', ' GLY:OXT ', ' HIS:OXT ', ' HSD:OXT ', ' ILE:OXT ', ' LEU:OXT ', ' LYS:OXT ', ' MET:OXT ', ' PHE:OXT ', ' PRO:OXT ', ' SER:OXT ', ' THR:OXT ', ' TRP:OXT ', ' TYR:OXT ', ' VAL:OXT ', ' ASP:OD1 ', ' ASP:OD2 ', ' ASP:OT1 ', ' ASP:OT2 ', ' GLU:OE1 ', ' GLU:OE2 ', ' GLU:OT1 ', ' GLU:OT2 ');
- my @ATMGRP16 = (' CYS:CB ', ' CSS:CB ', ' MET:CG ');
- my @ATMGRP17 = (' ASN:CG ', ' GLN:CD ');
- my @ATMGRP18 = (' ASN:OD1 ', ' GLN:OE1 ');
- my @ATMGRP19 = (' HIS:ND1 ', ' HSD:ND1 ');
-
- my @alternatives;
- if (grep(/\s$Residue:$Atom\s/,@ATMGRP01)){@alternatives=@ATMGRP01;}
- elsif (grep(/\s$Residue:$Atom\s/,@ATMGRP02)){@alternatives=@ATMGRP02;}
- elsif (grep(/\s$Residue:$Atom\s/,@ATMGRP03)){@alternatives=@ATMGRP03;}
- elsif (grep(/\s$Residue:$Atom\s/,@ATMGRP04)){@alternatives=@ATMGRP04;}
- elsif (grep(/\s$Residue:$Atom\s/,@ATMGRP05)){@alternatives=@ATMGRP05;}
- elsif (grep(/\s$Residue:$Atom\s/,@ATMGRP06)){@alternatives=@ATMGRP06;}
- elsif (grep(/\s$Residue:$Atom\s/,@ATMGRP07)){@alternatives=@ATMGRP07;}
- elsif (grep(/\s$Residue:$Atom\s/,@ATMGRP08)){@alternatives=@ATMGRP08;}
- elsif (grep(/\s$Residue:$Atom\s/,@ATMGRP09)){@alternatives=@ATMGRP09;}
- elsif (grep(/\s$Residue:$Atom\s/,@ATMGRP10)){@alternatives=@ATMGRP10;}
- elsif (grep(/\s$Residue:$Atom\s/,@ATMGRP11)){@alternatives=@ATMGRP11;}
- elsif (grep(/\s$Residue:$Atom\s/,@ATMGRP12)){@alternatives=@ATMGRP12;}
- elsif (grep(/\s$Residue:$Atom\s/,@ATMGRP13)){@alternatives=@ATMGRP13;}
- elsif (grep(/\s$Residue:$Atom\s/,@ATMGRP14)){@alternatives=@ATMGRP14;}
- elsif (grep(/\s$Residue:$Atom\s/,@ATMGRP15)){@alternatives=@ATMGRP15;}
- elsif (grep(/\s$Residue:$Atom\s/,@ATMGRP16)){@alternatives=@ATMGRP16;}
- elsif (grep(/\s$Residue:$Atom\s/,@ATMGRP17)){@alternatives=@ATMGRP17;}
- elsif (grep(/\s$Residue:$Atom\s/,@ATMGRP18)){@alternatives=@ATMGRP18;}
- elsif (grep(/\s$Residue:$Atom\s/,@ATMGRP19)){@alternatives=@ATMGRP19;}
-
- return @alternatives;
- }
- sub CalcWnew {
- my $d = shift;
- my $p1 = shift;
- my $mu1 = shift;
- my $sigma1 = shift;
- my $p2 = shift;
- my $mu2 = shift;
- my $sigma2 = shift;
- my $correct = $p1 / $sigma1 * exp( -0.5 * (($d - $mu1)/$sigma1)*(($d - $mu1)/$sigma1) );
- my $bg = $p2 / $sigma2 * exp(-0.5 * (($d - $mu2)/$sigma2)*(($d - $mu2)/$sigma2) );
- return $correct/($correct + $bg);
- }
- ########################################################################
- ## input: rsrfile: name of restraints file (full path)
- ## models: pdb models generated in round before (full path)
- ##
- ## output: generates a new restraints file called:
- ## OLDRESTRAINTSFILE.iter
- ## this file is then to be used for another round of Modeller
- ## model generation
- ##
- ## description: assume that a previous Modeller run has generated
- ## e.g. 4 models. Now, one wants to use the information contained in
- ## the generated models.
- ## For all models, there is a single rsr file. The generated pdb-files
- ## (models) are "realizations" of these contraints, e.g. take a distance
- ## restraint for CACA between residues 4 and 39 in a target.
- ## Each model has now a concrete value for the distance between 4 and 39
- ## Interpret now w as:
- ## w = P(correct_iji'j' | d_ij, d'_i'j', A_ij) = wN(log(d_ij)|..) / (wN(log(d_ij)|..) + (1-w)N(log(d'_ij)|..)
- ## and calculate w for each model.
- ## Take as new w the median of all these w.
- ########################################################################
- sub UpdateRsrFromModels {
- my $rsrfile = shift;
- my @models = @_;
- my @pdbs;
- ## copy pdb files and rename the copied files, because next modeller round will
- ## generate new files named as the old ones
- for (my $i=0; $i<@models; $i++) {
- system("cp $models[$i] $models[$i].prev");
- $models[$i] .= ".prev";
- }
- ## read in models
- ## atomnr -> x,y,z
- for (my $i=0; $i<@models; $i++) {
- my %structureInfo;
- if (not defined( open (MH, "< $models[$i]"))) {
- print "Cant open $models[$i]. Skipping $models[$i]...\n";
- next;
- }
- while(<MH>) {
- if (/ATOM.{2}\s*(\d+)\s.([\S|\s]{3}).(\S{3})\s.\s*(\d+)[\s|\D]\s*(-?\d+.\d+)\s*(-?\d+.\d+)\s*(-?\d+.\d+)/) {
- ## $atomnr x y z
- $structureInfo{$1} = [$5, $6, $7];
- }
- }
- close(MH);
- ## save atomnr->[x,y,z] hash for each model
- $pdbs[$i] = \%structureInfo;
- }
- open (RH, "< $rsrfile") or die "Cant open $rsrfile: $!\n";
- my @newRestraints;
- ## go through restraints and for each (CACA,NO,SCMC,SCSC distance) restraint calculate the acutal distance
- ## in each model and calculate new w for the restraint r. Replace r by new restraint (new w and 1-w)
- while (my $rsr = <RH>) {
- ## 50 1 1 9 2 6 1 289 305 0.2070 1.8664 0.1161 0.7930 2.0669 0.4316
- ## atom1 atom2 p1 mu1 sigma1 p2 mu2 sigma2
- if ($rsr =~ /^R\s+50\s+\S+\s+\S+\s+(\S+)\s+\S+\s+\S+\s+\S+\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)/) {
- my $rsrtype = $1;
- my $atom1 = $2;
- my $atom2 = $3;
- my $p1 = $4;
- my $mu1 = $5;
- my $sigma1 = $6;
- my $p2 = $7;
- my $mu2 = $8;
- my $sigma2 = $9;
- my @allWs;
- my $finalNewW;
- for (my $i=0; $i<@models; $i++) {
- ## coordinates of atom1 and atom2
- my @coAtom1 = @{$pdbs[$i]{$atom1}};
- my @coAtom2 = @{$pdbs[$i]{$atom2}};
- my $distance = sqrt( ($coAtom1[0]-$coAtom2[0])*($coAtom1[0]-$coAtom2[0]) +($coAtom1[1]-$coAtom2[1])*($coAtom1[1]-$coAtom2[1]) +($coAtom1[2]-$coAtom2[2])*($coAtom1[2]-$coAtom2[2]) );
- my $wUpdated = &CalcWnew(log($distance), $p1, $mu1, $sigma1, $p2, $mu2, $sigma2);
- push(@allWs, $wUpdated);
- }
- ## calculate the median of all new w:
- @allWs = sort {$a <=> $b} @allWs;
-
- if (@allWs % 2 == 0 and @allWs >= 2) {
- $finalNewW = ($allWs[@allWs/2-1] + $allWs[@allWs/2]) / 2;
- }
- else {
- $finalNewW = $allWs[@allWs/2];
- }
- ## update restraint and add it to the new restraints list
- my $newrsr = sprintf("R 50 1 1 %3.0f 2 6 1 %5.0f %5.0f %8.4f %8.4f %8.4f %8.4f %8.4f %8.4f\n", $rsrtype, $atom1, $atom2, $finalNewW, $mu1, $sigma1, (1-$finalNewW), $mu2, $sigma2);
- push(@newRestraints, $newrsr);
- }
- else {
- ## keep other restraints
- push (@newRestraints, $rsr);
- }
- }
- close(RH);
- ## save new restraints in a new file
- my $newRsrFileName = $rsrfile . ".iter";
- open (IH, "> $newRsrFileName") or die ("Cant write $newRsrFileName: $!\n");
- print (IH @newRestraints);
- close(IH);
- }
- sub GenerateIterativeModels {
- my $myrsr = shift;
- my $queryName = shift;
- my $tmpdir = shift;
- my $inbase = shift;
- my $outbase = shift;
- &System("cp $inbase.B9999000*.pdb $tmpdir");
- &System("mv $tmpdir/$queryName.B99990003.pdb $tmpdir/$queryName.B99990004.pdb");
- &System("mv $tmpdir/$queryName.B99990002.pdb $tmpdir/$queryName.B99990003.pdb");
- &System("mv $tmpdir/$queryName.B99990001.pdb $tmpdir/$queryName.B99990002.pdb");
- &ModellerNew($myrsr, $queryName, $inbase, $outbase, $tmpdir, 1);
- my @models = <$tmpdir/$queryName.B9999000*.pdb>;
- &UpdateRsrFromModels($myrsr, @models);
- &ModellerNew("$myrsr.iter", $queryName, $inbase, $outbase, $tmpdir, 3);
- }
- ##########
- ## write out "final" pdb file in CASP compatible format
- ##
- ## outFile: filename of pdb file to write
- ## srcFile: filename of already existing (Modeller generated) pdb file
- ##
- sub WriteCASPpdbFile {
- my $outFile = shift @_;
- my $srcFile = shift @_;
- my $seqname = shift @_;
- my $server = shift @_;
- my $tList = shift @_;
- my $templates = "";
- my $predTMs = "";
- for (my $i=0; $i<$tList->size(); $i++) {
- $templates .= $tList->get($i)->get_Hit() . " ";
- $predTMs .= sprintf("%.2f", $tList->get($i)->get_predTM()) . " ";
- }
- open(SRC, "< $srcFile") or die ("WriteCASPpdbFile: Cant open $srcFile: $!\n");
- my @mod = <SRC>;
- close(SRC);
- my @coordinates;
- ## skip all lines until first ATOM record is read
- for (my $i=0; $i<@mod; $i++) {
- if ($mod[$i] =~ /^ATOM/) {
- push (@coordinates, $mod[$i]);
- }
- }
- open(PDB, ">$outFile") or die ("WriteCASPpdbFile: Cant open $outFile: $!\n");
- print(PDB "PFRMAT TS\n");
- print(PDB "TARGET $seqname\n");
- print(PDB "AUTHOR $server\n");
- print(PDB "METHOD $server (http://hhpred.tuebingen.mpg.de)\n");
- print(PDB "MODEL 1\n");
- print(PDB "PARENT $templates\n");
- print(PDB "REMARK PredTM $predTMs\n");
- print(PDB @coordinates);
- print(PDB "TER\n");
- print(PDB "END\n");
- close(PDB);
- FixOccupancy::FixOccupancy($outFile);
- }
- 1;
|