utilities.pm 19 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870
  #!/usr/bin/perl -w
  ## utilities.pm - assorted helper routines: small numeric helpers, string
  ## and file-name helpers, wrappers around the TMalign/TMscore executables
  ## and parsers for hhsearch tab files and hhm files.
  package utilities;

  use strict;
  use warnings;
  use config;    # presumably provides HHpredConfig - TODO confirm
  require Exporter;

  our @ISA = qw(Exporter);

  ## Every name listed here is exported into the caller's namespace by default.
  our @EXPORT = qw(
      System normalizeToOne sigmoid normalizeToOneHash
      Three2OneLetter One2ThreeLetter getTMscoreFrom
      BuildTabFiles BuildSingleTabFile max min KMP
      match_all_positions remove_ranges mean sd sample euklid_dist
      verbose get_dirname get_basename sumProbLen get_PDB_chain
      get_seq_len get_HMM_len trim ltrim rtrim
      TMalignBetween TMscoreBetween TMalignIDBetween hashToStr
      get_neff_from_hhm getSSPredFromHHM getCoverageApprox randBetween
      printMatrix printHash symmetricMinMatrix logistic scalarProduct
      PosteriorsFromTabFile getRandomString
  );

  ## Shared configuration object; the TMalign/TMscore wrappers below ask it
  ## for the default executable when the caller does not pass one.
  my $config = HHpredConfig->instance();
  ## Draw a random integer between lower and upper (both inclusive).
  ##   lower, upper: integer bounds of the interval
  ## Returns int(lower + rand(upper - lower + 1)).
  sub randBetween {
      my ($lower, $upper) = @_;
      my $span = $upper - $lower + 1;
      return int($lower + rand($span));
  }
  ## Check whether number lies in the closed interval [lower, upper].
  ## Returns 1 if it does and 0 otherwise.
  sub isInRange {
      my ($number, $lower, $upper) = @_;
      return ($number >= $lower && $number <= $upper) ? 1 : 0;
  }
  ## Logistic function 1 / (1 + e^(-x)).
  sub logistic {
      my ($x) = @_;
      my $denominator = 1.0 + exp(-$x);
      return 1.0 / $denominator;
  }
  ## Scalar (dot) product of two vectors given as array references.
  ## Returns -1 if the two arrays differ in length, otherwise the sum of
  ## the element-wise products (0 for two empty arrays).
  sub scalarProduct {
      my ($left, $right) = @_;
      my $length = scalar(@{$left});
      return -1 unless $length == scalar(@{$right});

      my $total = 0;
      foreach my $pos (0 .. $length - 1) {
          $total += $left->[$pos] * $right->[$pos];
      }
      return $total;
  }
  ## Scale the entries of a list so that they add up to one.
  ## Assumes that all entries are non-negative.
  ## Works on a copy of the arguments and returns that copy. If the
  ## entries sum to zero a warning is printed and the copy is returned
  ## unscaled.
  sub normalizeToOne {
      my @values = @_;

      my $total = 0;
      $total += $_ foreach @values;

      if ($total == 0) {
          print "normalizeToOne: Warning: normalizer equals zero!\n";
          return @values;
      }

      $_ /= $total foreach @values;
      return @values;
  }
  ## Print a matrix (reference to an array of array references) to the
  ## currently selected output handle, one row per line, each entry
  ## followed by a single blank.
  ##   matrixPtr:  the matrix
  ##   matrixName: optional; if non-empty, "<name>:" is printed first
  sub printMatrix {
      my ($matrixPtr, $matrixName) = @_;
      $matrixName ||= "";

      print "$matrixName:\n" unless $matrixName eq "";

      foreach my $row (@{$matrixPtr}) {
          my $columns = scalar(@{$row});
          foreach my $col (0 .. $columns - 1) {
              print "$row->[$col] ";
          }
          print "\n";
      }
  }
  ## Print the entries of a hash as "key => value", sorted by key.
  ##   hashPtr: reference to the hash to print
  ##   inRow:   optional; if any defined extra argument is given, entries
  ##            are separated by a blank (all on one line), otherwise every
  ##            entry is written on its own line
  ## The original tested $_[1] after shifting off the hash reference, so a
  ## flag passed as second argument was never seen. Any defined argument
  ## after the hash reference now switches to row mode, which also keeps
  ## the old three-argument call form working.
  sub printHash {
      my ($hashPtr, @flags) = @_;
      my $separator = (grep { defined } @flags) ? " " : "\n";

      foreach my $key (sort keys %{$hashPtr}) {
          print "$key => $hashPtr->{$key}$separator";
      }
      return;
  }
  ## Make a matrix symmetric in place: for every pair of mirrored entries
  ## (row, col) / (col, row) above the diagonal, both are set to the
  ## smaller of the two values.
  ##   matrixPtr: reference to an array of array references (modified)
  sub symmetricMinMatrix {
      my ($matrixPtr) = @_;

      for (my $row = 0; $row < scalar(@{$matrixPtr}); $row++) {
          for (my $col = $row + 1; $col < @{$matrixPtr->[$row]}; $col++) {
              if ($matrixPtr->[$row][$col] < $matrixPtr->[$col][$row]) {
                  ## upper entry is smaller => copy it to the lower one
                  $matrixPtr->[$col][$row] = $matrixPtr->[$row][$col];
              }
              elsif ($matrixPtr->[$col][$row] < $matrixPtr->[$row][$col]) {
                  ## lower entry is smaller => copy it to the upper one
                  $matrixPtr->[$row][$col] = $matrixPtr->[$col][$row];
              }
          }
      }
  }
  ## Scale the values of a hash so that they add up to one.
  ##   hashref: reference to the hash; it is copied, not modified
  ## Returns the scaled copy as a hash (list). If the values sum to zero
  ## a warning is printed and the copy is returned unscaled.
  sub normalizeToOneHash {
      my ($hashref) = @_;
      my %scaled = %{$hashref};

      my $total = 0;
      $total += $_ foreach values %scaled;

      if ($total == 0) {
          print "normalizeToOneHash: Warning: normalizer equals zero!\n";
          return %scaled;
      }

      $scaled{$_} /= $total foreach keys %scaled;
      return %scaled;
  }
  ## Sigmoid 1 / (1 + e^(-val)), clamped: arguments below -15 give
  ## exactly 0.0 and arguments above 15 give exactly 1.0.
  sub sigmoid {
      my ($val) = @_;
      return 0.0 if $val < -15.0;
      return 1.0 if $val > 15.0;
      return (1.0 / (1.0 + exp(-$val)));
  }
  ##################################################################################
  # Convert a three-letter amino acid code into its one-letter code.
  # The lookup ignores case; codes that are not in the table give "X".
  ##################################################################################
  sub Three2OneLetter {
      my $res = uc($_[0]);

      my %oneLetterFor = (
          GLY => "G", ALA => "A", VAL => "V", LEU => "L", ILE => "I",
          MET => "M", PHE => "F", TYR => "Y", TRP => "W", ASN => "N",
          ASP => "D", GLN => "Q", GLU => "E", CYS => "C", PRO => "P",
          SER => "S", THR => "T", LYS => "K", HIS => "H", ARG => "R",
          ASX => "D",
          GLX => "E",
          MSE => "M",    # SELENOMETHIONINE
          SEP => "S",    # PHOSPHOSERINE
          SEC => "C",    # SELENOCYSTEINE
          TPO => "T",    # PHOSPHOTHREONINE
          TYS => "Y",    # SULFONATED TYROSINE
          KCX => "K",    # LYSINE NZ-CARBOXYLIC ACID
      );

      return exists $oneLetterFor{$res} ? $oneLetterFor{$res} : "X";
  }
  ## Convert a one-letter amino acid code into its three-letter code.
  ## The lookup ignores case; letters that are not in the table give "UNK".
  sub One2ThreeLetter {
      my $res = uc($_[0]);

      my %threeLetterFor = (
          G => "GLY", A => "ALA", V => "VAL", L => "LEU", I => "ILE",
          M => "MET", F => "PHE", Y => "TYR", W => "TRP", N => "ASN",
          D => "ASP", Q => "GLN", E => "GLU", C => "CYS", P => "PRO",
          S => "SER", T => "THR", K => "LYS", H => "HIS", R => "ARG",
          U => "SEC", B => "ASX", Z => "GLX",
      );

      return exists $threeLetterFor{$res} ? $threeLetterFor{$res} : "UNK";
  }
  ## Get the (approximated) coverage of the query by the template at
  ## index idx in tlist: (Qend - Qstart) of that entry divided by the
  ## query length. Gaps within the region are not considered, hence only
  ## an approximation.
  ##   tlist: object offering get_queryLength() and get(idx); the entry
  ##          returned by get() must offer get_Qstart() and get_Qend()
  ##   idx:   index of the template within tlist
  ## Returns -1 if idx is negative or not smaller than the query length.
  ## NOTE(review): idx is bounds-checked against the query length although
  ## it is used as an index into tlist - confirm this is intended.
  sub getCoverageApprox {
      my ($tlist, $idx) = @_;

      my $queryLength = $tlist->get_queryLength();
      return -1 if ($idx < 0 || $idx >= $queryLength);

      my $hit = $tlist->get($idx);
      my $range = $hit->get_Qend() - $hit->get_Qstart();
      return $range / $queryLength;
  }
  ## Extract the TM-score from a TMscore/TMalign output file.
  ##   file: path of the output file
  ## Returns the number following the first "TM-score =" in the file, or
  ## -1 if no such line exists. Dies if the file cannot be opened.
  sub getTMscoreFrom {
      my ($file) = @_;
      my $tmscore = -1;

      ## three-argument open with a lexical handle: the file name can no
      ## longer be misread as an open mode, and no global handle is touched
      open(my $tmFh, '<', $file) or die "Cant open $file: $!\n";
      while (my $line = <$tmFh>) {
          if ($line =~ /TM-score\s*=\s*(\d+(\.\d+)?)/) {
              $tmscore = $1;
              last;
          }
      }
      close($tmFh);

      return $tmscore;
  }
  ## Run TMalign on two structures and return the TM-score.
  ##   strucFileOne, strucFileTwo: structure files handed to TMalign
  ##   options: optional string appended to the command line
  ##   TMalign: optional executable; defaults to $config->get_TMalign()
  ## Returns the text between "TM-score =" and the following comma in
  ## the program output, or -1 (after printing a warning) if the output
  ## has no such text.
  ## NOTE(review): the command line is built by string interpolation and
  ## run through the shell - arguments must come from a trusted source.
  sub TMalignBetween {
      my ($strucFileOne, $strucFileTwo, $options, $TMalign) = @_;
      $options ||= " ";
      $TMalign ||= $config->get_TMalign();

      my $TMalignOutput = qx{$TMalign $strucFileOne $strucFileTwo $options};

      my $TMscore = ($TMalignOutput =~ /TM-score\s*=\s*(\S+),/) ? $1 : -1;
      print "WARNING: TMalignBetween could not find TMscore in TMalign output!\n"
          if ($TMscore == -1);

      return $TMscore;
  }
  ## Run TMalign on two structures and return TM-score and sequence
  ## identity.
  ##   strucFileOne, strucFileTwo: structure files handed to TMalign
  ##   options: optional string appended to the command line
  ##   TMalign: optional executable; defaults to $config->get_TMalign()
  ## Returns the list (TMscore, ID) taken from a "TM-score = ..., ID = ..."
  ## line of the program output; both are -1 (and a warning is printed)
  ## if no such line is found.
  ## NOTE(review): the command line is built by string interpolation and
  ## run through the shell - arguments must come from a trusted source.
  sub TMalignIDBetween {
      my ($strucFileOne, $strucFileTwo, $options, $TMalign) = @_;
      $options ||= " ";
      $TMalign ||= $config->get_TMalign();

      my $TMalignOutput = `$TMalign $strucFileOne $strucFileTwo $options`;

      my $TMID = -1;
      my $TMscore = -1;
      if ($TMalignOutput =~ /TM-score\s*=\s*(\S+),\s*ID\s*=\s*(\S+)/) {
          $TMscore = $1;
          $TMID = $2;
      }

      if ($TMscore == -1) {
          ## the warning used to name TMalignBetween (copy-paste slip)
          print "WARNING: TMalignIDBetween could not find TMscore in TMalign output!\n";
      }

      return ($TMscore, $TMID);
  }
  ## Run TMscore on a model and its native structure and return the
  ## TM-score.
  ##   modelFile, nativeFile: structure files handed to TMscore
  ##   options: optional string appended to the command line
  ##   TMscore: optional executable; defaults to $config->get_TMscore()
  ## Returns the number following "TM-score =" in the program output, or
  ## -1 (after printing a warning) if the output has no such number.
  ## NOTE(review): the command line is built by string interpolation and
  ## run through the shell - arguments must come from a trusted source.
  sub TMscoreBetween {
      my ($modelFile, $nativeFile, $options, $TMscore) = @_;
      $options ||= " ";
      $TMscore ||= $config->get_TMscore();

      my $TMscoreOutput = qx{$TMscore $modelFile $nativeFile $options};

      my $score = ($TMscoreOutput =~ /TM-score\s*=\s*(\d+(\.\d+)?)/) ? $1 : -1;
      print "WARNING: TMscoreBetween could not find TMscore in TMscore output!\n"
          if ($score == -1);

      return $score;
  }
  ##################################################################################
  ## Split a combined tab file into one tab file per template.
  ##
  ## tabfile: tab file containing all tab results (created by hhsearch
  ##          option -atab)
  ## outbase: prefix of the files to write
  ## maxHits: how many templates to write (default: 1000, i.e. in fact all)
  ##
  ## The input contains one section per template of the initial hhsearch,
  ## in the order of the initial hhr file. A section starts with a line
  ## holding ">name" and is followed by tab entries (i j score ss probab
  ## [dssp]). Column-header lines ("i j ...") are skipped.
  ##
  ## Each section is written to "<outbase>.<name>.tab". If the same name
  ## shows up in more than one section (a template aligned several times
  ## at different positions), the later section overwrites the earlier file.
  ##
  ## Dies if the input or an output file cannot be opened or written.
  ##################################################################################
  sub BuildTabFiles {
      my ($tabfile, $outbase, $maxHits) = @_;
      $maxHits = 1000 unless defined($maxHits);

      open(my $tabFh, '<', $tabfile) or die "Cant open $tabfile: $!\n";

      my $hitnr = 1;
      my $outFh;    ## handle of the section being written, undef if none

      while (my $line = <$tabFh>) {
          next if ($line =~ /^\s*i\s+j/);

          ## new template
          if ($line =~ />(\S+)/) {
              my $singleTabFile = "$outbase.$1.tab";

              if (defined $outFh) {
                  close($outFh) or die "Cant close tab file: $!\n";
                  undef $outFh;
              }
              last if ($hitnr > $maxHits);
              $hitnr++;

              open($outFh, '>', $singleTabFile) or die "Cant open $singleTabFile: $!\n";
              next;
          }

          ## i j score ss probab [dssp]
          ## (entries in front of the first template have no file and are dropped)
          if (defined $outFh && $line =~ /^\s*\S+\s+\S+\s+\S+\s+\S+\s+\S+/) {
              print {$outFh} $line;
          }
      }

      ## the last section used to stay open until the program ended
      if (defined $outFh) {
          close($outFh) or die "Cant close tab file: $!\n";
      }
      close($tabFh);
  }
  ############################################################################
  ## Like BuildTabFiles, but writes only a single tab file: the one for the
  ## hitnr-th template section of tabfile (counting from 1).
  ##
  ## tabfile: combined tab file (sections start with ">name")
  ## hitnr:   number of the section to extract
  ## outbase: prefix of the output file
  ##
  ## The entries are written to "<outbase>.<name>.HIT<hitnr>.tab".
  ## Returns 1 if the section was found and 0 otherwise (no file is
  ## created in that case). Dies if a file cannot be opened or written.
  ############################################################################
  sub BuildSingleTabFile {
      my ($tabfile, $hitnr, $outbase) = @_;

      open(my $tabFh, '<', $tabfile) or die "Cant open $tabfile: $!\n";

      my $hit = 1;
      my $found = 0;
      my $outFh;

      while (my $line = <$tabFh>) {
          next if ($line =~ /^\s*i\s+j/);

          ## begin of a new template
          if ($line =~ />(\S+)/) {
              ## the wanted section is complete => stop
              last if ($found == 1);

              if ($hitnr == $hit) {
                  $found = 1;
                  my $singleTabFile = "$outbase.$1.HIT$hitnr.tab";
                  open($outFh, '>', $singleTabFile) or die "Cant open $singleTabFile: $!\n";
                  next;
              }
              $hit++;
          }

          ## not yet found
          next if ($found == 0);

          ## i j score ss probab [dssp]
          if ($line =~ /^\s*\S+\s+\S+\s+\S+\s+\S+\s+\S+/) {
              print {$outFh} $line;
          }
      }

      ## only close what was actually opened
      if (defined $outFh) {
          close($outFh) or die "Cant close tab file: $!\n";
      }
      close($tabFh);

      return $found;
  }
  ############################################################################
  ## Read the tab entries of one template section of a combined tab file
  ## (format as described for BuildTabFiles).
  ##
  ## tabfile: combined tab file (sections start with ">name")
  ## hitnr:   number of the section to read (counting from 1)
  ##
  ## Returns a hash mapping the first column of every entry (query residue)
  ## to its fifth column (pp). The hash is empty if the section does not
  ## exist. Dies if the file cannot be opened.
  ############################################################################
  sub PosteriorsFromTabFile {
      my ($tabfile, $hitnr) = @_;

      open(my $tabFh, '<', $tabfile) or die "Cant open $tabfile: $!\n";

      my $hit = 1;
      my $found = 0;
      my %QidxToPP;

      while (my $line = <$tabFh>) {
          next if ($line =~ /^\s*i\s+j/);

          ## begin of a new template
          if ($line =~ />(\S+)/) {
              ## the wanted section is complete => stop
              last if ($found == 1);

              if ($hitnr == $hit) {
                  $found = 1;
                  next;
              }
              $hit++;
          }

          ## not yet found
          next if ($found == 0);

          ## i j score ss probab [dssp]
          if ($line =~ /^\s*(\S+)\s+\S+\s+\S+\s+\S+\s+(\S+)/) {
              $QidxToPP{$1} = $2;
          }
      }
      close($tabFh);

      return %QidxToPP;
  }
  ## Numeric maximum of the arguments (undef if called without any).
  sub max {
      my ($largest, @others) = @_;
      foreach my $candidate (@others) {
          $largest = ($candidate > $largest) ? $candidate : $largest;
      }
      return $largest;
  }
  ## Numeric minimum of the arguments (undef if called without any).
  sub min {
      my ($smallest, @others) = @_;
      foreach my $candidate (@others) {
          $smallest = ($candidate < $smallest) ? $candidate : $smallest;
      }
      return $smallest;
  }
  ## Arithmetic mean of the arguments.
  ## Dies with a division-by-zero error when called without arguments.
  sub mean {
      my @values = @_;
      my $total = 0;
      $total += $_ foreach @values;
      return $total / scalar(@values);
  }
  ## Standard deviation of the arguments, computed with divisor N
  ## (not N-1) around the value returned by mean().
  sub sd {
      my @values = @_;
      my $mean = mean(@values);
      my $count = scalar(@values);

      my $squaredSum = 0;
      foreach my $value (@values) {
          my $delta = $value - $mean;
          $squaredSum += $delta * $delta;
      }

      return sqrt($squaredSum * (1.0 / $count));
  }
  ## Sample a bin, where the probability of each bin is given in the
  ## argument list; the probabilities must sum to 1.
  ## Returns the index of the first bin whose cumulative probability is
  ## not smaller than a random number drawn from [0, 1), or 0 if no bin
  ## qualifies.
  sub sample {
      my @probs = @_;
      my $threshold = rand();

      my $cumulative = 0;
      foreach my $bin (0 .. $#probs) {
          $cumulative += $probs[$bin];
          return $bin if ($threshold <= $cumulative);
      }
      return 0;
  }
  ## Render a hash as text: one "key=value" line per entry, sorted by key.
  ##   hashPtr: reference to the hash
  ## Returns the text ("" for an empty hash).
  sub hashToStr {
      my ($hashPtr) = @_;
      my %entries = %{$hashPtr};
      return join("", map { "$_=$entries{$_}\n" } sort keys(%entries));
  }
  ## Calculate the number of residues on which the calculation of
  ## sumProbs is based (the ones which are aligned and which have dssp):
  ## positions where ss_dssp is not '-' and conf is not a blank.
  ##   ss_dssp, conf: strings that are compared position by position
  ## If the strings differ in length a warning is printed and only the
  ## common prefix is examined.
  sub sumProbLen {
      my ($ss_dssp, $conf) = @_;

      my $len = length($ss_dssp);
      if ($len != length($conf)) {
          print "WARNING: sumProbLen length(ss_dssp) != length(conf)!\n";
          $len = min(length($ss_dssp), length($conf));
      }

      my $counted = 0;
      foreach my $pos (0 .. $len - 1) {
          next if (substr($ss_dssp, $pos, 1) eq '-');
          next if (substr($conf, $pos, 1) eq " ");
          $counted++;
      }
      return $counted;
  }
  ## Euclidean distance between two vectors.
  ##   ref1, ref2: references to the two arrays
  ##   v:          optional verbosity (a false value counts as 2); with
  ##               v >= 2 an error line is printed if the lengths differ
  ## The sum always runs over the indices of the first vector.
  sub euklid_dist {
      my ($ref1, $ref2, $v) = @_;
      $v ||= 2;

      my @first = @{$ref1};
      my @second = @{$ref2};

      if ($v >= 2 && $#first != $#second) {
          print "ERROR: euklid_dist: vec1 and vec2 differ in length!\n";
      }

      my $squared = 0;
      foreach my $pos (0 .. $#first) {
          $squared += ($first[$pos] - $second[$pos]) * ($first[$pos] - $second[$pos]);
      }
      return (sqrt($squared));
  }
  ## Knuth-Morris-Pratt string search.
  ##   st: text to search in
  ##   ss: search string
  ##   cs: optional flag (default 0); if non-zero, both strings are
  ##       upper-cased first, i.e. the comparison then ignores case
  ## Returns the index of the first occurrence of ss in st, or -1 if
  ## there is none. An empty search string is found at index 0 (as with
  ## Perl's index); previously it sent the search loop into an endless
  ## loop.
  sub KMP {
      my ($st, $ss, $cs) = @_;
      $cs ||= 0;

      if ($cs != 0) {
          $st = uc($st);
          $ss = uc($ss);
      }

      my @t = split(//, $st);
      my @s = split(//, $ss);
      my $n = scalar(@t);
      my $m = scalar(@s);

      return 0 if ($m == 0);
      return -1 if ($m > $n);

      ## compute borders: $borders[$len] is the length of the longest
      ## proper border of the first $len characters of the search string
      my @borders = (-1, 0);
      my $border = 0;
      for my $len (2 .. $m) {
          while (($border >= 0) && ($s[$border] ne $s[$len - 1])) {
              $border = $borders[$border];
          }
          $border++;
          $borders[$len] = $border;
      }

      ## search routine: $i is the alignment of the search string in the
      ## text, $j the number of characters already known to match there
      my $i = 0;
      my $j = 0;
      while ($i <= $n - $m) {
          while ($t[$i + $j] eq $s[$j]) {
              $j++;
              return $i if ($j == $m);
          }
          ## shift so that the border of the matched prefix lines up again
          $i += $j - $borders[$j];
          $j = ($borders[$j] > 0) ? $borders[$j] : 0;
      }
      return -1;
  }
  ## Given a regex and a string, give back all positions where the regex
  ## matches the string.
  ## Returns a list of two-element array references [start, end], one
  ## per match, where start is the offset of the match and end the offset
  ## just behind it.
  sub match_all_positions {
      my ($regex, $string) = @_;

      my @positions;
      while ($string =~ /$regex/g) {
          my ($from, $to) = ($-[0], $+[0]);
          push(@positions, [$from, $to]);
      }
      return @positions;
  }
  # Trim function to remove leading and trailing whitespace.
  # Returns the trimmed copy; the argument itself is not changed.
  sub trim($) {
      my ($string) = @_;
      for ($string) {
          s/^\s+//;
          s/\s+$//;
      }
      return $string;
  }
  # Left trim function to remove leading whitespace.
  # Returns the trimmed copy; the argument itself is not changed.
  sub ltrim($) {
      my ($string) = @_;
      $string =~ s/^\s+// for 1;
      return $string;
  }
  # Right trim function to remove trailing whitespace.
  # Returns the trimmed copy; the argument itself is not changed.
  sub rtrim($) {
      my ($string) = @_;
      $string =~ s/\s+$// for 1;
      return $string;
  }
  ## Find the range that contains a given index.
  ##   idx:     the index to look up
  ##   setsPtr: pointer to an array as generated by match_all_positions,
  ##            i.e. an array of arrays with 2 elements (start, end)
  ## Returns (start, end) of the first range with start <= idx < end,
  ## or (-1, -1) if no such range is found.
  ## The comparison used to be inverted (start >= idx && end < idx), which
  ## cannot hold for any range with start <= end, so nothing was ever found.
  sub set_of_idx {
      my ($idx, $setsPtr) = @_;

      foreach my $range (@{$setsPtr}) {
          my ($start, $end) = ($range->[0], $range->[1]);
          return ($start, $end) if ($start <= $idx && $idx < $end);
      }
      return (-1, -1);
  }
  ## Remove ranges (generated by e.g. match_all_positions) from str and
  ## give back the new string.
  ##   str:    the string to cut
  ##   splits: list of [start, end] array references; the characters at
  ##           offsets start .. end-1 are dropped. The code walks the
  ##           ranges in the given order without sorting or merging them.
  sub remove_ranges {
      my ($str, @splits) = @_;

      my $kept = "";
      my $cursor = 0;    ## offset of the first character not yet handled
      foreach my $range (@splits) {
          my $gStart = $range->[0];
          my $gEnd = $range->[1];
          $kept .= substr($str, $cursor, $gStart - $cursor);
          $cursor = $gEnd;
      }
      $kept .= substr($str, $cursor);

      return $kept;
  }
  ## Strip directory and extension from a file name.
  ##   dirbasename: file name, with or without leading directories
  ## Returns the last path component up to (not including) its first
  ## '.', e.g. "/a/b/c.tar.gz" => "c". Returns undef if the name cannot
  ## be parsed (e.g. the last component contains whitespace).
  ## The directory part is optional now: a name without '/' used to fail
  ## the match, and the sub then returned whatever $1 was left over from
  ## an earlier, unrelated match.
  sub get_basename {
      my ($dirbasename) = @_;

      my $basename;
      if ($dirbasename =~ /^(?:.*\/)?(\S+?)(?:\.\S+)?$/) {
          $basename = $1;
      }
      return $basename;
  }
  ## Get the directory part of a file name.
  ##   dirbasename: file name, with or without leading directories
  ## Returns everything in front of the last '/' (so "/file" gives "").
  ## A name without any '/' gives "." (current directory); such names
  ## used to fail the match, and the sub then returned whatever $1 was
  ## left over from an earlier, unrelated match.
  sub get_dirname {
      my ($dirbasename) = @_;

      my $dirname = ".";
      if ($dirbasename =~ /^(.*)\//) {
          $dirname = $1;
      }
      return $dirname;
  }
  ## Very simple: assumes name to look like 1a7j_A and returns the part
  ## behind the last underscore ("A").
  ## Prints a warning and returns "" if the name has no such part.
  sub get_PDB_chain {
      my ($name) = @_;

      return $1 if ($name =~ /\S+\_(\S+)$/);

      print "WARNING utitlities.pm get_PDB_chain strange format!\n";
      return "";
  }
  ## Length of a sequence string, not counting '*' and '-' characters
  ## and one trailing newline.
  sub get_seq_len {
      my ($seq) = @_;
      chomp($seq);
      my $skipped = ($seq =~ tr/*-//);    ## tr without /d only counts
      return length($seq) - $skipped;
  }
  ## Read the Neff value from an hhm file.
  ##   hhmFile: path of the hhm file
  ## Returns the token following "Neff" (case-insensitive) at the start
  ## of the first such line, or -1 if there is no such line.
  ## Dies if the file cannot be opened.
  sub get_neff_from_hhm {
      my ($hhmFile) = @_;
      my $neff = -1;

      ## lexical handle and three-argument open instead of the global HH
      open(my $hhmFh, '<', $hhmFile) or die("Cant open $hhmFile: $!\n");
      while (my $line = <$hhmFh>) {
          if ($line =~ /^Neff\s+(\S+)/i) {
              $neff = $1;
              last;
          }
      }
      close($hhmFh);

      return $neff;
  }
  ## Read the HMM length from an hhm file.
  ##   hhmFile: path of the hhm file
  ## Returns the token following "LENG" (case-insensitive) at the start
  ## of the first such line, or -1 if there is no such line.
  ## Dies if the file cannot be opened.
  sub get_HMM_len {
      my ($hhmFile) = @_;
      my $len = -1;

      ## lexical handle and three-argument open instead of the global HH
      open(my $hhmFh, '<', $hhmFile) or die("Cant open $hhmFile: $!\n");
      while (my $line = <$hhmFh>) {
          if ($line =~ /^LENG\s+(\S+)/i) {
              $len = $1;
              last;
          }
      }
      close($hhmFh);

      return $len;
  }
  ## Print a message depending on the verbosity level.
  ##   level:    level required for the message to be shown
  ##   actLevel: level currently in effect
  ##   message:  text to print (a newline is appended)
  ## The message is printed if actLevel is at least level.
  sub verbose {
      my ($level, $actLevel, $message) = @_;
      print "$message\n" if ($actLevel >= $level);
  }
  ## Read the predicted secondary structure from an hhm file.
  ##   hhmFile: path of the hhm file
  ## Returns the lines following the line that starts with ">ss_pred",
  ## up to the next line starting with ">" (or the end of the file),
  ## joined without line breaks. Returns "" if there is no ">ss_pred"
  ## line. Dies if the file cannot be opened.
  sub getSSPredFromHHM {
      my ($hhmFile) = @_;

      ## lexical handle and three-argument open instead of the global HHM
      open(my $hhmFh, '<', $hhmFile) or die "Cant open $hhmFile: $!\n";

      my $ssFound = 0;
      my $sspred = "";
      while (my $line = <$hhmFh>) {
          chomp($line);
          if ($line =~ /^>ss\_pred/) {
              $ssFound = 1;
              next;
          }
          ## the next header ends the ss_pred record
          last if ($ssFound && $line =~ /^>/);
          next if ($ssFound == 0);
          $sspred .= $line;
      }
      close($hhmFh);

      return $sspred;
  }
  ## Build a random string of len characters drawn from A-Z and a-z.
  sub getRandomString {
      my ($len) = @_;
      my @chars = ("A" .. "Z", "a" .. "z");
      return join("", map { $chars[rand @chars] } 1 .. $len);
  }
  ## Echo a command line on the selected output handle, then run it with
  ## system().
  ##   cmd: the command line (a single string)
  ## Returns the value returned by system().
  sub System {
      my ($cmd) = @_;
      print "$cmd\n";
      return system("$cmd");
  }

  ## a module has to end with a true value
  1;