multithread.pl 5.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180
  1. #!/usr/bin/perl
  2. #
  3. # multithread.pl:
  4. # Run a command with different file names as arguments on multiple threads in parallel
  # Usage: multithread.pl '<fileglob>' '<command>' [-cpu <int>] [-v {0,1,2}]
  6. #
  7. #
  8. # HHsuite version 3.0.0 (15-03-2015)
  9. #
  10. # Reference:
  11. # Remmert M., Biegert A., Hauser A., and Soding J.
  12. # HHblits: Lightning-fast iterative protein sequence searching by HMM-HMM alignment.
  13. # Nat. Methods, epub Dec 25, doi: 10.1038/NMETH.1818 (2011).
  14. # (C) Johannes Soeding, 2012
  15. # This program is free software: you can redistribute it and/or modify
  16. # it under the terms of the GNU General Public License as published by
  17. # the Free Software Foundation, either version 3 of the License, or
  18. # (at your option) any later version.
  19. # This program is distributed in the hope that it will be useful,
  20. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  21. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  22. # GNU General Public License for more details.
  23. # You should have received a copy of the GNU General Public License
  24. # along with this program. If not, see <http://www.gnu.org/licenses/>.
  25. # We are very grateful for bug reports! Please contact us at [email protected]
  26. use lib $ENV{"HHLIB"}."/scripts";
  27. use HHPaths; # config file with path variables for nr, blast, psipred, pdb, dssp etc.
  28. use strict;
  29. use POSIX;
  # ----------------------------------------------------------------------------
  # Script-wide state and defaults
  # ----------------------------------------------------------------------------
  my $cpu        = 8;     # maximum number of child processes running at once
  my $parent_pid = $$;    # PID of this main process (children send USR1 to it)
  my $pid;                # PID of a child: fork() result, also used as loop variable
  my %pid        = ();    # running child PID => "file name (index)" label
  my $children   = 0;     # number of child processes counted as running
  my $options    = "";    # command-line arguments after <fileglob> and <command>
  my $file;               # file currently being dispatched
  my $ifile      = 0;     # running index of $file (first file is 1)
  my $v          = 1;     # verbosity level
  my $numerr     = 0;     # count of commands that returned an error code

  # Fewer than two arguments: show the help text and stop.
  unless (@ARGV >= 2) {
      die(<<"END_OF_USAGE");

multithread.pl from HHsuite $VERSION
Run a command for many files in parallel using multiple threads
Usage: multithread.pl '<fileglob>' '<command>' [-cpu <int>] [-v {0,1,2}]
<command> can include symbol
\$file for the full filename, e.g. /tmp/hh/1c1g_A.a3m,
\$name for the filename without extension, e.g. /tmp/hh/1c1g_A, and
\$base for the filename without extension and path, e.g. 1c1g_A.
-cpu <int> number of threads to launch (default = $cpu)
-v {0,1,2} verbose mode (default = $v)
Example: multithread.pl '*.a3m' 'hhmake -i \$file 1>\$name.log 2>>error.log' -cpu 16

END_OF_USAGE
  }
  $| = 1;    # autoflush on, so progress output appears immediately

  my @files   = glob($ARGV[0]);    # expand the file glob given as first argument
  my $command = $ARGV[1];          # command template; may contain $file, $name, $base

  # Signal setup:
  #   CHLD - ignored, so finished children are reaped automatically
  #   USR1 - sent by every child right after its command has finished
  #   INT  - Ctrl-C: kill the children and abort
  $SIG{'CHLD'} = 'IGNORE';
  $SIG{'USR1'} = \&ChildFinished;
  $SIG{'INT'}  = \&KillAllProcesses;

  # Everything after the two mandatory arguments is treated as options
  if (@ARGV > 2) {
      $options .= join(" ", @ARGV[2..$#ARGV]);
  }

  # Set number of cpus to use and verbosity; recognized options are removed from $options
  if ($options =~ s/-cpu\s*(\d+)\s*//g) { $cpu = $1; }
  if ($options =~ s/-v\s*(\d+)\s*//g)   { $v   = $1; }

  # A limit of 0 could never be satisfied: the dispatch loop waits while
  # $children >= $cpu, which is always true for $cpu == 0, so it would hang forever.
  if ($cpu < 1) {
      print("WARNING: -cpu $cpu is not a usable number of threads, using 1 instead\n");
      $cpu = 1;
  }

  # Warn if unknown options found
  if ($options !~ /^\s*$/) {
      $options =~ s/^\s*(.*?)\s*$/$1/g;
      print("WARNING: unknown options '$options'\n");
  }

  if ($v >= 1) { print(scalar(@files) . " files read in ...\n"); }
  # Derive the two shortened names that the command template may refer to.
  #   $path   : file name as produced by the glob
  #   returns : ($name, $base) where $name is $path without its extension and
  #             $base is $name without the leading directory part.
  # Only a dot inside the last path component (and not at its very start) is
  # treated as the start of an extension, so "/tmp/hh.d/1c1g_A" and "./query"
  # keep their full names instead of being cut at a directory dot.
  sub _name_and_base {
      my ($path) = @_;
      my $name = $path;
      if ($path =~ /(.*[^\/])\.[^\/]*$/) { $name = $1; }
      my $base = $name;
      if ($name =~ /.*\/(.*?)$/) { $base = $1; }
      return ($name, $base);
  }

  # Main dispatch loop: fork one child per file, with at most $cpu children at a time.
  foreach $file (@files) {
      $ifile++;

      # All cpus occupied? -> wait for a cpu to become free
      if ($children >= $cpu) {
          if ($v >= 2) { print("\nParent $$ is sleeping (children=$children) "); }
          my $count = 0;
          while ($children >= $cpu) {
              if ($count++ >= 10) {
                  # After ten short sleeps, recount the children from the PID table
                  # instead of trusting the USR1-driven counter (presumably because
                  # signal notifications can be lost or counted twice).
                  $count = 0;
                  if ($v >= 2) { print("\nProcesses running:"); }
                  $children = 0;
                  foreach $pid (keys(%pid)) {
                      if (!kill(0, $pid)) {    # kill(0,$pid) only probes: false if the process is gone
                          if ($v >= 2) { printf("\nPID %5.5s: %s is removed from process table", $pid, $pid{$pid}); }
                          delete($pid{$pid});    # remove process from hash of PIDs
                      } else {
                          if ($v >= 2) { printf("\nPID %5.5s: %s", $pid, $pid{$pid}); }
                          $children++;
                      }
                  }
                  if ($v >= 2) { print("\n"); }
              } else {
                  if ($v == 1) { print("."); }
                  select(undef, undef, undef, 0.1);    # sleep 0.1 seconds
              }
          }
      }

      if ($pid = fork()) {
          # Main process: register the new child
          $children++;
          $pid{$pid} = "$file ($ifile)";
          # Short pause before the next file is dispatched
          select(undef, undef, undef, 0.1);    # sleep 0.1 seconds
      } elsif (defined $pid) {
          # Child process: fill in the command template and run it
          my ($name, $base) = _name_and_base($file);
          my $lcommand = $command;    # private copy of the template for this child
          $lcommand =~ s/\$file/$file/g;
          $lcommand =~ s/\$name/$name/g;
          $lcommand =~ s/\$base/$base/g;
          System($lcommand);
          if ($v >= 2) { printf("\nProcess $$ for file %s (%i) finished.", $file, $ifile); }
          kill(USR1 => $parent_pid);    # tell the parent that a slot is free again
          exit;
      } else {
          die("\nError: fork returned undefined PID: $!\n");
      }
  }
  # Block until no child process is left: wait() yields -1 once there is nothing to wait for.
  1 while wait() != -1;

  print("\nAll processes should be finished now\n") if $v >= 1;
  print(STDERR "WARNING: $numerr commands returned with error code.\n") if $numerr > 0;
  exit(0);
  # Handler for USR1, which a child sends to the main process once its command is done.
  # Lowers the running-children counter by one and re-installs itself as the handler.
  sub ChildFinished() {
      $children -= 1;
      $SIG{'USR1'} = \&ChildFinished;    # keep the handler installed for the next signal
      print("\nChildren counter reduced to children=$children") if $v >= 2;
      return;
  }
  # Handler for INT (Ctrl-C): send SIGKILL to every child recorded in %pid, then
  # abort the main process with an "Interrupt" message.
  # The signal is sent at every verbosity level; only the report of kill()'s
  # return value is limited to -v 2. A positive signal is used because a negative
  # one makes kill() address the process *group* with that id, and the forked
  # children are not made group leaders anywhere in this script.
  sub KillAllProcesses()
  {
      foreach my $child (keys(%pid)) {
          my $signalled = kill('KILL', $child);    # number of processes signalled (0 or 1)
          if ($v >= 2) { printf("\nKill process $child: returned %i\n", $signalled); }
      }
      die("\nInterrupt: Killed main process $$\n");
  }
  ################################################################################################
  ### System command
  ################################################################################################
  # Run one command line through system(), echoing it first when $v >= 1.
  #   $_[0]   : the complete command line
  #   returns : the raw status from system() (0 on success, -1 if the command could
  #             not be run, otherwise a wait status with the exit code in the high byte)
  # On failure an error line is written to STDERR and $numerr is incremented in the
  # calling process (a forked child's increment does not reach the main process).
  sub System {
      my ($cmd) = @_;
      if ($v >= 2) { print("\n"); }
      if ($v >= 1) { print("\n" . $cmd, " "); }

      # The script sets $SIG{CHLD}='IGNORE'. With that setting the finished command
      # is reaped automatically, system() cannot collect its status and always
      # returns -1. Restore the default disposition for the duration of this call.
      local $SIG{'CHLD'} = 'DEFAULT';

      my $status = system($cmd);
      if ($status != 0) {
          my $reason;
          if ($status == -1) {
              $reason = "could not be run: $!";
          } elsif ($status & 127) {
              $reason = sprintf("died with signal %d", $status & 127);
          } else {
              $reason = sprintf("returned error code %d", $status >> 8);
          }
          print(STDERR "\nERROR: command '$cmd' $reason\n");
          $numerr++;
      }
      return $status;
  }