simpleTree.pm 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538
  1. #!/user/bin/perl -w
  2. ## simple undirected binary tree
  3. use strict;
  4. package simpleTree;
  5. sub new {
  6. my $caller = shift;
  7. my $caller_is_obj = ref($caller);
  8. my $class = $caller_is_obj || $caller;
  9. no strict "refs";
  10. my $self = bless({}, $class);
  11. $self->{nodes} = {};
  12. $self->{parent} = {};
  13. $self->{weight} = {};
  14. $self->{numNodes} = 0;
  15. $self->{left} = {};
  16. $self->{right} = {};
  17. $self->{visited} = {};
  18. if ($caller_is_obj) {
  19. $self->{nodes} = $caller->{nodes};
  20. $self->{parent} = $caller->{parent};
  21. $self->{weight} = $caller->{weight};
  22. $self->{numNodes} = $caller->{numNodes};
  23. $self->{left} = $caller->{left};
  24. $self->{right} = $caller->{right};
  25. }
  26. return $self;
  27. }
  28. sub addLeaf {
  29. my $self = shift;
  30. my $id = shift;
  31. $self->{nodes}{$id} = $self->{numNodes};
  32. $self->{parent}{$id} = -1;
  33. $self->{left}{$id} = -1;
  34. $self->{right}{$id} = -1;
  35. $self->{weight}{$id} = 0;
  36. $self->{numNodes}++;
  37. }
  38. sub addNode {
  39. my $self = shift;
  40. my $id = shift;
  41. my $parent = shift;
  42. my $left = shift;
  43. my $right = shift;
  44. my $weight = shift;
  45. $self->{nodes}->{$id} = $id;
  46. $self->{parent}->{$id} = $parent;
  47. $self->{left}->{$id} = $left;
  48. $self->{right}->{$id} = $right;
  49. $self->{weight}->{$id} = $weight;
  50. }
  51. sub isConnected {
  52. my $self = shift;
  53. my $nodeNum1 = shift;
  54. my $nodeNum2 = shift;
  55. my $num = "$nodeNum1&$nodeNum2";
  56. my $numr = "$nodeNum2&$nodeNum1";
  57. return 1 if (exists($self->{nodes}->{$num}));
  58. return 1 if (exists($self->{nodes}->{$numr}));
  59. return 0;
  60. }
  61. sub connectAdaptWeightToHeight {
  62. my $self = shift;
  63. my $nodeNum1 = shift;
  64. my $nodeNum2 = shift;
  65. my $weight1 = shift;
  66. my $weight2 = shift;
  67. my $parentNum = shift;
  68. $parentNum = $self->{numNodes} if (not defined($parentNum));
  69. return if ($self->isConnected($nodeNum1, $nodeNum2));
  70. $self->addLeaf($parentNum);
  71. $self->{left}->{$parentNum} = $nodeNum1;
  72. $self->{right}->{$parentNum} = $nodeNum2;
  73. $self->{parent}->{$nodeNum1} = $parentNum;
  74. $self->{parent}->{$nodeNum2} = $parentNum;
  75. $self->{weight}->{$nodeNum1} = $weight1 - $self->getHeight($nodeNum1);
  76. $self->{weight}->{$nodeNum2} = $weight2 - $self->getHeight($nodeNum2);
  77. }
  78. sub connect {
  79. my $self = shift;
  80. my $nodeNum1 = shift;
  81. my $nodeNum2 = shift;
  82. my $weight1 = shift;
  83. my $weight2 = shift;
  84. my $parentNum = shift;
  85. $parentNum = $self->{numNodes} if (not defined($parentNum));
  86. return if ($self->isConnected($nodeNum1, $nodeNum2));
  87. $self->addLeaf($parentNum);
  88. $self->{left}->{$parentNum} = $nodeNum1;
  89. $self->{right}->{$parentNum} = $nodeNum2;
  90. $self->{parent}->{$nodeNum1} = $parentNum;
  91. $self->{parent}->{$nodeNum2} = $parentNum;
  92. $self->{weight}->{$nodeNum1} = $weight1;
  93. $self->{weight}->{$nodeNum2} = $weight2;
  94. }
  95. sub getHeight {
  96. my $self = shift;
  97. my $node = shift;
  98. my $height = 0;
  99. while ($self->{left}->{$node} != -1) {
  100. $height += $self->{weight}->{$self->{left}->{$node}};
  101. $node = $self->{left}->{$node};
  102. }
  103. return $height;
  104. }
  105. sub copyBranch {
  106. my $self = shift;
  107. my $newTree = shift;
  108. my $root = shift;
  109. my @stack = ();
  110. my %visited;
  111. push (@stack, $root);
  112. while(@stack > 0) {
  113. my $node = $stack[scalar(@stack)-1];
  114. ##
  115. if ($self->{left}->{$node} != -1 and not exists($visited{$self->{left}->{$node}})) {
  116. push(@stack, $self->{left}->{$node});
  117. }
  118. else {
  119. if ($self->{right}->{$node} != -1 and not exists($visited{$self->{right}->{$node}})) {
  120. push(@stack, $self->{right}->{$node});
  121. }
  122. else {
  123. $visited{$node} = 1;
  124. $newTree->{nodes}->{$node} = $self->{nodes}->{$node};
  125. $newTree->{parent}->{$node} = $self->{parent}->{$node};
  126. $newTree->{left}->{$node} = $self->{left}->{$node};
  127. $newTree->{right}->{$node} = $self->{right}->{$node};
  128. $newTree->{weight}->{$node} = $self->{weight}->{$node};
  129. pop(@stack);
  130. }
  131. }
  132. }
  133. }
  134. sub copyNode {
  135. my $self = shift;
  136. my $newTree = shift;
  137. my $node = shift;
  138. return if ( exists($newTree->{nodes}->{$node}) );
  139. $newTree->{nodes}->{$node} = $self->{nodes}->{$node};
  140. $newTree->{parent}->{$node} = $self->{parent}->{$node};
  141. $newTree->{left}->{$node} = $self->{left}->{$node};
  142. $newTree->{right}->{$node} = $self->{right}->{$node};
  143. $newTree->{weight}->{$node} = $self->{weight}->{$node};
  144. $newTree->{numNodes}++;
  145. }
  146. ## root the tree at one of its leafs "L" , i.e. rearrange the whole tree
  147. ## so that finally the parent of "L" is new root
  148. sub reRoot {
  149. my $self = shift;
  150. my $newRoot = shift;
  151. if (not $self->isLeaf($newRoot)) {
  152. print "Error: $newRoot is not a leaf!\n";
  153. return -1;
  154. }
  155. my $newTree = simpleTree->new();
  156. my $prevNode = $newRoot;
  157. my $prevParent = -1;
  158. my $newParent = -1;
  159. my $parent = $self->{parent}->{$newRoot};
  160. my $weight = $self->{weight}->{$newRoot};
  161. ## save new "root node" for update at the end
  162. my $newRootWeight = $self->{weight}->{$newRoot};
  163. my $saveParent = $parent;
  164. #$newTree->addNode($newRoot, -1, -1, -1, $self->{weight}->{$newRoot});
  165. ## if parent of "new root" is already root: just set "new root" as root and update weight
  166. if ($parent == $self->getRoot()) {
  167. if ($self->{left}->{$parent} == $newRoot) {
  168. $self->copyBranch($newTree, $self->{right}->{$parent});
  169. $newTree->{parent}->{$self->{right}->{$parent}} = -1;
  170. ## update weight of new root
  171. $newTree->{weight}->{$self->{right}->{$parent}} += $weight;
  172. }
  173. else {
  174. $self->copyBranch($newTree, $self->{left}->{$parent});
  175. $newTree->{parent}->{$self->{left}->{$parent}} = -1;
  176. ## update weight of new root
  177. $newTree->{weight}->{$self->{left}->{$parent}} += $weight;
  178. }
  179. return $newTree;
  180. }
  181. ## if node which is to become root isnt directly below root in tree:
  182. ## set "new root" as root and go up the tree; at each level, copy branch not
  183. ## containing Q and reverse roles of current parent and child
  184. while ($parent != $self->getRoot()) {
  185. ## insert current parent into new tree - set parent explicitly!
  186. $self->copyNode($newTree, $parent);
  187. $newTree->{parent}->{$parent} = $prevParent;
  188. ## coming from left node, then copy right branch
  189. if ($self->{left}->{$parent} == $prevNode) {
  190. $self->copyBranch($newTree, $self->{right}->{$parent});
  191. $newParent = $self->{parent}->{$parent};
  192. ## root of tree is reached: copy branch not containing Q
  193. if ($newParent == $self->getRoot()) {
  194. ## if Q is in right branch
  195. if ($self->{right}->{$newParent} == $parent) {
  196. $newTree->{left}->{$parent} = $self->{left}->{$newParent};
  197. $self->copyBranch($newTree, $self->{left}->{$newParent});
  198. $newTree->{parent}->{$self->{left}->{$newParent}} = $parent;
  199. ## update weight
  200. $newTree->{weight}->{$self->{left}->{$newParent}} += $self->{weight}->{$parent};
  201. }
  202. ## if Q is in left branch
  203. else {
  204. $newTree->{left}->{$parent} = $self->{right}->{$newParent};
  205. $self->copyBranch($newTree, $self->{right}->{$newParent});
  206. $newTree->{parent}->{$self->{right}->{$newParent}} = $parent;
  207. ## update weight
  208. $newTree->{weight}->{$self->{right}->{$newParent}} += $self->{weight}->{$parent};
  209. }
  210. }
  211. ## change role of parent and child
  212. else {
  213. $newTree->{left}->{$parent} = $newParent;
  214. $newTree->{parent}->{$newParent} = $parent;
  215. }
  216. ## go up one level
  217. $prevNode = $parent;
  218. $prevParent = $parent;
  219. $parent = $newParent;
  220. }
  221. ## coming from right node, then copy left branch
  222. elsif ($self->{right}->{$parent} == $prevNode) {
  223. $self->copyBranch($newTree, $self->{left}->{$parent});
  224. $newParent = $self->{parent}->{$parent};
  225. if ($newParent == $self->getRoot()) {
  226. if ($self->{right}->{$newParent} == $parent) {
  227. $newTree->{right}->{$parent} = $self->{left}->{$newParent};
  228. $self->copyBranch($newTree, $self->{left}->{$newParent});
  229. $newTree->{parent}->{$self->{left}->{$newParent}} = $parent;
  230. ## update weight
  231. $newTree->{weight}->{$self->{left}->{$newParent}} += $self->{weight}->{$parent};
  232. }
  233. else {
  234. $newTree->{right}->{$parent} = $self->{right}->{$newParent};
  235. $self->copyBranch($newTree, $self->{right}->{$newParent});
  236. $newTree->{parent}->{$self->{right}->{$newParent}} = $parent;
  237. ## update weight
  238. $newTree->{weight}->{$self->{right}->{$newParent}} += $self->{weight}->{$parent};
  239. }
  240. }
  241. else {
  242. $newTree->{right}->{$parent} = $newParent;
  243. $newTree->{parent}->{$newParent} = $parent;
  244. }
  245. $prevNode = $parent;
  246. $prevParent = $parent;
  247. $parent = $newParent;
  248. }
  249. }
  250. ## finally update weight of new root (i.e. parent of "newRoot")
  251. $newTree->{weight}->{$saveParent} = $newRootWeight;
  252. return $newTree;
  253. }
  254. sub getRoot {
  255. my $self = shift;
  256. foreach my $node (keys %{$self->{nodes}}) {
  257. if ($self->{parent}->{$node} == -1) {
  258. return $node;
  259. }
  260. }
  261. return -1;
  262. }
  263. ## weight each leaf in tree with formula (5.9), Durbin
  264. ## i.e. do postorder traversal
  265. sub weightScheme {
  266. my $self = shift;
  267. my %alreadyVisited;
  268. my %leafWeight;
  269. my @stack;
  270. my $root = $self->getRoot();
  271. ## initialize leaf weights with edge lengths of leafs
  272. my @leafs = $self->getAllLeavesBelow($root);
  273. foreach my $leaf (@leafs) {
  274. $leafWeight{$leaf} = $self->{weight}->{$leaf};
  275. }
  276. ## reweight edge length according to formula (5.9):
  277. ## do postorder traversal and for each inner node (these are visited in appropriate manner so that
  278. ## all nodes below have already been visited) share its weight among its leafs
  279. push(@stack, $root);
  280. while (scalar(@stack) != 0) {
  281. my $currentNode = $stack[$#stack];
  282. my $left = $self->{left}->{$currentNode};
  283. my $right = $self->{right}->{$currentNode};
  284. ## if unvisited inner node
  285. if ($left != -1 and not exists($alreadyVisited{$left})) {
  286. push(@stack, $left);
  287. }
  288. else {
  289. if ($right != -1 and not exists($alreadyVisited{$right})) {
  290. push(@stack, $right);
  291. }
  292. else {
  293. #print "$currentNode\n";
  294. if ($self->isLeaf($currentNode)) {
  295. ## skip
  296. }
  297. ## postorder traversal: all nodes below currentNode have been visited
  298. else {
  299. ## share weight at parent among all leafs
  300. my $parentWeight = $self->{weight}->{$currentNode};
  301. my @myLeafs = $self->getAllLeavesBelow($currentNode);
  302. my $normalizer = 0;
  303. foreach my $leaf (@myLeafs) {
  304. $normalizer += $leafWeight{$leaf};
  305. }
  306. foreach my $leaf (@myLeafs) {
  307. $leafWeight{$leaf} = $leafWeight{$leaf} + $parentWeight * $leafWeight{$leaf}/$normalizer;
  308. print "new leaf weight for $leaf: $leafWeight{$leaf} ($parentWeight, $normalizer)\n";
  309. }
  310. }
  311. $alreadyVisited{$currentNode} = 1;
  312. pop(@stack);
  313. }
  314. }
  315. }
  316. return %leafWeight;
  317. }
  318. ## calculate mean branch length from all leafs to root
  319. sub meanBranchLength {
  320. my $self = shift;
  321. my @leafs = $self->getAllLeavesBelow($self->getRoot());
  322. my $totalLen = 0;
  323. for (my $i=0; $i<@leafs; $i++) {
  324. my $parent = $self->{parent}->{$leafs[$i]};
  325. my $branchLen = $self->{weight}->{$leafs[$i]};
  326. while ($parent != -1) {
  327. $branchLen += $self->{weight}->{$parent};
  328. $parent = $self->{parent}->{$parent};
  329. }
  330. $totalLen += $branchLen;
  331. }
  332. my $meanLen = $totalLen / scalar(@leafs);
  333. return ($meanLen);
  334. }
  335. sub isLeaf {
  336. my $self = shift;
  337. my $node = shift;
  338. return 1 if $self->{left}->{$node} == -1 and $self->{right}->{$node} == -1;
  339. return 0;
  340. }
  341. ## return all leaves below node "node"
  342. sub getAllLeavesBelow {
  343. my $self = shift;
  344. my $node = shift;
  345. my @leaves;
  346. my @stack;
  347. push(@stack, $node);
  348. while(scalar(@stack) != 0) {
  349. my $currentNode = pop(@stack);
  350. ## new leaf found
  351. if ($self->{left}->{$currentNode} == -1 and $self->{right}->{$currentNode} == -1) {
  352. push(@leaves, $currentNode);
  353. }
  354. if ($self->{left}->{$currentNode} != -1) {
  355. push(@stack, $self->{left}->{$currentNode});
  356. }
  357. if ($self->{right}->{$currentNode} != -1) {
  358. push(@stack, $self->{right}->{$currentNode});
  359. }
  360. }
  361. return @leaves;
  362. }
  363. sub visitAllLeaves {
  364. my $self = shift;
  365. my $root = $self->getRoot();
  366. my @leaves = $self->getAllLeavesBelow($root);
  367. foreach my $node (keys %{$self->{nodes}}) {
  368. $self->{visited}->{$node} = 0;
  369. }
  370. foreach my $leaf (@leaves) {
  371. $self->{visited}->{$leaf} = 1;
  372. }
  373. }
  374. ## get next node which has only "leaves" attached
  375. sub getNextAllBelowVisitedNode {
  376. my $self = shift;
  377. my $root = $self->getRoot();
  378. while ( $self->{visited}->{$root} != 1) {
  379. if ( ! $self->{visited}->{$self->{left}->{$root}}) {
  380. $root = $self->{left}->{$root};
  381. } else {
  382. if ( ! $self->{visited}->{$self->{right}->{$root}}) {
  383. $root = $self->{right}->{$root};
  384. } else {
  385. $self->{visited}->{$root} = 1;
  386. }
  387. }
  388. }
  389. return $root;
  390. }
  391. sub lca {
  392. my $self = shift;
  393. }
  394. sub print {
  395. my $self = shift;
  396. foreach my $node (keys %{$self->{nodes}}) {
  397. #print "node $node\n";
  398. print ("node: $node has parent " . $self->{parent}->{$node} . " with weight " . $self->{weight}->{$node} . " left=" . $self->{left}->{$node} . " right=" . $self->{right}->{$node} ."\n");
  399. }
  400. }
  401. 1;