reac_data_analyzer.py 3.7 KB

#!/usr/bin/env python
"""
This is free and unencumbered software released into the public domain.

Anyone is free to copy, modify, publish, use, compile, sell, or
distribute this software, either in source code form or as a compiled
binary, for any purpose, commercial or non-commercial, and by any
means.

In jurisdictions that recognize copyright laws, the author or authors
of this software dedicate any and all copyright interest in the
software to the public domain. We make this dedication for the benefit
of the public at large and to the detriment of our heirs and
successors. We intend this dedication to be an overt act of
relinquishment in perpetuity of all present and future rights to this
software under copyright law.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
OTHER DEALINGS IN THE SOFTWARE.

For more information, please refer to [http://unlicense.org]
"""
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import sys
from itertools import count

# for now a simple script that prints averages of the last N values of react_data files
# usage:
#   python3 reac_data_analyzer.py N
# - N is how many of the last observable counts from the react_data outputs will be averaged
#
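# example (hypothetical seed directory and observable names):
#   python3 reac_data_analyzer.py 10
# would average the last 10 counts per observable from files such as
#   seed_00001/A_MDLString.dat
#   seed_00002/A_MDLString.dat
# and print one value per observable, averaged over all seed_* directories
#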
# collects the observable counts from all seed_* directories and returns
# a dictionary observable -> average count
def get_last_observable_counts(num_last_samples_to_avg=1):
    counts = {}
    num_seeds = 0
    seed_dirs = os.listdir()
    # go through all seed_* directories
    for seed_dir in seed_dirs:
        if not seed_dir.startswith('seed_'):
            continue
        # we need the total number of directories to compute an average later
        num_seeds += 1
        # go through all *.dat files in the seed directory
        file_list = os.listdir(seed_dir)
        for file in file_list:
            file_path = os.path.join(seed_dir, file)
            # we care only about .dat files
            if os.path.isfile(file_path) and file.endswith('.dat'):
                print("Processing " + file_path)
                observable = os.path.splitext(file)[0]
                if observable.endswith('_MDLString'):
                    observable = observable[:-len('_MDLString')]
                # read the .dat file into a pandas dataframe
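                # (assumption: each .dat file holds two space-separated columns,
                # time and count, as typically written by MCell reaction data
                # output; adjust 'sep' below if the actual format differs)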
                df = pd.read_csv(file_path, sep=' ', names=['time', 'count'])
                # get the average of the last N items
                avg_cnt = df.tail(num_last_samples_to_avg)['count'].mean()
                # and accumulate the observable count
                if observable in counts:
                    counts[observable] += avg_cnt
                else:
                    counts[observable] = avg_cnt
    # compute the average of the per-seed sums of averages we computed above
    res = {}
    for observable, total in sorted(counts.items()):
        res[observable] = total / num_seeds
    return res
# process the command-line argument
num_last_samples_to_avg = 1
if len(sys.argv) == 2:
    num_last_samples_to_avg = int(sys.argv[1])

# read all *.dat files in the seed_* subdirectories of the current directory
# and get a dictionary observable -> average count
avg_counts = get_last_observable_counts(num_last_samples_to_avg)
for observable, avg in sorted(avg_counts.items()):
    print(observable + ": " + str(avg))
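
# example output for a hypothetical run with observables named A and B
# (the values below are made up for illustration):
#   A: 12.5
#   B: 301.0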