generator_utils.cpp 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699
  1. /******************************************************************************
  2. *
  3. * Copyright (C) 2020 by
  4. * The Salk Institute for Biological Studies
  5. *
  6. * Use of this source code is governed by an MIT-style
  7. * license that can be found in the LICENSE file or at
  8. * https://opensource.org/licenses/MIT.
  9. *
  10. ******************************************************************************/
  11. #include <fstream>
  12. #include <algorithm>
  13. #include <ctype.h>
  14. #include "generator_utils.h"
  15. #include "api/python_export_constants.h"
  16. #include "api/compartment_utils.h"
  17. #include "generator_structs.h"
  18. using namespace MCell::API;
  19. namespace MCell {
  20. void check_not_empty(const Value& parent, const char* key, const std::string& msg_location) {
  21. string s = parent[key].asString();
  22. bool white_space = std::all_of(s.begin(), s.end(), ::isspace);
  23. if (s == "" || white_space) {
  24. ERROR("Unexpected empty string as value of " + msg_location + " - " + key + ".");
  25. }
  26. }
  27. static std::string replace_id_with_function(const std::string& id, const bool use_python_functions) {
  28. const auto& func_it = mdl_functions_to_py_bngl_map.find(id);
  29. if (func_it != mdl_functions_to_py_bngl_map.end()) {
  30. // replace
  31. if (use_python_functions) {
  32. return func_it->second.first;
  33. }
  34. else {
  35. // BNGL variant
  36. return func_it->second.second;
  37. }
  38. }
  39. else {
  40. // other id, keep it as it is
  41. return id;
  42. }
  43. }
  44. // when use_python_functions is true, function calls are replaced with Python function names
  45. // when false, they are replaced with BNGL function names
  46. std::string replace_function_calls_in_expr(const std::string& data_model_expr, const bool use_python_functions) {
  47. std::string res;
  48. // all function names are represented as ids, i.e. [a-zA-Z_][a-zA-Z0-9_]*
  49. // practically the same automaton as in get_used_ids
  50. enum state_t {
  51. START,
  52. IN_ID,
  53. IN_NUM
  54. };
  55. state_t state = START;
  56. string curr_id;
  57. for (size_t i = 0; i < data_model_expr.size(); i++) {
  58. char c = data_model_expr[i];
  59. switch (state) {
  60. case START:
  61. if (isalpha(c) || c == '_') {
  62. state = IN_ID;
  63. curr_id += c;
  64. }
  65. else if (isdigit(c)) {
  66. state = IN_NUM;
  67. res += c;
  68. }
  69. else {
  70. res += c;
  71. }
  72. break;
  73. case IN_ID:
  74. if (isalnum(c) || c == '_') {
  75. curr_id += c;
  76. }
  77. else {
  78. res += replace_id_with_function(curr_id, use_python_functions);
  79. res += c;
  80. curr_id = "";
  81. state = START;
  82. }
  83. break;
  84. case IN_NUM:
  85. if (isdigit(c) || c == '.' || c == 'e' || c == '+' || c == '-') {
  86. // ok - but must not be exp
  87. res += c;
  88. }
  89. else {
  90. if (isalpha(c) || c == '_') {
  91. state = IN_ID;
  92. curr_id += c;
  93. }
  94. else {
  95. state = START;
  96. res += c;
  97. }
  98. }
  99. break;
  100. }
  101. }
  102. if (state == IN_ID) {
  103. res += replace_id_with_function(curr_id, use_python_functions);
  104. }
  105. return res;
  106. }
  107. std::string get_module_name_w_prefix(const std::string& output_files_prefix, const std::string file_suffix) {
  108. if (output_files_prefix == "" || output_files_prefix.back() == '/' || output_files_prefix.back() == '\\') {
  109. return file_suffix;
  110. }
  111. else {
  112. size_t pos = output_files_prefix.find_last_of("/\\");
  113. if (pos == string::npos) {
  114. return output_files_prefix + "_" + file_suffix;
  115. }
  116. else {
  117. return output_files_prefix.substr(pos) + "_" + file_suffix;
  118. }
  119. }
  120. }
  121. void parse_rxn_rule_side(
  122. Json::Value& substances_node,
  123. std::vector<std::string>& substances,
  124. std::vector<std::string>& orientations) {
  125. // cannot use BNGL parser directly because reactions may contain orientations
  126. substances.clear();
  127. orientations.clear();
  128. string str = substances_node.asString();
  129. // finite automata to parse the reaction side string, e.g. "a + b"
  130. enum state_t {
  131. START,
  132. CPLX,
  133. IN_PAREN,
  134. AFTER_MOL,
  135. AFTER_ORIENT,
  136. AFTER_PLUS
  137. };
  138. state_t state = START;
  139. string current_id;
  140. for (size_t i = 0; i < str.size(); i++) {
  141. char c = str[i];
  142. switch (state) {
  143. case START:
  144. if (isalnum(c) || c == '_' || c == '.' || c == '@') {
  145. state = CPLX;
  146. current_id = c;
  147. }
  148. else if (isblank(c)) {
  149. // ok
  150. }
  151. else {
  152. ERROR("Could not parse reaction side " + str + " (START).");
  153. }
  154. break;
  155. case CPLX:
  156. if (isalnum(c) || c == '_' || c == '.' || c == '@' || c == ':') {
  157. current_id += c;
  158. }
  159. else if (c == '(') {
  160. state = IN_PAREN;
  161. current_id += '(';
  162. }
  163. else if (isblank(c) || c == '+' || c == '\'' || c == ',' || c == ';') {
  164. substances.push_back(current_id);
  165. orientations.push_back("");
  166. if (c == '\'' || c == ',' || c == ';') {
  167. orientations.back() = c;
  168. }
  169. current_id = "";
  170. if (c == '+') {
  171. state = AFTER_PLUS;
  172. }
  173. else {
  174. state = AFTER_MOL;
  175. }
  176. }
  177. else {
  178. ERROR("Could not parse reaction side " + str + " (MOL_ID).");
  179. }
  180. break;
  181. case IN_PAREN:
  182. if (isalnum(c) || c == '_' || c == ',' || c == '~' || c == '!' || c == '+' || c == '?') {
  183. current_id += c;
  184. }
  185. else if (c == ')') {
  186. state = CPLX;
  187. current_id += ')';
  188. }
  189. else {
  190. ERROR("Could not parse reaction side " + str + " (IN_PAREN).");
  191. }
  192. break;
  193. case AFTER_MOL:
  194. if (c == '+') {
  195. state = AFTER_PLUS;
  196. }
  197. else if (c == '\'') {
  198. state = AFTER_ORIENT;
  199. orientations.back() = c;
  200. }
  201. else if (c == ',') {
  202. state = AFTER_ORIENT;
  203. orientations.back() = c;
  204. }
  205. else if (c == ';') {
  206. state = AFTER_ORIENT;
  207. orientations.back() = c;
  208. }
  209. else if (isblank(c)) {
  210. // ok
  211. }
  212. else {
  213. ERROR("Could not parse reaction side " + str + " (AFTER_MOL).");
  214. }
  215. break;
  216. case AFTER_ORIENT:
  217. if (c == '+') {
  218. state = AFTER_PLUS;
  219. }
  220. else if (isblank(c)) {
  221. // ok
  222. }
  223. else {
  224. ERROR("Could not parse reaction side " + str + " (AFTER_ORIENT).");
  225. }
  226. break;
  227. case AFTER_PLUS:
  228. if (isalnum(c) || c == '_' || c == '@') {
  229. state = CPLX;
  230. current_id = c;
  231. }
  232. else if (isblank(c)) {
  233. // ok
  234. }
  235. else {
  236. ERROR("Could not parse reaction side " + str + " (AFTER_PLUS).");
  237. }
  238. break;
  239. default:
  240. assert(false);
  241. }
  242. }
  243. if (current_id != "") {
  244. substances.push_back(current_id);
  245. orientations.push_back("");
  246. }
  247. }
  248. string remove_compartments(const std::string& species_name) {
  249. size_t i = 0;
  250. string res;
  251. bool in_compartment = false;
  252. while (i < species_name.size()) {
  253. char c = species_name[i];
  254. if (c == '@') {
  255. assert(!in_compartment);
  256. in_compartment = true;
  257. }
  258. else if (in_compartment && (!isalnum(c) && c != '_')) {
  259. in_compartment = false;
  260. if (c != ':') {
  261. res += c;
  262. }
  263. }
  264. else if (!in_compartment) {
  265. res += c;
  266. }
  267. i++;
  268. }
  269. return res;
  270. }
  271. // returns "" if there are multiple compartments and sets *has_multiple_compartments to
  272. // true if it is not nullptr
  273. string get_single_compartment(const std::string& name, bool* has_multiple_compartments) {
  274. std::vector<std::string> compartments;
  275. API::get_compartment_names(name, compartments);
  276. if (has_multiple_compartments != nullptr) {
  277. *has_multiple_compartments = false;
  278. }
  279. if (compartments.empty()) {
  280. return "";
  281. }
  282. else {
  283. string res = compartments[0];
  284. for (size_t i = 1; i < compartments.size(); i++) {
  285. if (res != compartments[i]) {
  286. // multiple compartments
  287. if (has_multiple_compartments != nullptr) {
  288. *has_multiple_compartments = true;
  289. }
  290. return "";
  291. }
  292. }
  293. return res;
  294. }
  295. }
  296. static string make_cplx(const string bngl_str, const string orient = "", const string compartment = "") {
  297. string res = S(MDOT) + API::NAME_CLASS_COMPLEX + "('" + fix_dots_in_simple_species(bngl_str) + "'";
  298. if (orient != "") {
  299. res += S(", ") + API::NAME_ORIENTATION + " = " + MDOT + API::NAME_ENUM_ORIENTATION + "." + orient;
  300. }
  301. if (compartment != "") {
  302. res += S(", ") + API::NAME_COMPARTMENT_NAME + " = '" + compartment + "'";
  303. }
  304. res += ")";
  305. return res;
  306. }
  307. bool static is_mdot_superclass(const std::string& name) {
  308. return
  309. name == S(MDOT) + API::NAME_CV_AllMolecules ||
  310. name == S(MDOT) + API::NAME_CV_AllVolumeMolecules ||
  311. name == S(MDOT) + API::NAME_CV_AllSurfaceMolecules;
  312. }
  313. string make_species_or_cplx(
  314. const SharedGenData& data,
  315. const std::string& name,
  316. const std::string& orient,
  317. const std::string& compartment) {
  318. bool is_superclass = is_mdot_superclass(name);
  319. if (is_superclass && orient == "" && compartment == "") {
  320. return name;
  321. }
  322. if (is_superclass || !data.bng_mode) {
  323. stringstream ss;
  324. const SpeciesOrMolType* species_info = data.find_species_or_mol_type_info(name);
  325. if (is_superclass || (species_info != nullptr && species_info->is_species)) {
  326. // substance was declared as species, we can use its id directly
  327. ss << make_id(name) << "." << API::NAME_INST << "(";
  328. if (orient != "") {
  329. assert(compartment == "");
  330. ss <<
  331. API::NAME_ORIENTATION << " = " <<
  332. MDOT << API::NAME_ENUM_ORIENTATION << "." << orient;
  333. }
  334. if (compartment != "") {
  335. assert(orient == "");
  336. ss <<
  337. API::NAME_COMPARTMENT_NAME << " = '" << compartment << "'";
  338. }
  339. ss << ")";
  340. return ss.str();
  341. }
  342. }
  343. // otherwise we will generate a BNGL string
  344. return make_cplx(name, orient, compartment);
  345. }
  346. string reaction_name_to_id(const string& json_name) {
  347. string res_name = json_name;
  348. replace(res_name.begin(), res_name.end(), ' ', '_');
  349. replace(res_name.begin(), res_name.end(), '.', '_');
  350. replace(res_name.begin(), res_name.end(), ')', '_');
  351. replace(res_name.begin(), res_name.end(), '(', '_');
  352. replace(res_name.begin(), res_name.end(), '!', '_');
  353. replace(res_name.begin(), res_name.end(), '~', '_');
  354. replace(res_name.begin(), res_name.end(), ':', '_');
  355. res_name = regex_replace(res_name, regex("<->"), "revto");
  356. res_name = regex_replace(res_name, regex("->"), "to");
  357. res_name = regex_replace(res_name, regex("\\+"), "plus");
  358. res_name = regex_replace(res_name, regex("\\?"), "any_bond");
  359. res_name = regex_replace(res_name, regex("'"), "_up");
  360. res_name = regex_replace(res_name, regex(","), "_down");
  361. res_name = regex_replace(res_name, regex(";"), "_any");
  362. res_name = regex_replace(res_name, regex("@"), "_at_");
  363. return res_name;
  364. }
  365. string get_rxn_id(Json::Value& reaction_list_item, uint& unnamed_rxn_counter) {
  366. string name = reaction_list_item[KEY_RXN_NAME].asString();
  367. if (name == "") {
  368. name = UNNAMED_REACTION_RULE_PREFIX + to_string(unnamed_rxn_counter);
  369. unnamed_rxn_counter++;
  370. }
  371. else {
  372. name = reaction_name_to_id(name);
  373. }
  374. return name;
  375. }
  376. string create_count_name(
  377. const string& what_to_count, const string& where_to_count,
  378. const bool molecules_not_species) {
  379. // first remove all cterm_refixes
  380. regex pattern_cterm(COUNT_TERM_PREFIX);
  381. string what_to_count_no_cterm = regex_replace(what_to_count, pattern_cterm, "");
  382. string res = COUNT_PREFIX + fix_id(what_to_count_no_cterm);
  383. if (!molecules_not_species) {
  384. res += "_species";
  385. }
  386. if (where_to_count != WORLD && where_to_count != "") {
  387. res += "_" + where_to_count;
  388. }
  389. return res;
  390. }
  391. uint get_num_counts_in_mdl_string(const string& mdl_string) {
  392. uint res = 0;
  393. size_t pos = 0;
  394. while ((pos = mdl_string.find(COUNT, pos)) != string::npos) {
  395. res++;
  396. pos += strlen(COUNT);
  397. }
  398. return res;
  399. }
  400. string remove_c_comment(const string& str) {
  401. std::regex e ("/\\*.*\\*/");
  402. return std::regex_replace(str, e, "");
  403. }
  404. string remove_whitespace(const string& str) {
  405. std::regex e ("[ \t]");
  406. return std::regex_replace(str, e, "");
  407. }
  408. size_t find_end_brace_pos(const string& str, const size_t start) {
  409. int braces_count = 1;
  410. assert(str[start] == '[');
  411. size_t i = start + 1;
  412. while (braces_count > 0 && i < str.size()) {
  413. if (str[i] == '[') {
  414. braces_count++;
  415. }
  416. else if (str[i] == ']') {
  417. braces_count--;
  418. }
  419. i++;
  420. }
  421. return i - 1;
  422. }
  423. void process_single_count_term(
  424. const SharedGenData& data,
  425. const string& mdl_string,
  426. bool& rxn_not_mol,
  427. bool& molecules_not_species,
  428. string& what_to_count,
  429. string& where_to_count,
  430. string& orientation) {
  431. // mdl_string is always in the form COUNT[what,where]
  432. size_t start_brace = mdl_string.find('[');
  433. size_t comma = mdl_string.rfind(',');
  434. size_t end_brace = find_end_brace_pos(mdl_string, start_brace);
  435. if (mdl_string.find(COUNT) == string::npos) {
  436. ERROR("String 'COUNT' was not found in mdl_string '" + mdl_string + "'.");
  437. }
  438. if (start_brace == string::npos || comma == string::npos || end_brace == string::npos ||
  439. start_brace > comma || start_brace > end_brace || comma > end_brace
  440. ) {
  441. ERROR("Malformed mdl_string '" + mdl_string + "'.");
  442. }
  443. what_to_count = mdl_string.substr(start_brace + 1, comma - start_brace - 1);
  444. what_to_count = trim(what_to_count);
  445. // default is 'molecules_pattern', for now we are storing the
  446. // as a comment because the counting type belongs to the count term,
  447. // not to the whole 'reaction_output_list'
  448. // TODO: this should resolved in a better way
  449. molecules_not_species = true;
  450. if (what_to_count.find(MARKER_SPECIES_COMMENT) != string::npos) {
  451. molecules_not_species = false;
  452. }
  453. what_to_count = remove_c_comment(what_to_count);
  454. // process orientation
  455. orientation = "";
  456. assert(what_to_count != "");
  457. char last_c = what_to_count.back();
  458. if (last_c == '\'' || last_c == ',' || last_c == ';') {
  459. string s;
  460. s = last_c;
  461. orientation = convert_orientation(s);
  462. what_to_count = what_to_count.substr(0, what_to_count.size() - 1);
  463. }
  464. if (data.find_reaction_rule_info(what_to_count) != nullptr) {
  465. rxn_not_mol = true;
  466. }
  467. else {
  468. // if we did not find the name to be a reaction, we assume it is a BNGL pattern
  469. rxn_not_mol = false;
  470. }
  471. string where_tmp = mdl_string.substr(comma + 1, end_brace - comma - 1);
  472. size_t dot_pos = where_tmp.find('.');
  473. if (dot_pos != string::npos) {
  474. // no completely sure when a '.' can appear
  475. where_tmp = where_tmp.substr(dot_pos + 1);
  476. }
  477. where_tmp = trim(where_tmp);
  478. if (where_tmp == WORLD) {
  479. where_tmp = "";
  480. }
  481. // remove all [ALL] and replace box1[box1_sr1] -> box1_box1_sr1
  482. where_to_count = "";
  483. size_t i = 0;
  484. while (i < where_tmp.size()) {
  485. char c = where_tmp[i];
  486. if (c == '[') {
  487. // followed by ALL]?
  488. if (i + 4 < where_tmp.size() && where_tmp.substr(i + 1, 4) == "ALL]") {
  489. // skip
  490. i += 5;
  491. }
  492. else {
  493. // replace
  494. where_to_count += '_';
  495. i++;
  496. }
  497. }
  498. else if (c == ']') {
  499. // ignore
  500. i++;
  501. }
  502. else {
  503. // keep character
  504. where_to_count += c;
  505. i++;
  506. }
  507. }
  508. }
  509. // sets val if the name_or_value is a floating point value,
  510. // if not, tries to find the parameter and reads its value
  511. // returns true on success
  512. // parameters are not evaluated and only one level is tried,
  513. // returns false if value was not obtained
  514. bool get_parameter_value(Json::Value& mcell, const string& name_or_value, double& val) {
  515. try {
  516. val = stod(name_or_value);
  517. return true;
  518. }
  519. catch (const std::invalid_argument&) {
  520. // not a float, try to get parameter value
  521. if (mcell.isMember(KEY_PARAMETER_SYSTEM) && mcell.isMember(KEY_MODEL_PARAMETERS)) {
  522. Json::Value& params = mcell[KEY_PARAMETER_SYSTEM][KEY_MODEL_PARAMETERS];
  523. for (Value::ArrayIndex i = 0; i < params.size(); i++) {
  524. if (params[i][KEY_NAME].asString() == name_or_value) {
  525. try {
  526. if (params[i].isMember(KEY__EXTRAS)) {
  527. val = stod(params[i][KEY__EXTRAS][KEY_PAR_VALUE].asString());
  528. }
  529. else {
  530. val = stod(params[i][KEY_PAR_EXPRESSION].asString());
  531. }
  532. return true;
  533. }
  534. catch (const std::invalid_argument&) {
  535. return false;
  536. }
  537. }
  538. }
  539. }
  540. }
  541. return false;
  542. }
  543. bool is_volume_mol_type(Json::Value& mcell, const std::string& mol_type_name) {
  544. string mol_type_name_no_comp = remove_compartments(mol_type_name);
  545. Value& define_molecules = get_node(mcell, KEY_DEFINE_MOLECULES);
  546. check_version(KEY_DEFINE_MOLECULES, define_molecules, VER_DM_2014_10_24_1638);
  547. Value& molecule_list = get_node(define_molecules, KEY_MOLECULE_LIST);
  548. for (Value::ArrayIndex i = 0; i < molecule_list.size(); i++) {
  549. Value& molecule_list_item = molecule_list[i];
  550. check_version(KEY_MOLECULE_LIST, molecule_list_item, VER_DM_2018_10_16_1632);
  551. string name = molecule_list_item[KEY_MOL_NAME].asString();
  552. if (name != mol_type_name_no_comp) {
  553. continue;
  554. }
  555. string mol_type = molecule_list_item[KEY_MOL_TYPE].asString();
  556. CHECK_PROPERTY(mol_type == VALUE_MOL_TYPE_2D || mol_type == VALUE_MOL_TYPE_3D);
  557. return mol_type == VALUE_MOL_TYPE_3D;
  558. }
  559. ERROR("Could not find species or molecule type '" + mol_type_name_no_comp + "'.");
  560. }
  561. static void get_mol_types_in_species(const std::string& species_name, vector<string>& mol_types) {
  562. mol_types.clear();
  563. size_t i = 0;
  564. string current_name;
  565. bool in_name = true;
  566. while (i < species_name.size()) {
  567. char c = species_name[i];
  568. if (c == '(') {
  569. in_name = false;
  570. assert(current_name != "");
  571. mol_types.push_back(current_name);
  572. current_name = "";
  573. }
  574. else if (c == '.') {
  575. in_name = true;
  576. }
  577. else if (in_name && !isspace(c)) {
  578. current_name += c;
  579. }
  580. i++;
  581. }
  582. if (current_name != "") {
  583. mol_types.push_back(current_name);
  584. }
  585. }
  586. bool is_volume_species(Json::Value& mcell, const std::string& species_name) {
  587. if (is_simple_species(species_name)) {
  588. return is_volume_mol_type(mcell, species_name);
  589. }
  590. else {
  591. vector<string> mol_types;
  592. get_mol_types_in_species( species_name, mol_types);
  593. for (string& mt: mol_types) {
  594. if (!is_volume_mol_type(mcell, mt)) {
  595. // surface
  596. return false;
  597. }
  598. }
  599. return true;
  600. }
  601. }
  602. } // namespace MCell