checkpoint_signals.cpp 3.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144
  1. /******************************************************************************
  2. *
  3. * Copyright (C) 2020 by
  4. * The Salk Institute for Biological Studies
  5. *
  6. * Use of this source code is governed by an MIT-style
  7. * license that can be found in the LICENSE file or at
  8. * https://opensource.org/licenses/MIT.
  9. *
  10. ******************************************************************************/
  11. #include "checkpoint_signals.h"
  12. #include <iostream>
  13. #include <set>
  14. #include <signal.h>
  15. #ifndef _MSC_VER
  16. #include <unistd.h>
  17. #endif
  18. #include "api/api_common.h"
  19. #include "api/model.h"
  20. #include "api/python_exporter.h"
  21. #include "src4/world.h"
  22. #include "src4/viz_output_event.h"
  23. using namespace std;
  24. #ifndef _WIN64
  25. struct sigaction g_previous_sigaction_sigusr1;
  26. struct sigaction g_previous_sigaction_sigusr2;
  27. struct sigaction g_previous_sigaction_sigalrm;
  28. #endif
  29. namespace MCell {
  30. namespace API {
// Models that currently have checkpoint signal handling enabled.
// Entries are added by set_checkpoint_signals(), removed by
// unset_checkpoint_signals(), and iterated by checkpoint_signal_handler().
// WARNING: not multithread-safe
std::set<Model*> g_models;
  33. // WARNING: only limited set of calls is allowed in signal handlers,
  34. // e.g. no malloc
  35. void checkpoint_signal_handler(int signo) {
  36. // checkpoint can be requested multiple times even when the
  37. // scheduling of a checkpoint is running
  38. for (Model* m: g_models) {
  39. if (m->get_world() != nullptr) {
  40. m->get_world()->set_to_create_checkpoint_event_from_signal_hadler(signo, m);
  41. }
  42. }
  43. }
  44. // Set signal handlers for checkpointing on SIGUSR signals.
  45. void set_checkpoint_signals(Model* model) {
  46. bool already_set = !g_models.empty();
  47. g_models.insert(model);
  48. // Windows does not support USR signals
  49. if (!already_set) {
  50. #ifndef _WIN64
  51. struct sigaction sa;
  52. sa.sa_sigaction = NULL;
  53. sa.sa_handler = &checkpoint_signal_handler;
  54. sa.sa_flags = SA_RESTART;
  55. sigfillset(&sa.sa_mask);
  56. if (::sigaction(SIGUSR1, &sa, &g_previous_sigaction_sigusr1) != 0) {
  57. throw RuntimeError("Failed to install SIGUSR1 signal handler.");
  58. }
  59. if (::sigaction(SIGUSR2, &sa, &g_previous_sigaction_sigusr2) != 0) {
  60. throw RuntimeError("Failed to install SIGUSR2 signal handler.");
  61. }
  62. if (::sigaction(SIGALRM, &sa, &g_previous_sigaction_sigalrm) != 0) {
  63. throw RuntimeError("Failed to install SIGUSR2 signal handler.");
  64. }
  65. #endif
  66. }
  67. }
  68. void unset_checkpoint_signals(Model* model) {
  69. if (g_models.count(model) == 0) {
  70. // either not set or unset twice, ignore
  71. return;
  72. }
  73. g_models.erase(model);
  74. // Windows does not support USR signals
  75. // SIGALRM should be supported somehow but it does not work yet
  76. if (g_models.empty()) {
  77. #ifndef _WIN32
  78. if (sigaction(SIGUSR1, &g_previous_sigaction_sigusr1, nullptr) != 0) {
  79. cout << "Warning: failed to uninstall SIGUSR1 signal handler.\n";
  80. }
  81. if (sigaction(SIGUSR2, &g_previous_sigaction_sigusr2, nullptr) != 0) {
  82. cout << "Warning: failed to uninstall SIGUSR2 signal handler.\n";
  83. }
  84. if (sigaction(SIGALRM, &g_previous_sigaction_sigalrm, nullptr) != 0) {
  85. cout << "Warning: failed to uninstall SIGALRM signal handler.\n";
  86. }
  87. #endif
  88. }
  89. }
  90. void save_checkpoint_func(const double time, CheckpointSaveEventContext ctx) {
  91. const World* world = ctx.model->get_world();
  92. release_assert(
  93. world->scheduler.get_event_being_executed()->type_index == EVENT_TYPE_INDEX_CALL_START_ITERATION_CHECKPOINT &&
  94. "May be called only from event with index EVENT_TYPE_INDEX_CALL_START_ITERATION_CHECKPOINT, "
  95. " world/model data may be inconsistent otherwise"
  96. );
  97. uint64_t current_it = world->stats.get_current_iteration();
  98. std::string dir;
  99. if (ctx.append_it_to_dir) {
  100. dir =
  101. ctx.dir_prefix +
  102. VizOutputEvent::iterations_to_string(world->stats.get_current_iteration(), ctx.model->config.total_iterations) +
  103. BNG::PATH_SEPARATOR;
  104. }
  105. else {
  106. dir = ctx.dir_prefix;
  107. }
  108. cout << "Saving scheduled checkpoint in iteration " << current_it << " into " << dir << "\n";
  109. PythonExporter exporter(ctx.model);
  110. exporter.save_checkpoint(dir);
  111. }
  112. } // namespace API
  113. } // namespace MCell