mbd.cpp 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869
  1. /*****************************************************************
  2. *
  3. * the class IO provides all the functionality to access a
  4. * binary mcell3 reaction data output file.
  5. *
  6. * (c) 2008 Markus Dittrich
  7. *
  8. * This program is free software; you can redistribute it
  9. * and/or modify it under the terms of the GNU General Public
  10. * License Version 3 as published by the Free Software Foundation.
  11. *
  12. * This program is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. * GNU General Public License Version 3 for more details.
  16. *
  17. * You should have received a copy of the GNU General Public
  18. * License along with this program; if not, write to the Free
  19. * Software Foundation, Inc., 59 Temple Place - Suite 330,
  20. * Boston, MA 02111-1307, USA.
  21. *
  22. ****************************************************************/
  23. /* C headers */
  24. #include <bzlib.h>
  25. #include <cstdio>
  26. #include <cstdlib>
  27. #include <cstring>
  28. #include <errno.h>
  29. #include <zlib.h>
  30. /* STL headers */
  31. #include <iostream>
  32. /* local headers */
  33. #include "mbd.h"
  34. /** pull in some things from namespace std */
  35. using std::vector;
  36. using std::cout;
  37. using std::endl;
  38. using std::string;
  39. /*****************************************************************
  40. * constructor
  41. *****************************************************************/
  42. IO::IO(string afile_name)
  43. :
  44. file_name_(afile_name),
  45. num_blocks_(0),
  46. output_type_(0),
  47. blocksize_(0),
  48. stepsize_(0.0),
  49. needed_api_tag_("MCELL_BINARY_API_2")
  50. {}
  51. /******************************************************************
  52. * destructor
  53. ******************************************************************/
  54. IO::~IO()
  55. {
  56. free(buffer_);
  57. }
  58. /*****************************************************************
  59. *
  60. * public member functions
  61. *
  62. *****************************************************************/
  63. /******************************************************************
  64. * member function initizializing the object, vie opening
  65. * the file and parsing the header
  66. ******************************************************************/
  67. bool
  68. IO::initialize()
  69. {
  70. /* open file, don't parse headers if that fails already */
  71. bool status = open_();
  72. if (!status)
  73. {
  74. return false;
  75. }
  76. /* parse headers */
  77. status = parse_header_();
  78. return status;
  79. }
  80. /*******************************************************************
  81. * getter function returning the number of datablocks
  82. *******************************************************************/
  83. uint64_t
  84. IO::get_num_datablocks() const
  85. {
  86. return num_blocks_;
  87. }
  88. /*******************************************************************
  89. * getter function returning the STEP
  90. *******************************************************************/
  91. double
  92. IO::get_stepsize() const
  93. {
  94. return stepsize_;
  95. }
  96. /******************************************************************
  97. * getter function returning the block size
  98. ******************************************************************/
  99. uint64_t
  100. IO::get_blocksize() const
  101. {
  102. return blocksize_;
  103. }
  104. /******************************************************************
  105. * getter function for number of columns contained in data at id
  106. *****************************************************************/
  107. uint64_t
  108. IO::get_num_columns(int id) const
  109. {
  110. if (id < 0 || id > num_blocks_-1)
  111. return -1;
  112. return block_info_[id].num_cols;
  113. }
  114. /******************************************************************
  115. * getter function returning the block size
  116. ******************************************************************/
  117. vector<string>
  118. IO::get_blocknames() const
  119. {
  120. return block_names_;
  121. }
  122. /*******************************************************************
  123. * getter function returning the type of output
  124. ******************************************************************/
  125. uint16_t
  126. IO::get_output_type() const
  127. {
  128. return output_type_;
  129. }
  130. /*******************************************************************
  131. * getter function to return the iteration list
  132. ******************************************************************/
  133. std::vector<double>
  134. IO::get_iteration_list() const
  135. {
  136. return time_it_list_;
  137. }
  138. /*******************************************************************
  139. * returns the id corresponding to the data set of the given name
  140. *******************************************************************/
  141. int
  142. IO::get_id_from_name(std::string data_name) const
  143. {
  144. std::map<std::string, int>::const_iterator id =
  145. block_names_map_.find(data_name);
  146. if (id == block_names_map_.end())
  147. return -1;
  148. return id->second;
  149. }
  150. /*******************************************************************
  151. * getter function for the raw data either selected by name or
  152. * by id
  153. *
  154. * two types of data are returned via references:
  155. * 1) data_list: a vector of data rows containing doubles
  156. * 2) data_types: a vector of ints describing the underlying datatype
  157. * contained in each data_list row.
  158. *
  159. * NOTE: this method will not cast the data according to its true
  160. * data type as returned by data_type. It's up to the caller
  161. * to decide if this is necessary.
  162. ******************************************************************/
  163. bool
  164. IO::get_data(std::vector<std::vector<double> >& data_list,
  165. std::vector<uint16_t>& data_types, string name) const
  166. {
  167. int id = get_id_from_name(name);
  168. if (id == -1)
  169. return false;
  170. /* call via block id */
  171. return get_data(data_list, data_types, id);
  172. }
  173. bool
  174. IO::get_data(std::vector<std::vector<double> >& data_list,
  175. std::vector<uint16_t>& data_types, int id) const
  176. {
  177. if (id < 0 || id > num_blocks_-1)
  178. return false;
  179. EntryType entry = block_info_[id];
  180. data_list.reserve(entry.num_cols);
  181. for (uint64_t i=0; i < entry.num_cols; ++i)
  182. {
  183. std::vector<double> column;
  184. column.reserve(blocksize_ + 1);
  185. data_list.push_back(column);
  186. data_types.push_back(entry.data_type[i]);
  187. }
  188. /* fast forward to starting location of data */
  189. char* chunkLocation = buffer_ +
  190. fileoffset_to_data_ + sizeof(double) * buffer_size_ * entry.offset;
  191. uint64_t numElements = 0;
  192. uint64_t stream_block = 1;
  193. while (numElements < blocksize_)
  194. {
  195. /* check if we need to fast forward to next block
  196. * NOTE: We need a special case for the last and potentiall incomplete
  197. * stream block with less than buffer_size elements
  198. */
  199. if (numElements >= (stream_block * buffer_size_))
  200. {
  201. chunkLocation = buffer_ + fileoffset_to_data_ + sizeof(double)
  202. * (buffer_size_ * stream_block) * total_columns_;
  203. if ((blocksize_ - numElements) >= buffer_size_)
  204. {
  205. chunkLocation += sizeof(double) * buffer_size_ * entry.offset;
  206. }
  207. else
  208. {
  209. chunkLocation += sizeof(double) * (blocksize_ - numElements)
  210. * entry.offset;
  211. }
  212. ++stream_block;
  213. }
  214. for (uint64_t i=0; i < entry.num_cols; ++i)
  215. {
  216. double* temp = reinterpret_cast<double*>(chunkLocation);
  217. data_list[i].push_back(*temp);
  218. chunkLocation += sizeof(double);
  219. }
  220. ++numElements;
  221. }
  222. return true;
  223. }
  224. /*******************************************************************
  225. * member function printing basic file info as extracted from
  226. * the headers
  227. *******************************************************************/
  228. void
  229. IO::info() const
  230. {
  231. cout << "MCell3 binary reader v" << VERSION << " (C) "
  232. << DATE << " " << AUTHOR << NL
  233. << "----------------------------------------------------" << NL
  234. << PRE << "found " << num_blocks_ << " datablocks with " << blocksize_
  235. << " items each." << NL;
  236. if (output_type_ == STEP)
  237. {
  238. cout << PRE << "count STEP was " << stepsize_ << "[s]" << NL;
  239. }
  240. else if (output_type_ == TIME_LIST)
  241. {
  242. cout << PRE << "Output was generated via TIME_LIST" << NL;
  243. }
  244. else if (output_type_ == ITERATION_LIST)
  245. {
  246. cout << PRE << "Output was generated via ITERATION_LIST" << NL;
  247. }
  248. else
  249. {
  250. cout << PRE << "Unknown output type" << NL;
  251. }
  252. cout << endl;
  253. }
  254. /******************************************************************
  255. *
  256. * private member functions
  257. *
  258. ******************************************************************/
  259. /*******************************************************************
  260. * member function opening the binary file
  261. *******************************************************************/
  262. bool
  263. IO::open_()
  264. {
  265. FILE* input_file = fopen(file_name_.c_str(),"r");
  266. if (input_file == NULL)
  267. {
  268. return false;
  269. }
  270. /* call the proper parsing code depending on if the file is uncompressed
  271. * data of a compressed bzip file */
  272. string::size_type location = file_name_.find_last_of(".");
  273. string extension = file_name_.substr(location);
  274. bool status = false;
  275. if (extension == ".gz") {
  276. status = open_gz_file_(input_file);
  277. }
  278. else if (extension == ".bz2")
  279. {
  280. status = open_bz_file_(input_file);
  281. }
  282. // try it as uncompressed binary file
  283. else
  284. {
  285. status = open_binary_file_(input_file);
  286. }
  287. /* done with file */
  288. fclose(input_file);
  289. return status;
  290. }
  291. /*****************************************************************
  292. * member function parsing the header
  293. *****************************************************************/
  294. bool
  295. IO::parse_header_()
  296. {
  297. /* keep track of where we are */
  298. uint64_t charCounter = 0;
  299. charCounter = parse_api_tag_(charCounter);
  300. if (charCounter == 0)
  301. return false;
  302. charCounter = parse_block_information_(charCounter);
  303. charCounter = parse_block_names_(charCounter);
  304. // store the total number of columns and offset to begin of data
  305. fileoffset_to_data_ = charCounter;
  306. return true;
  307. }
  308. /*********************************************************************
  309. * parse the api tag
  310. ********************************************************************/
  311. uint64_t
  312. IO::parse_api_tag_(uint64_t charCounter)
  313. {
  314. /** read API tag and make sure we have the proper output file type */
  315. string receivedAPItag;
  316. for (unsigned int i=0; i < needed_api_tag_.size(); ++i)
  317. {
  318. receivedAPItag.push_back(*(buffer_ + charCounter));
  319. ++charCounter;
  320. }
  321. ++charCounter;
  322. if (receivedAPItag != needed_api_tag_)
  323. {
  324. return 0;
  325. }
  326. return charCounter;
  327. }
  328. /*********************************************************************
  329. * parse the block information
  330. ********************************************************************/
  331. uint64_t
  332. IO::parse_block_information_(uint64_t charCounter)
  333. {
  334. output_type_ = *reinterpret_cast<uint16_t*>(buffer_ + charCounter);
  335. charCounter += sizeof(uint16_t);
  336. blocksize_ = *reinterpret_cast<uint64_t*>(buffer_ + charCounter);
  337. charCounter += sizeof(uint64_t);
  338. uint64_t length = 0;
  339. if (output_type_ == STEP) // STEP
  340. {
  341. length = *reinterpret_cast<uint64_t*>(buffer_ + charCounter);
  342. charCounter += sizeof(uint64_t);
  343. stepsize_ = *reinterpret_cast<double*>(buffer_ + charCounter);
  344. charCounter += sizeof(double);
  345. }
  346. else if (output_type_ == TIME_LIST || output_type_ == ITERATION_LIST)
  347. {
  348. length = *reinterpret_cast<uint64_t*>(buffer_ + charCounter);
  349. charCounter += sizeof(uint64_t);
  350. for (uint64_t i = 0; i < length; ++i)
  351. {
  352. double temp;
  353. temp = *reinterpret_cast<double*>(buffer_ + charCounter);
  354. charCounter += sizeof(double);
  355. time_it_list_.push_back(temp);
  356. }
  357. }
  358. else
  359. {
  360. std::cout << "Unknown count output format" << std::endl;
  361. return false;
  362. }
  363. /* parse output buffer size */
  364. buffer_size_ = *reinterpret_cast<uint64_t*>(buffer_ + charCounter);
  365. charCounter += sizeof(uint64_t);
  366. /* parse number of blocks */
  367. num_blocks_ = *reinterpret_cast<uint64_t*>(buffer_ + charCounter);
  368. charCounter += sizeof(uint64_t);
  369. return charCounter;
  370. }
  371. /*********************************************************************
  372. * parse the block names
  373. ********************************************************************/
  374. uint64_t
  375. IO::parse_block_names_(uint64_t charCounter)
  376. {
  377. uint64_t block_counter = 0;
  378. uint64_t offset = 0;
  379. string buffer;
  380. while ((block_counter != num_blocks_))
  381. {
  382. char currentChar = *(buffer_ + charCounter);
  383. /** read until we hit the end of a substring; this is then copied to a
  384. ** new string and appended to available_blocks */
  385. if (currentChar == '\0')
  386. {
  387. EntryType entry;
  388. block_names_.push_back(buffer);
  389. block_names_map_[buffer] = block_counter;
  390. entry.name = buffer;
  391. buffer.clear();
  392. ++charCounter;
  393. entry.offset = offset;
  394. entry.num_cols = *reinterpret_cast<uint64_t*>(buffer_ + charCounter);
  395. offset += entry.num_cols;
  396. charCounter += sizeof(uint64_t);
  397. for(uint64_t i=0; i<entry.num_cols; ++i)
  398. {
  399. entry.data_type.push_back(*reinterpret_cast<uint16_t*> (buffer_ + charCounter));
  400. charCounter += sizeof(uint16_t);
  401. }
  402. block_info_.push_back(entry);
  403. if (++block_counter > num_blocks_)
  404. {
  405. return false;
  406. }
  407. }
  408. else
  409. {
  410. buffer.push_back(currentChar);
  411. ++charCounter;
  412. }
  413. }
  414. total_columns_ = offset;
  415. return charCounter;
  416. }
  417. /**********************************************************************
  418. * uncompress and read already opened binary file into a buffer_
  419. *********************************************************************/
  420. bool
  421. IO::open_gz_file_(FILE* inputFile)
  422. {
  423. // get the file descriptor of the FILE object
  424. int fdKeep = fileno(inputFile);
  425. int fd = dup(fdKeep);
  426. if (fd == -1) {
  427. return false;
  428. }
  429. gzFile myFile = gzdopen OF((fd, "r"));
  430. if ( myFile == NULL ) {
  431. return false;
  432. }
  433. /* keep reading data in chunks of 1M */
  434. const long chunkSize = 1048576L;
  435. int chunkCount = 1;
  436. int status = 0;
  437. char* nextChunkLocation = NULL;
  438. buffer_ = NULL;
  439. do
  440. {
  441. /* request next chunk */
  442. buffer_ = (char*)realloc(buffer_, chunkSize*chunkCount/sizeof(char) );
  443. if ( buffer_ == NULL )
  444. {
  445. return false;
  446. }
  447. /* read it */
  448. nextChunkLocation = buffer_ + chunkSize * (chunkCount-1);
  449. status = gzread OF((myFile, nextChunkLocation, chunkSize));
  450. if ( status == -1 ) {
  451. return false;
  452. }
  453. ++chunkCount;
  454. } while ( status != 0 );
  455. gzclose(myFile);
  456. return true;
  457. }
  458. /******************************************************************
  459. * uncompress and read already opened binary file into a Buffer_
  460. ******************************************************************/
  461. bool
  462. IO::open_bz_file_(FILE* inputFile)
  463. {
  464. int bzError;
  465. BZFILE* myFile;
  466. myFile = BZ2_bzReadOpen(&bzError, inputFile, 0, 0, NULL, 0);
  467. if ( bzError != BZ_OK )
  468. {
  469. return false;
  470. }
  471. /* keep reading data in chunks of 1M */
  472. const long chunkSize = 1048576L;
  473. int chunkCount = 1;
  474. char* nextChunkLocation = NULL;
  475. buffer_ = NULL;
  476. do
  477. {
  478. /* request next chunk */
  479. buffer_ = (char*)realloc(buffer_, chunkSize*chunkCount/sizeof(char) );
  480. if ( buffer_ == NULL )
  481. {
  482. return false;
  483. }
  484. /* read it */
  485. nextChunkLocation = buffer_ + chunkSize * (chunkCount-1);
  486. BZ2_bzRead(&bzError, myFile, nextChunkLocation, chunkSize);
  487. if ( ( bzError != BZ_OK ) && ( bzError != BZ_STREAM_END ) )
  488. {
  489. return false;
  490. }
  491. ++chunkCount;
  492. } while ( bzError != BZ_STREAM_END );
  493. /* release libbzip2's memory */
  494. BZ2_bzReadClose(&bzError, myFile);
  495. if ( bzError != BZ_OK )
  496. {
  497. return false;
  498. }
  499. return true;
  500. }
  501. /******************************************************************
  502. * read already opened binary file into a buffer_
  503. ******************************************************************/
  504. bool IO::open_binary_file_(FILE* inputFile)
  505. {
  506. /* keep reading data in chunks of 1M */
  507. const unsigned int chunkSize = 1048576;
  508. int chunkCount = 1;
  509. size_t num_read = 0;
  510. buffer_ = NULL;
  511. do
  512. {
  513. /* request next chunk */
  514. buffer_ = (char*)realloc(buffer_, chunkSize*chunkCount*sizeof(char) );
  515. if ( buffer_ == NULL )
  516. {
  517. return false;
  518. }
  519. /* read it */
  520. char* nextChunkLocation = buffer_ + chunkSize * (chunkCount-1) *
  521. sizeof(char);
  522. num_read = fread(nextChunkLocation, sizeof(char), chunkSize, inputFile);
  523. ++chunkCount;
  524. } while ( num_read == chunkSize );
  525. if ((feof(inputFile) != 0) && (ferror(inputFile) != 0 ) )
  526. {
  527. return false;
  528. }
  529. return true;
  530. }
  531. /*************************************************************************
  532. *
  533. * implementation of C interface
  534. *
  535. ************************************************************************/
  536. /*
  537. * construct a new MBDIO data parsing object for the binary reaction
  538. * data file file_name
  539. */
  540. MBDIO
  541. mbd_new(const char* file_name)
  542. {
  543. return reinterpret_cast<MBDIO>(new IO(file_name));
  544. }
  545. /*
  546. * delete an existing MBDIO data parsing object
  547. */
  548. void
  549. mbd_delete(MBDIO obj)
  550. {
  551. delete obj;
  552. return;
  553. }
  554. /*
  555. * initialize the file reader object
  556. */
  557. int
  558. mbd_initialize(MBDIO obj)
  559. {
  560. return (obj->initialize() ? 1 : -1);
  561. }
  562. /*
  563. * returns the number of distinct data items (data_blocks)
  564. * in the data set handled by the data reader
  565. */
  566. uint64_t
  567. mbd_get_num_datablocks(MBDIO obj)
  568. {
  569. return obj->get_num_datablocks();
  570. }
  571. /*
  572. * returns the blocksize, i.e. the number of data items per
  573. * data set.
  574. */
  575. uint64_t
  576. mbd_get_blocksize(MBDIO obj)
  577. {
  578. return obj->get_blocksize();
  579. }
  580. /*
  581. * returns the type of output (STEP, TIME_LIST, ITERATION_LIST)
  582. */
  583. uint16_t
  584. mbd_get_output_type(MBDIO obj)
  585. {
  586. return obj->get_output_type();
  587. }
  588. /*
  589. * returns the step size for output type STEP
  590. */
  591. double
  592. mbd_get_stepsize(MBDIO obj)
  593. {
  594. return obj->get_stepsize();
  595. }
  596. /*
  597. * returns the names of each datablock and their count
  598. * NOTE: the caller is responsible for providing an array block_names
  599. * of the proper size.
  600. */
  601. void
  602. mbd_get_blocknames(MBDIO obj, char** block_names, size_t length)
  603. {
  604. vector<string> names = obj->get_blocknames();
  605. for (int i = 0; i < names.size(); ++i)
  606. {
  607. strncpy(block_names[i], names[i].c_str(), length);
  608. }
  609. return;
  610. }
  611. /*
  612. * returns an array of timepoints/iterations and the count
  613. * NOTE: the caller is responsible for providing an array iteration_list
  614. * of proper size (blocksize).
  615. */
  616. void
  617. mbd_get_iteration_list(MBDIO obj, double* iteration_list)
  618. {
  619. vector<double> iter_list = obj->get_iteration_list();
  620. for (int i = 0; i < iter_list.size(); ++i)
  621. {
  622. iteration_list[i] = iter_list[i];
  623. }
  624. return;
  625. }
  626. /*
  627. * return the number of available data columns in data set
  628. * with id.
  629. */
  630. uint64_t
  631. mbd_get_num_columns_by_id(MBDIO obj, int id)
  632. {
  633. return obj->get_num_columns(id);
  634. }
  635. /*
  636. * return the data values corresponding to the data set with the
  637. * given id. The id is identical to the index of a given data set
  638. * in the list of blocknames.
  639. * NOTE: The called is responsible for providing data_list and
  640. * data_types arrays of the correct size.
  641. */
  642. int
  643. mbd_get_data_by_id(MBDIO obj, double** data_list, uint16_t* data_types,
  644. int id)
  645. {
  646. vector<vector<double> > raw_data;
  647. vector<uint16_t> raw_data_types;
  648. if (!obj->get_data(raw_data, raw_data_types, id))
  649. return -1;
  650. size_t col_count = raw_data_types.size();
  651. size_t row_count = obj->get_blocksize();
  652. /* extract data */
  653. for (int col = 0; col < col_count; ++col)
  654. {
  655. for (int row = 0; row < row_count; ++row)
  656. {
  657. data_list[col][row] = raw_data[col][row];
  658. }
  659. }
  660. /* extract data types */
  661. for (int col = 0; col < col_count; ++col)
  662. {
  663. data_types[col] = raw_data_types[col];
  664. }
  665. return 0;
  666. }
  667. /*
  668. * return the id corresponding to the dataset with the given
  669. * name. If the name doesn't match any dataset -1 is returned.
  670. */
  671. int
  672. mbd_get_id_from_name(MBDIO obj, char* name)
  673. {
  674. return obj->get_id_from_name(string(name));
  675. }
  676. /*
  677. * return the data values corresponding to the data set with the
  678. * given name. The id is identical to the index of a given data set
  679. * in the list of blocknames.
  680. * NOTE: The called is responsible for providing data_list and
  681. * data_types arrays of the correct size.
  682. */
  683. int
  684. mbd_get_data_by_name(MBDIO obj, double** data, uint16_t* data_types,
  685. char* data_name)
  686. {
  687. int id = obj->get_id_from_name(string(data_name));
  688. if (id == -1)
  689. return 1;
  690. return mbd_get_data_by_id(obj, data, data_types, id);
  691. }
  692. /*
  693. * prints basic information regarding the data contained in the
  694. * reader object.
  695. */
  696. void
  697. mbd_info(MBDIO obj)
  698. {
  699. return obj->info();
  700. }