123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869 |
- /*****************************************************************
- *
- * the class IO provides all the functionality to access a
- * binary mcell3 reaction data output file.
- *
- * (c) 2008 Markus Dittrich
- *
- * This program is free software; you can redistribute it
- * and/or modify it under the terms of the GNU General Public
- * License Version 3 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License Version 3 for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the Free
- * Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 02111-1307, USA.
- *
- ****************************************************************/
- /* C headers */
- #include <bzlib.h>
- #include <cstdio>
- #include <cstdlib>
- #include <cstring>
- #include <errno.h>
- #include <zlib.h>
- /* STL headers */
- #include <iostream>
- /* local headers */
- #include "mbd.h"
- /** pull in some things from namespace std */
- using std::vector;
- using std::cout;
- using std::endl;
- using std::string;
- /*****************************************************************
- * constructor
- *****************************************************************/
- IO::IO(string afile_name)
- :
- file_name_(afile_name),
- num_blocks_(0),
- output_type_(0),
- blocksize_(0),
- stepsize_(0.0),
- needed_api_tag_("MCELL_BINARY_API_2")
- {}
- /******************************************************************
- * destructor
- ******************************************************************/
- IO::~IO()
- {
- free(buffer_);
- }
- /*****************************************************************
- *
- * public member functions
- *
- *****************************************************************/
- /******************************************************************
- * member function initizializing the object, vie opening
- * the file and parsing the header
- ******************************************************************/
- bool
- IO::initialize()
- {
- /* open file, don't parse headers if that fails already */
- bool status = open_();
- if (!status)
- {
- return false;
- }
- /* parse headers */
- status = parse_header_();
- return status;
- }
- /*******************************************************************
- * getter function returning the number of datablocks
- *******************************************************************/
- uint64_t
- IO::get_num_datablocks() const
- {
- return num_blocks_;
- }
- /*******************************************************************
- * getter function returning the STEP
- *******************************************************************/
- double
- IO::get_stepsize() const
- {
- return stepsize_;
- }
- /******************************************************************
- * getter function returning the block size
- ******************************************************************/
- uint64_t
- IO::get_blocksize() const
- {
- return blocksize_;
- }
- /******************************************************************
- * getter function for number of columns contained in data at id
- *****************************************************************/
- uint64_t
- IO::get_num_columns(int id) const
- {
- if (id < 0 || id > num_blocks_-1)
- return -1;
- return block_info_[id].num_cols;
- }
- /******************************************************************
- * getter function returning the block size
- ******************************************************************/
- vector<string>
- IO::get_blocknames() const
- {
- return block_names_;
- }
- /*******************************************************************
- * getter function returning the type of output
- ******************************************************************/
- uint16_t
- IO::get_output_type() const
- {
- return output_type_;
- }
- /*******************************************************************
- * getter function to return the iteration list
- ******************************************************************/
- std::vector<double>
- IO::get_iteration_list() const
- {
- return time_it_list_;
- }
- /*******************************************************************
- * returns the id corresponding to the data set of the given name
- *******************************************************************/
- int
- IO::get_id_from_name(std::string data_name) const
- {
- std::map<std::string, int>::const_iterator id =
- block_names_map_.find(data_name);
- if (id == block_names_map_.end())
- return -1;
- return id->second;
- }
- /*******************************************************************
- * getter function for the raw data either selected by name or
- * by id
- *
- * two types of data are returned via references:
- * 1) data_list: a vector of data rows containing doubles
- * 2) data_types: a vector of ints describing the underlying datatype
- * contained in each data_list row.
- *
- * NOTE: this method will not cast the data according to its true
- * data type as returned by data_type. It's up to the caller
- * to decide if this is necessary.
- ******************************************************************/
- bool
- IO::get_data(std::vector<std::vector<double> >& data_list,
- std::vector<uint16_t>& data_types, string name) const
- {
- int id = get_id_from_name(name);
- if (id == -1)
- return false;
- /* call via block id */
- return get_data(data_list, data_types, id);
- }
- bool
- IO::get_data(std::vector<std::vector<double> >& data_list,
- std::vector<uint16_t>& data_types, int id) const
- {
- if (id < 0 || id > num_blocks_-1)
- return false;
- EntryType entry = block_info_[id];
- data_list.reserve(entry.num_cols);
- for (uint64_t i=0; i < entry.num_cols; ++i)
- {
- std::vector<double> column;
- column.reserve(blocksize_ + 1);
- data_list.push_back(column);
- data_types.push_back(entry.data_type[i]);
- }
- /* fast forward to starting location of data */
- char* chunkLocation = buffer_ +
- fileoffset_to_data_ + sizeof(double) * buffer_size_ * entry.offset;
- uint64_t numElements = 0;
- uint64_t stream_block = 1;
- while (numElements < blocksize_)
- {
- /* check if we need to fast forward to next block
- * NOTE: We need a special case for the last and potentiall incomplete
- * stream block with less than buffer_size elements
- */
- if (numElements >= (stream_block * buffer_size_))
- {
- chunkLocation = buffer_ + fileoffset_to_data_ + sizeof(double)
- * (buffer_size_ * stream_block) * total_columns_;
- if ((blocksize_ - numElements) >= buffer_size_)
- {
- chunkLocation += sizeof(double) * buffer_size_ * entry.offset;
- }
- else
- {
- chunkLocation += sizeof(double) * (blocksize_ - numElements)
- * entry.offset;
- }
- ++stream_block;
- }
- for (uint64_t i=0; i < entry.num_cols; ++i)
- {
- double* temp = reinterpret_cast<double*>(chunkLocation);
- data_list[i].push_back(*temp);
- chunkLocation += sizeof(double);
- }
- ++numElements;
- }
- return true;
- }
- /*******************************************************************
- * member function printing basic file info as extracted from
- * the headers
- *******************************************************************/
- void
- IO::info() const
- {
- cout << "MCell3 binary reader v" << VERSION << " (C) "
- << DATE << " " << AUTHOR << NL
- << "----------------------------------------------------" << NL
- << PRE << "found " << num_blocks_ << " datablocks with " << blocksize_
- << " items each." << NL;
-
- if (output_type_ == STEP)
- {
- cout << PRE << "count STEP was " << stepsize_ << "[s]" << NL;
- }
- else if (output_type_ == TIME_LIST)
- {
- cout << PRE << "Output was generated via TIME_LIST" << NL;
- }
- else if (output_type_ == ITERATION_LIST)
- {
- cout << PRE << "Output was generated via ITERATION_LIST" << NL;
- }
- else
- {
- cout << PRE << "Unknown output type" << NL;
- }
- cout << endl;
- }
- /******************************************************************
- *
- * private member functions
- *
- ******************************************************************/
- /*******************************************************************
- * member function opening the binary file
- *******************************************************************/
- bool
- IO::open_()
- {
- FILE* input_file = fopen(file_name_.c_str(),"r");
- if (input_file == NULL)
- {
- return false;
- }
- /* call the proper parsing code depending on if the file is uncompressed
- * data of a compressed bzip file */
- string::size_type location = file_name_.find_last_of(".");
- string extension = file_name_.substr(location);
- bool status = false;
- if (extension == ".gz") {
- status = open_gz_file_(input_file);
- }
- else if (extension == ".bz2")
- {
- status = open_bz_file_(input_file);
- }
- // try it as uncompressed binary file
- else
- {
- status = open_binary_file_(input_file);
- }
- /* done with file */
- fclose(input_file);
- return status;
- }
- /*****************************************************************
- * member function parsing the header
- *****************************************************************/
- bool
- IO::parse_header_()
- {
- /* keep track of where we are */
- uint64_t charCounter = 0;
- charCounter = parse_api_tag_(charCounter);
- if (charCounter == 0)
- return false;
- charCounter = parse_block_information_(charCounter);
- charCounter = parse_block_names_(charCounter);
- // store the total number of columns and offset to begin of data
- fileoffset_to_data_ = charCounter;
- return true;
- }
- /*********************************************************************
- * parse the api tag
- ********************************************************************/
- uint64_t
- IO::parse_api_tag_(uint64_t charCounter)
- {
- /** read API tag and make sure we have the proper output file type */
- string receivedAPItag;
- for (unsigned int i=0; i < needed_api_tag_.size(); ++i)
- {
- receivedAPItag.push_back(*(buffer_ + charCounter));
- ++charCounter;
- }
- ++charCounter;
- if (receivedAPItag != needed_api_tag_)
- {
- return 0;
- }
- return charCounter;
- }
- /*********************************************************************
- * parse the block information
- ********************************************************************/
- uint64_t
- IO::parse_block_information_(uint64_t charCounter)
- {
- output_type_ = *reinterpret_cast<uint16_t*>(buffer_ + charCounter);
- charCounter += sizeof(uint16_t);
- blocksize_ = *reinterpret_cast<uint64_t*>(buffer_ + charCounter);
- charCounter += sizeof(uint64_t);
- uint64_t length = 0;
- if (output_type_ == STEP) // STEP
- {
- length = *reinterpret_cast<uint64_t*>(buffer_ + charCounter);
- charCounter += sizeof(uint64_t);
- stepsize_ = *reinterpret_cast<double*>(buffer_ + charCounter);
- charCounter += sizeof(double);
- }
- else if (output_type_ == TIME_LIST || output_type_ == ITERATION_LIST)
- {
- length = *reinterpret_cast<uint64_t*>(buffer_ + charCounter);
- charCounter += sizeof(uint64_t);
- for (uint64_t i = 0; i < length; ++i)
- {
- double temp;
- temp = *reinterpret_cast<double*>(buffer_ + charCounter);
- charCounter += sizeof(double);
- time_it_list_.push_back(temp);
- }
- }
- else
- {
- std::cout << "Unknown count output format" << std::endl;
- return false;
- }
- /* parse output buffer size */
- buffer_size_ = *reinterpret_cast<uint64_t*>(buffer_ + charCounter);
- charCounter += sizeof(uint64_t);
- /* parse number of blocks */
- num_blocks_ = *reinterpret_cast<uint64_t*>(buffer_ + charCounter);
- charCounter += sizeof(uint64_t);
- return charCounter;
- }
- /*********************************************************************
- * parse the block names
- ********************************************************************/
- uint64_t
- IO::parse_block_names_(uint64_t charCounter)
- {
- uint64_t block_counter = 0;
- uint64_t offset = 0;
- string buffer;
- while ((block_counter != num_blocks_))
- {
- char currentChar = *(buffer_ + charCounter);
- /** read until we hit the end of a substring; this is then copied to a
- ** new string and appended to available_blocks */
- if (currentChar == '\0')
- {
- EntryType entry;
- block_names_.push_back(buffer);
- block_names_map_[buffer] = block_counter;
- entry.name = buffer;
- buffer.clear();
- ++charCounter;
- entry.offset = offset;
- entry.num_cols = *reinterpret_cast<uint64_t*>(buffer_ + charCounter);
- offset += entry.num_cols;
- charCounter += sizeof(uint64_t);
- for(uint64_t i=0; i<entry.num_cols; ++i)
- {
- entry.data_type.push_back(*reinterpret_cast<uint16_t*> (buffer_ + charCounter));
- charCounter += sizeof(uint16_t);
- }
- block_info_.push_back(entry);
- if (++block_counter > num_blocks_)
- {
- return false;
- }
- }
- else
- {
- buffer.push_back(currentChar);
- ++charCounter;
- }
- }
- total_columns_ = offset;
- return charCounter;
- }
- /**********************************************************************
- * uncompress and read already opened binary file into a buffer_
- *********************************************************************/
- bool
- IO::open_gz_file_(FILE* inputFile)
- {
- // get the file descriptor of the FILE object
- int fdKeep = fileno(inputFile);
- int fd = dup(fdKeep);
- if (fd == -1) {
- return false;
- }
- gzFile myFile = gzdopen OF((fd, "r"));
- if ( myFile == NULL ) {
- return false;
- }
- /* keep reading data in chunks of 1M */
- const long chunkSize = 1048576L;
- int chunkCount = 1;
- int status = 0;
- char* nextChunkLocation = NULL;
- buffer_ = NULL;
-
- do
- {
- /* request next chunk */
- buffer_ = (char*)realloc(buffer_, chunkSize*chunkCount/sizeof(char) );
- if ( buffer_ == NULL )
- {
- return false;
- }
- /* read it */
- nextChunkLocation = buffer_ + chunkSize * (chunkCount-1);
- status = gzread OF((myFile, nextChunkLocation, chunkSize));
- if ( status == -1 ) {
- return false;
- }
- ++chunkCount;
- } while ( status != 0 );
- gzclose(myFile);
- return true;
- }
- /******************************************************************
- * uncompress and read already opened binary file into a Buffer_
- ******************************************************************/
- bool
- IO::open_bz_file_(FILE* inputFile)
- {
- int bzError;
- BZFILE* myFile;
- myFile = BZ2_bzReadOpen(&bzError, inputFile, 0, 0, NULL, 0);
- if ( bzError != BZ_OK )
- {
- return false;
- }
- /* keep reading data in chunks of 1M */
- const long chunkSize = 1048576L;
- int chunkCount = 1;
- char* nextChunkLocation = NULL;
- buffer_ = NULL;
- do
- {
- /* request next chunk */
- buffer_ = (char*)realloc(buffer_, chunkSize*chunkCount/sizeof(char) );
- if ( buffer_ == NULL )
- {
- return false;
- }
- /* read it */
- nextChunkLocation = buffer_ + chunkSize * (chunkCount-1);
- BZ2_bzRead(&bzError, myFile, nextChunkLocation, chunkSize);
-
- if ( ( bzError != BZ_OK ) && ( bzError != BZ_STREAM_END ) )
- {
- return false;
- }
- ++chunkCount;
- } while ( bzError != BZ_STREAM_END );
- /* release libbzip2's memory */
- BZ2_bzReadClose(&bzError, myFile);
- if ( bzError != BZ_OK )
- {
- return false;
- }
- return true;
- }
- /******************************************************************
- * read already opened binary file into a buffer_
- ******************************************************************/
- bool IO::open_binary_file_(FILE* inputFile)
- {
- /* keep reading data in chunks of 1M */
- const unsigned int chunkSize = 1048576;
- int chunkCount = 1;
- size_t num_read = 0;
- buffer_ = NULL;
-
- do
- {
- /* request next chunk */
- buffer_ = (char*)realloc(buffer_, chunkSize*chunkCount*sizeof(char) );
- if ( buffer_ == NULL )
- {
- return false;
- }
- /* read it */
- char* nextChunkLocation = buffer_ + chunkSize * (chunkCount-1) *
- sizeof(char);
- num_read = fread(nextChunkLocation, sizeof(char), chunkSize, inputFile);
- ++chunkCount;
- } while ( num_read == chunkSize );
- if ((feof(inputFile) != 0) && (ferror(inputFile) != 0 ) )
- {
- return false;
- }
- return true;
- }
- /*************************************************************************
- *
- * implementation of C interface
- *
- ************************************************************************/
- /*
- * construct a new MBDIO data parsing object for the binary reaction
- * data file file_name
- */
- MBDIO
- mbd_new(const char* file_name)
- {
- return reinterpret_cast<MBDIO>(new IO(file_name));
- }
- /*
- * delete an existing MBDIO data parsing object
- */
- void
- mbd_delete(MBDIO obj)
- {
- delete obj;
- return;
- }
- /*
- * initialize the file reader object
- */
- int
- mbd_initialize(MBDIO obj)
- {
- return (obj->initialize() ? 1 : -1);
- }
- /*
- * returns the number of distinct data items (data_blocks)
- * in the data set handled by the data reader
- */
- uint64_t
- mbd_get_num_datablocks(MBDIO obj)
- {
- return obj->get_num_datablocks();
- }
- /*
- * returns the blocksize, i.e. the number of data items per
- * data set.
- */
- uint64_t
- mbd_get_blocksize(MBDIO obj)
- {
- return obj->get_blocksize();
- }
- /*
- * returns the type of output (STEP, TIME_LIST, ITERATION_LIST)
- */
- uint16_t
- mbd_get_output_type(MBDIO obj)
- {
- return obj->get_output_type();
- }
-
- /*
- * returns the step size for output type STEP
- */
- double
- mbd_get_stepsize(MBDIO obj)
- {
- return obj->get_stepsize();
- }
- /*
- * returns the names of each datablock and their count
- * NOTE: the caller is responsible for providing an array block_names
- * of the proper size.
- */
- void
- mbd_get_blocknames(MBDIO obj, char** block_names, size_t length)
- {
- vector<string> names = obj->get_blocknames();
- for (int i = 0; i < names.size(); ++i)
- {
- strncpy(block_names[i], names[i].c_str(), length);
- }
- return;
- }
- /*
- * returns an array of timepoints/iterations and the count
- * NOTE: the caller is responsible for providing an array iteration_list
- * of proper size (blocksize).
- */
- void
- mbd_get_iteration_list(MBDIO obj, double* iteration_list)
- {
- vector<double> iter_list = obj->get_iteration_list();
- for (int i = 0; i < iter_list.size(); ++i)
- {
- iteration_list[i] = iter_list[i];
- }
- return;
- }
- /*
- * return the number of available data columns in data set
- * with id.
- */
- uint64_t
- mbd_get_num_columns_by_id(MBDIO obj, int id)
- {
- return obj->get_num_columns(id);
- }
- /*
- * return the data values corresponding to the data set with the
- * given id. The id is identical to the index of a given data set
- * in the list of blocknames.
- * NOTE: The called is responsible for providing data_list and
- * data_types arrays of the correct size.
- */
- int
- mbd_get_data_by_id(MBDIO obj, double** data_list, uint16_t* data_types,
- int id)
- {
- vector<vector<double> > raw_data;
- vector<uint16_t> raw_data_types;
- if (!obj->get_data(raw_data, raw_data_types, id))
- return -1;
-
- size_t col_count = raw_data_types.size();
- size_t row_count = obj->get_blocksize();
- /* extract data */
- for (int col = 0; col < col_count; ++col)
- {
- for (int row = 0; row < row_count; ++row)
- {
- data_list[col][row] = raw_data[col][row];
- }
- }
- /* extract data types */
- for (int col = 0; col < col_count; ++col)
- {
- data_types[col] = raw_data_types[col];
- }
- return 0;
- }
-
- /*
- * return the id corresponding to the dataset with the given
- * name. If the name doesn't match any dataset -1 is returned.
- */
- int
- mbd_get_id_from_name(MBDIO obj, char* name)
- {
- return obj->get_id_from_name(string(name));
- }
- /*
- * return the data values corresponding to the data set with the
- * given name. The id is identical to the index of a given data set
- * in the list of blocknames.
- * NOTE: The called is responsible for providing data_list and
- * data_types arrays of the correct size.
- */
- int
- mbd_get_data_by_name(MBDIO obj, double** data, uint16_t* data_types,
- char* data_name)
- {
- int id = obj->get_id_from_name(string(data_name));
- if (id == -1)
- return 1;
- return mbd_get_data_by_id(obj, data, data_types, id);
- }
- /*
- * prints basic information regarding the data contained in the
- * reader object.
- */
- void
- mbd_info(MBDIO obj)
- {
- return obj->info();
- }
|