Using tables_io to write to an hdf5 file from multiple places
# Standard imports
import os
from collections import OrderedDict
import numpy as np
import tables_io
#from ..tests.testUtils import make_test_data
import astropy.table as apTable
from mpi4py import MPI
---------------------------------------------------------------------------
ModuleNotFoundError Traceback (most recent call last)
Cell In[1], line 7
5 import tables_io
6 #from ..tests.testUtils import make_test_data
----> 7 import astropy.table as apTable
8 from mpi4py import MPI
ModuleNotFoundError: No module named 'astropy'
Set up
Some test data.
OK, let's make some test data and write it to an HDF5 file.
def make_test_data(nrow=1000, vect_size=20, mat_size=5):
    """Build a pair of astropy tables filled with random test data.

    Parameters
    ----------
    nrow : int, optional
        Number of rows in the main table. Defaults to 1000.
    vect_size : int, optional
        Length of each entry of the ``vect`` column. Defaults to 20.
    mat_size : int, optional
        Side length of each square entry of the ``matrix`` column.
        Defaults to 5.

    Returns
    -------
    dict
        ``"data"`` holds a table with columns ``scalar`` of shape
        ``(nrow,)``, ``vect`` of shape ``(nrow, vect_size)`` and ``matrix``
        of shape ``(nrow, mat_size, mat_size)``, all drawn with
        ``np.random.uniform``, plus metadata keys ``a``, ``b`` and ``c``.
        ``"md"`` holds a 21-row table with a column ``a`` of ones and a
        column ``b`` of zeros, and the metadata flag ``small``.
    """
    scalar = np.random.uniform(size=nrow)
    # Requesting the final shape directly draws the same number of samples
    # as drawing a flat array and reshaping it.
    vect = np.random.uniform(size=(nrow, vect_size))
    matrix = np.random.uniform(size=(nrow, mat_size, mat_size))

    table = apTable.Table(dict(scalar=scalar, vect=vect, matrix=matrix))
    # Metadata of several kinds: an int, a None and a list.
    table.meta["a"] = 1
    table.meta["b"] = None
    table.meta["c"] = [3, 4, 5]

    small_table = apTable.Table(dict(a=np.ones(21), b=np.zeros(21)))
    small_table.meta["small"] = True

    return dict(data=table, md=small_table)
# Build one sample batch up front; the shapes and dtypes of its 'data' table
# are used further down to size the output file.
data = make_test_data()
Some utility functions to get the output data shape
def get_shapes_and_type(aDict):
    """Describe every entry of a mapping by its shape and dtype.

    Parameters
    ----------
    aDict : mapping
        Any object with an ``items()`` method whose values expose
        ``shape`` and ``dtype`` attributes (for example numpy arrays).

    Returns
    -------
    OrderedDict
        ``key -> (shape, dtype)`` in the iteration order of ``aDict``.
    """
    # A generator feeds OrderedDict directly, so no intermediate list is built.
    return OrderedDict((key, (val.shape, val.dtype)) for key, val in aDict.items())
def modify_shape(shapeIn, length):
    """Return a copy of a shape with its leading dimension replaced.

    Parameters
    ----------
    shapeIn : iterable of int
        The original shape; it is not modified.
    length : int
        The new size of the first axis.

    Returns
    -------
    tuple
        ``(length, *shapeIn[1:])``. An empty ``shapeIn`` yields
        ``(length,)`` rather than raising ``IndexError``.
    """
    # Slicing (not index assignment) keeps this safe for an empty shape.
    return (length, *tuple(shapeIn)[1:])
def set_lengths(aDict, length):
    """Set the leading dimension of every shape in a shape/dtype mapping.

    Parameters
    ----------
    aDict : mapping
        ``key -> (shape, dtype)`` pairs, as produced by
        ``get_shapes_and_type``.
    length : int
        The new size of the first axis of every shape.

    Returns
    -------
    OrderedDict
        ``key -> (new_shape, dtype)`` in the iteration order of ``aDict``;
        only the first axis of each shape changes.
    """
    # A generator feeds OrderedDict directly, so no intermediate list is built.
    return OrderedDict(
        (key, (modify_shape(val[0], length), val[1]))
        for key, val in aDict.items()
    )
Create the file
Here we get the output shapes and data types
# Take the column shapes and dtypes of the sample 'data' table and set the
# leading axis of each to 10000, the row count used to size the output.
dout = dict(data=set_lengths(get_shapes_and_type(data["data"]), 10000))
print(dout)
{'data': OrderedDict({'scalar': ((10000,), dtype('float64')), 'vect': ((10000, 20), dtype('float64')), 'matrix': ((10000, 5, 5), dtype('float64'))})}
Here we allocate the file
from tables_io import hdf5

# Remove output left over from a previous run. On a first run there is
# nothing to delete, so a missing file is not an error.
try:
    os.unlink('test_multi_write.hdf5')
except FileNotFoundError:
    pass

# Allocate the output under the same path that was just cleared and that is
# read back later in this file. It was previously created as
# 'test_mpi_write.hdf5', which matched neither.
groups, fout = hdf5.initialize_HDF5_write('test_multi_write.hdf5', comm=MPI.COMM_WORLD, **dout)
Write the file
Here we fill the file
# Fill the file in ten passes: each pass generates a fresh batch and writes
# it into the next 1000-row slot [start, end).
for i in range(10):
    data = make_test_data()
    start, end = i * 1000, (i + 1) * 1000
    hdf5.write_dict_to_HDF5_chunk(groups, data, start, end)
Here we write the metadata and close the file
# Pass the columns of the small 'md' table from the most recently generated
# batch, under the name 'md', to the finalize call for fout.
# Per the notebook text this writes the metadata and closes the file.
hdf5.finalize_HDF5_write(fout, 'md', **data['md'])
Test the file
Here we read the file in chunks
# Iterate over the 'data' group with a chunk size of 1000 and print, for each
# chunk, its first two fields (presumably the start and end row) followed by
# the shapes of the three columns.
# NOTE(review): the writer calls above use snake_case names from
# tables_io.hdf5, while this uses the camelCase tables_io.io.iterHdf5ToDict.
# Confirm this name exists in the installed tables_io version.
for chunks in tables_io.io.iterHdf5ToDict('test_multi_write.hdf5', 1000, 'data'):
    lo, hi, columns = chunks[0], chunks[1], chunks[2]
    print("%i:%i %s %s %s" % (
        lo, hi,
        columns['matrix'].shape, columns['vect'].shape, columns['scalar'].shape))