Update split data 3D to also support export to HDF5

This commit is contained in:
oruebel 2022-01-13 20:00:58 -08:00 committed by Gunther H. Weber
parent e9fcd7ca78
commit 8d1d713dce

@ -6,16 +6,24 @@ import sys
# For readBOV
from functools import reduce
import operator
# h5py is an optional dependency: when it cannot be imported, USE_HDF is set
# to False so callers can skip the HDF5 export instead of failing.
try:
    import h5py
except ImportError:
    # Only a failed import disables HDF5 support; anything else (for example
    # KeyboardInterrupt) is allowed to propagate.
    USE_HDF = False
else:
    USE_HDF = True
# Read a 3D text file from disk into a NumPy array
# ... Plain text
def read_file(fn):
    """
    Load a 3D volume stored as whitespace-separated plain text.

    The first three values in the file are interpreted as the extents
    (rows, columns, slices); every value after them is volume data.

    Args:
        fn (str): Name of the text file to read

    Return:
        NumPy float array of shape (slices, rows, columns)
    """
    values = np.fromfile(fn, dtype=float, sep=" ")
    # The header is in file order (row, col, slice), while the returned array
    # uses Python order (slice, row, col).
    n_slices = int(values[2])
    n_rows = int(values[0])
    n_cols = int(values[1])
    return values[3:].reshape((n_slices, n_rows, n_cols))
# ... VisItBOV
def readBOV(filename):
"""
Read data from a VisIt BOV file
"""
with open(filename, 'r') as f:
header = dict([(lambda x: (x[0].strip().lower(), x[1].strip()))(l.strip().split(':')) for l in f.readlines()])
if 'data_endian' in header:
@ -32,11 +40,22 @@ def readBOV(filename):
return (header['variable'], header['centering'].lower(), np.fromfile(dataname, dtype, count).reshape(tuple(reversed(shape))))
return None
# Save a block from a 3D NumPy array to disk
# Python order is slice, row, col
# TXT file order is row, col, slice
# offset and size are in file order
def save_piece(fn, array, offset, n_blocks, block_index, size):
"""
Save a block from a 3D NumPy array to disk.
Python order is slice, row, col
TXT file order is row, col, slice
offset and size are in file order
Args:
fn (str): filename
array (np.array) : Array with the full data
offset (tuple) : Tuple of int offsets
n_blocks (tuple) : Tuple of ints with the number of blocks per dimension
block_index (tuple) : Tuple of ints with index of the block
size (tuple) : Tuple of ints with the size of the block in each dimension
"""
with open(fn, 'w') as f:
perm = [1, 2, 0]
f.write('#GLOBAL_EXTENTS ' + ' '.join(map(str, [array.shape[i] for i in perm])) + '\n')
@ -51,51 +70,79 @@ def save_piece(fn, array, offset, n_blocks, block_index, size):
np.savetxt(f, array[s, offset[0]:offset[0]+size[0],offset[1]:offset[1]+size[1]], fmt='%.16g')
f.write('\n')
# Compute split points for splitting into n blocks
def split_points(shape, nblocks):
    """
    Compute split points for splitting an axis into n blocks.

    The points are spread as evenly as possible between the first index (0)
    and the last index (shape - 1) of the axis. Consecutive points delimit
    one block, so neighbouring blocks share their boundary index.

    Args:
        shape (int): Length of the axis
        nblocks (int): Number of blocks to split the axis into (at least 1)

    Return:
        List of nblocks + 1 int split points along the axis, starting at 0
        and ending at shape - 1

    Raises:
        ValueError: If nblocks is smaller than 1
    """
    if nblocks < 1:
        raise ValueError("nblocks must be at least 1, got %r" % (nblocks,))
    last = shape - 1
    # Exact integer floor division. Computing floor(i * (last / nblocks)) in
    # floating point can land one below the true value when the product is
    # mathematically an integer (e.g. 49 * (64 / 49) evaluates to 63.99...).
    return [(i * last) // nblocks for i in range(nblocks)] + [last]
# Both <filename> and <outfilepattern> are mandatory. sys.argv[0] is the
# script itself, so at least three entries are needed before sys.argv[2]
# can be read safely.
if len(sys.argv) < 3:
    print("Error: Usage split_data_3d.py <filename> <outfilepattern> [<n_blocks_per_axis>|<n_blocks_x> <n_blocks_y> <n_blocks_z>]", file=sys.stderr)
    sys.exit(1)
def save_hdf(filename, data, **kwargs):
    """
    Save the data to HDF5.

    The array is written as a single dataset named 'data'. The axes of the
    data are transposed and reordered to (axis 1, axis 2, axis 0), which is
    the same [1, 2, 0] permutation that the save_piece function uses.

    Args:
        filename (str) : Name of the HDF5 file
        data (np.array): 3D array with the data
        kwargs (dict) : Dict with keyword arguments for the h5py create_dataset function
    """
    # The context manager closes the file even if create_dataset raises;
    # previously the handle was left open.
    with h5py.File(filename, 'w') as f:
        # transpose reverses the axes to (2, 1, 0); swapping the first two
        # then yields (1, 2, 0).
        f.create_dataset(name='data', data=np.swapaxes(np.transpose(data), 0, 1), **kwargs)
if __name__ == '__main__':
    # Both <filename> and <outfilepattern> are mandatory. sys.argv[0] is the
    # script itself, so at least three entries are needed before sys.argv[2]
    # can be read safely.
    if len(sys.argv) < 3:
        print("Error: Usage split_data_3d.py <filename> <outfilepattern> [<n_blocks_per_axis>|<n_blocks_x> <n_blocks_y> <n_blocks_z>]", file=sys.stderr)
        sys.exit(1)

    # Parse parameters
    in_filename = sys.argv[1]
    name, ext = os.path.splitext(in_filename)
    # The output pattern is %-formatted with the block number,
    # e.g. name + '_split_%d.txt'
    out_filename_pattern = sys.argv[2]

    # Default to 2 blocks per axis; accept either one count for all axes or
    # one count per axis.
    n_blocks = (2, 2, 2)
    if len(sys.argv) >= 6:
        n_blocks = (int(sys.argv[3]), int(sys.argv[4]), int(sys.argv[5]))
    elif len(sys.argv) > 3:
        n_blocks = (int(sys.argv[3]), int(sys.argv[3]), int(sys.argv[3]))

    # Read data
    if ext == '.bov':
        data = readBOV(in_filename)[2]
    else:
        data = read_file(in_filename)

    # Export the full data set to HDF5 as well. The name is derived from the
    # name of block 0 by swapping its extension, so it can never coincide
    # with a block file even when the pattern does not end in '.txt'.
    if USE_HDF:
        hdf_filename = os.path.splitext(out_filename_pattern % 0)[0] + '.h5'
        save_hdf(hdf_filename, data)

    # Python order is slice, row, col
    # Compute split points
    split_points_s = split_points(data.shape[0], n_blocks[2])
    split_points_r = split_points(data.shape[1], n_blocks[0])
    split_points_c = split_points(data.shape[2], n_blocks[1])

    # Name of the file that records the slice values
    # NOTE(review): this name is not used anywhere in the visible code.
    slice_filename = name + '_slices.txt'

    # Save blocks. Neighbouring blocks share their boundary index, hence the
    # "+ 1" when computing each block size.
    block_no = 0
    for block_index_s, (s_start, s_stop) in enumerate(zip(split_points_s, split_points_s[1:])):
        for block_index_r, (r_start, r_stop) in enumerate(zip(split_points_r, split_points_r[1:])):
            for block_index_c, (c_start, c_stop) in enumerate(zip(split_points_c, split_points_c[1:])):
                n_s = s_stop - s_start + 1
                n_r = r_stop - r_start + 1
                n_c = c_stop - c_start + 1
                # Offsets, block indices and sizes are passed in file order (row, col, slice)
                save_piece(out_filename_pattern % block_no, data, (r_start, c_start, s_start), n_blocks, (block_index_r, block_index_c, block_index_s), (n_r, n_c, n_s))
                block_no += 1