Skip to content

Commit

Permalink
Bugfix: fix type of number for offset and size (#229)
Browse files Browse the repository at this point in the history
* fix the integer dtype (uint8 -> uint64) used when reading the offset and size index files

* optimization for reading in the DeepSpeed framework.

* increase buffer to 16 MB

* revert changes
  • Loading branch information
hariharan-devarajan authored Oct 17, 2024
1 parent b95fb26 commit edb4b27
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 4 deletions.
2 changes: 1 addition & 1 deletion dlio_benchmark/data_generator/indexed_binary_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ def generate(self):
out_path_spec_off_idx = self.index_file_path_off(out_path_spec)
out_path_spec_sz_idx = self.index_file_path_size(out_path_spec)
fh = MPI.File.Open(comm, out_path_spec, amode)
samples_per_loop = int(MB / sample_size)
samples_per_loop = int(MB * 16 / sample_size)

for sample_index in range(self.my_rank*samples_per_rank, samples_per_rank*(self.my_rank+1), samples_per_loop):
#logging.info(f"{utcnow()} rank {self.my_rank} writing {sample_index} * {samples_per_loop} for {samples_per_rank} samples")
Expand Down
6 changes: 3 additions & 3 deletions dlio_benchmark/reader/indexed_binary_mmap_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,10 +57,10 @@ def load_index_file(self, global_sample_idx, filename, sample_index):
self.file_map_ibr[filename] = []
bin_buffer_mmap = np.memmap(offset_file, mode='r', order='C')
bin_buffer = memoryview(bin_buffer_mmap)
self.file_map_ibr[filename].append(np.frombuffer(bin_buffer, dtype=np.uint8))
self.file_map_ibr[filename].append(np.frombuffer(bin_buffer, dtype=np.uint64))
bin_buffer_mmap = np.memmap(sz_file, mode='r', order='C')
bin_buffer = memoryview(bin_buffer_mmap)
self.file_map_ibr[filename].append(np.frombuffer(bin_buffer, dtype=np.uint8))
self.file_map_ibr[filename].append(np.frombuffer(bin_buffer, dtype=np.uint64))

@dlp.log
def load_index(self):
Expand Down Expand Up @@ -113,4 +113,4 @@ def is_index_based(self):
return True

def is_iterator_based(self):
return True
return True

0 comments on commit edb4b27

Please sign in to comment.