Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Some first performance improvements #26

Merged
merged 9 commits into from
Feb 22, 2024
14 changes: 1 addition & 13 deletions src/CylindricalWeightingFieldCalculator.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -384,20 +384,14 @@ void CylindricalWeightingFieldCalculator::Calculate(std::filesystem::path outdir
}

cld.ind_t = stepcnt++;
std::cout << "entering saving chunkloop" << std::endl;
m_f -> loop_in_chunks(meep::eisvogel_saving_chunkloop, static_cast<void*>(&cld), m_f -> total_volume());
std::cout << "exit saving chunkloop" << std::endl;

if((stepcnt % 400) == 0) {
std::cout << "start chunk merging" << std::endl;
fstor -> MergeChunks(0, 400);
std::cout << "end chunk merging" << std::endl;
}
}

std::cout << "start chunk merging" << std::endl;
fstor -> MergeChunks(0, 400);
std::cout << "end chunk merging" << std::endl;

// TODO: again, will get better once the three separate arrays are gone
// TODO: for large weighting fields, will have to move chunks to the permanent location continuously throughout the calculation so as not to fill up local storage
Expand All @@ -418,13 +412,7 @@ void CylindricalWeightingFieldCalculator::Calculate(std::filesystem::path outdir

if(meep::am_master()) {
std::shared_ptr<CylindricalWeightingField> cwf = std::make_shared<CylindricalWeightingField>(outdir, *m_start_coords, *m_end_coords);
cwf -> MakeMetadataPersistent();

// Sometimes need to wait for all files to show up?
// std::this_thread::sleep_for(std::chrono::milliseconds(1000));

std::cout << "start defragmentation" << std::endl;
cwf -> MakeMetadataPersistent();
cwf -> RebuildChunks(requested_chunk_size);
std::cout << "end defragmentation" << std::endl;
}
}
8 changes: 4 additions & 4 deletions src/DefaultSerializationTraits.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -111,10 +111,10 @@ namespace stor {
}
};

template<std::size_t n>
struct Traits<std::array<float, n>> {
// template<std::size_t n>
// struct Traits<std::array<float, n>> {

};
// };

// For general vectors
template <typename T>
Expand Down Expand Up @@ -192,7 +192,7 @@ namespace stor {

type retval;
for(std::size_t ind = 0; ind < keys.size(); ind++) {
retval[keys[ind]] = values[ind];
retval.insert(retval.end(), std::pair{keys[ind], values[ind]});
}

return retval;
Expand Down
15 changes: 15 additions & 0 deletions src/DenseNDArray.hh
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,21 @@ public:

return retval;
}

// Rebuilds a dense array directly from the on-disk sparse representation,
// skipping the intermediate SparseNDArray (and its std::map) entirely.
// Stream layout (must match the sparse serializer): default value, shape,
// then parallel containers of element indices and element values.
static DenseNDArray<T, dims> FromSparseFile(std::iostream& stream) {
  using index_arr_t = std::array<std::size_t, dims>;

  T fill_value = stor::Traits<T>::deserialize(stream);
  shape_t arr_shape = stor::Traits<shape_t>::deserialize(stream);
  DenseNDArray<T, dims> dense(arr_shape, fill_value);

  // Read the two parallel vectors describing the explicitly-stored elements
  std::vector<index_arr_t> entry_inds = stor::Traits<std::vector<index_arr_t>>::deserialize(stream);
  std::vector<T> entry_vals = stor::Traits<std::vector<T>>::deserialize(stream);

  // Scatter the stored values into the dense array; everything else keeps `fill_value`
  auto cur_val = entry_vals.begin();
  for(const index_arr_t& cur_ind : entry_inds) {
    dense(cur_ind) = *cur_val;
    ++cur_val;
  }

  return dense;
}

DenseNDArray(const shape_t& shape, std::vector<T>&& data) : NDArray<T, dims>(shape) {
m_strides[0] = 1;
Expand Down
2 changes: 2 additions & 0 deletions src/DistributedNDArray.hh
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,8 @@ private:
// can think about turning this into a class
index_t m_chunk_index;

std::size_t m_chunk_last_accessed;

// The index may not start at {0, 0, 0}
IndexVector m_global_start_ind;

Expand Down
41 changes: 29 additions & 12 deletions src/DistributedNDArray.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ namespace stor {
template <class T, std::size_t dims, template<class, std::size_t> class DenseT, template<class, std::size_t> class SparseT, class SerializerT>
DistributedNDArray<T, dims, DenseT, SparseT, SerializerT>::DistributedNDArray(std::string dirpath, std::size_t max_cache_size, SerializerT& ser) :
NDArray<T, dims>(), m_dirpath(dirpath), m_indexpath(dirpath + "/index.bin"), m_max_cache_size(max_cache_size),
m_global_start_ind(dims, 0), m_ser(ser) {
m_chunk_last_accessed(0), m_global_start_ind(dims, 0), m_ser(ser) {

// Create directory if it does not already exist
if(!std::filesystem::exists(m_dirpath)) {
Expand Down Expand Up @@ -104,7 +104,8 @@ void DistributedNDArray<T, dims, DenseT, SparseT, SerializerT>::WriteChunk(const
std::size_t num_elems = chunk.volume();

// pays off to store as sparse chunk
std::size_t sparse_vs_dense_expense_ratio = 3; // sparse storage is approximately 3x as expensive as dense storage per nonzero element
// std::size_t sparse_vs_dense_expense_ratio = 3; // when only counting storage space: sparse storage is approximately 3x as expensive as dense storage per nonzero element
std::size_t sparse_vs_dense_expense_ratio = 20; // when also counting complexity of deserializing + rebuilding a dense chunk
if(sparse_vs_dense_expense_ratio * num_nonzero_elems < num_elems) {

std::cout << "going to sparsify" << std::endl;
Expand Down Expand Up @@ -232,15 +233,15 @@ void DistributedNDArray<T, dims, DenseT, SparseT, SerializerT>::RebuildChunks(co
dense_t current_chunk = retrieveChunk(chunk_index);

if(actual_chunk_shape == current_chunk.shape()) {
std::cout << "chunk already has the correct size, keep it" << std::endl;
// std::cout << "chunk already has the correct size, keep it" << std::endl;
chunks_to_keep.push_back(m_chunk_index[chunk_index]);
continue;
}

std::cout << "now working on rebuild chunk with inds" << std::endl;
std::cout << "chunk_inds_start = " << std::endl;
// std::cout << "now working on rebuild chunk with inds" << std::endl;
// std::cout << "chunk_inds_start = " << std::endl;
chunk_inds_start.print();
std::cout << "chunk_inds_end = " << std::endl;
// std::cout << "chunk_inds_end = " << std::endl;
chunk_inds_end.print();

dense_t chunk = range(chunk_inds_start, chunk_inds_end);
Expand Down Expand Up @@ -436,11 +437,26 @@ bool DistributedNDArray<T, dims, DenseT, SparseT, SerializerT>::chunkContainsInd

template <class T, std::size_t dims, template<class, std::size_t> class DenseT, template<class, std::size_t> class SparseT, class SerializerT>
std::size_t DistributedNDArray<T, dims, DenseT, SparseT, SerializerT>::getChunkIndex(const IndexVector& inds) {
std::size_t chunk_ind = 0;
for(chunk_ind = 0; chunk_ind < m_chunk_index.size(); chunk_ind++) {
if(chunkContainsInds(m_chunk_index[chunk_ind], inds)) {
return chunk_ind;
}

if(m_chunk_index.size() == 0) {
[[unlikely]];
throw ChunkNotFoundError();
}

if(chunkContainsInds(m_chunk_index[m_chunk_last_accessed], inds)) {
[[likely]];
return m_chunk_last_accessed;
}
else {
// Trigger a full chunk lookup
// TODO: have a search tree here with logarithmic instead of linear complexity
std::size_t chunk_ind = 0;
for(chunk_ind = 0; chunk_ind < m_chunk_index.size(); chunk_ind++) {
if(chunkContainsInds(m_chunk_index[chunk_ind], inds)) {
m_chunk_last_accessed = chunk_ind;
return chunk_ind;
}
}
}

std::cout << "HHHHHHH" << std::endl;
Expand Down Expand Up @@ -476,7 +492,8 @@ DistributedNDArray<T, dims, DenseT, SparseT, SerializerT>::dense_t& DistributedN
m_chunk_cache.insert({chunk_ind, m_ser.template deserialize<dense_t>(ifs)});
}
else if(meta.chunk_type == ChunkType::sparse) {
m_chunk_cache.insert({chunk_ind, dense_t::From(m_ser.template deserialize<sparse_t>(ifs))});
m_chunk_cache.insert({chunk_ind, dense_t::FromSparseFile(ifs)});
// m_chunk_cache.insert({chunk_ind, dense_t::From(m_ser.template deserialize<sparse_t>(ifs))});
}
else {
throw std::runtime_error("Error: unknown chunk type encountered!");
Expand Down
8 changes: 4 additions & 4 deletions src/NDArrayOperations.hh
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ namespace NDArrayOps {
template <class T, std::size_t dims>
DenseNDArray<T, dims> concatenate(const DenseNDArray<T, dims>& arr_1, const DenseNDArray<T, dims>& arr_2, std::size_t axis) {

std::cout << " ---> START CONCATENATE <---" << std::endl;
// std::cout << " ---> START CONCATENATE <---" << std::endl;

if(axis >= dims) {
throw std::runtime_error("Error: 'axis' out of bounds");
Expand Down Expand Up @@ -39,7 +39,7 @@ namespace NDArrayOps {

DenseNDArray<T, dims> retval(final_shape_crutch, 0.0);

std::cout << " ---> MIGRATE ARR_1 <---" << std::endl;
// std::cout << " ---> MIGRATE ARR_1 <---" << std::endl;

// Migrate contents of arr_1
{
Expand All @@ -51,7 +51,7 @@ namespace NDArrayOps {
}
}

std::cout << " ---> MIGRATE ARR_2 <---" << std::endl;
// std::cout << " ---> MIGRATE ARR_2 <---" << std::endl;

// Migrate contents of arr_2
{
Expand All @@ -68,7 +68,7 @@ namespace NDArrayOps {
}
}

std::cout << " ---> FINISH CONCATENATE <---" << std::endl;
// std::cout << " ---> FINISH CONCATENATE <---" << std::endl;

return retval;
}
Expand Down
69 changes: 69 additions & 0 deletions src/SparseNDArray.hh
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

#include <array>
#include <map>
#include <chrono>
#include "NDArray.hh"
#include "DenseNDArray.hh"
#include "Eisvogel/IteratorUtils.hh"
Expand Down Expand Up @@ -108,6 +109,74 @@ namespace stor {
};
}

// namespace stor {

// template<typename T, std::size_t dims>
// struct Traits<SparseNDArray<T, dims>> {
// using type = SparseNDArray<T, dims>;
// using shape_t = typename type::shape_t;
// using data_t = typename type::data_t;

// static void serialize(std::iostream& stream, const type& val) {
// Traits<T>::serialize(stream, val.m_default_value);
// Traits<shape_t>::serialize(stream, val.m_shape);

// // Convert array data from std::map<index, value> into two 1-dimensional vectors:
// // (index_1, index_2, ...) with total length of `index_len`, and
// // (value_1, value_2, ...) with total length of `number_entries`
// std::size_t number_entries = val.m_data.size();
// std::size_t index_len = dims * number_entries;

// std::vector<std::size_t> index_vec(index_len);
// std::vector<T> data_vec(number_entries);

// // Fill the two vectors ...
// auto it_index_vec = index_vec.begin();
// auto it_data_vec = data_vec.begin();
// for (auto const& [key, val] : val.m_data) {
// std::copy(key.cbegin(), key.cend(), it_index_vec);
// *it_data_vec = val;

// std::advance(it_data_vec, 1);
// std::advance(it_index_vec, dims);
// }

// // ... and serialize them
// Traits<std::vector<std::size_t>>::serialize(stream, index_vec);
// Traits<std::vector<T>>::serialize(stream, data_vec);
// }

// static type deserialize(std::iostream& stream) {

// std::chrono::high_resolution_clock::time_point t_start = std::chrono::high_resolution_clock::now();

// T default_value = Traits<T>::deserialize(stream);
// shape_t shape = Traits<shape_t>::deserialize(stream);

// std::vector<std::size_t> index_vec = Traits<std::vector<std::size_t>>::deserialize(stream);
// std::vector<T> data_vec = Traits<std::vector<T>>::deserialize(stream);

// // Fill `data` map from `index_vec` and `data_vec` ...
// data_t data;
// auto it_index_vec = index_vec.begin();
// auto it_data_vec = data_vec.begin();
// std::array<std::size_t, dims> cur_ind;
// while(it_data_vec != data_vec.end()) {
// std::copy_n(it_index_vec, dims, cur_ind.begin());
// data.insert(data.end(), std::pair{cur_ind, *it_data_vec});

// std::advance(it_data_vec, 1);
// std::advance(it_index_vec, dims);
// }

// // ... and build the sparse array
// SparseNDArray<T, dims> retval(shape, default_value);
// retval.m_data = data;
// return retval;
// }
// };
// }

// Some type shortcuts
template <class T>
using SparseScalarField3D = SparseNDArray<T, 3>;
Expand Down
65 changes: 62 additions & 3 deletions tests/io/testSerialization.cxx
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
#include "Serialization.hh"
#include "Eisvogel/Common.hh"
#include "DenseNDArray.hh"
#include "SparseNDArray.hh"

#include <fstream>
#include <iostream>
Expand Down Expand Up @@ -37,7 +39,7 @@ int test_serialization_vector(std::string ser_path, std::size_t length) {
std::chrono::high_resolution_clock::time_point t_end = std::chrono::high_resolution_clock::now();

std::chrono::duration<double> time_span = duration_cast<std::chrono::duration<double>>(t_end - t_start);
std::cout << "Completed in " << time_span.count() << " seconds." << std::endl;
std::cout << "std::vector --> Completed in " << time_span.count() << " seconds." << std::endl;

for(std::size_t ind = 0; ind < length; ind++) {
if(vec[ind] != res[ind]) {
Expand All @@ -48,8 +50,65 @@ int test_serialization_vector(std::string ser_path, std::size_t length) {
return 0;
}

template <std::size_t dims>
int test_serialization_sparse_array(std::string ser_path, std::size_t size) {

// Fill random sparse array
std::array<std::size_t, dims> shape;
for(std::size_t dim = 0; dim < dims; dim++) {
shape[dim] = size;
}
DenseNDArray<scalar_t, dims> darr(shape, 1.0);

auto to_keep = [](float value) -> bool {
return value != 0.0;
};
SparseNDArray<scalar_t, dims> sparr = SparseNDArray<scalar_t, dims>::From(darr, to_keep, 0.0);

std::chrono::high_resolution_clock::time_point t_start = std::chrono::high_resolution_clock::now();

std::fstream ofs;
ofs.open(ser_path, std::ios::out | std::ios::binary);
stor::DefaultSerializer oser;
oser.serialize(ofs, sparr);
ofs.close();

std::chrono::high_resolution_clock::time_point t_end = std::chrono::high_resolution_clock::now();
std::chrono::duration<double> time_span = duration_cast<std::chrono::duration<double>>(t_end - t_start);
std::cout << "SparseNDArray --> Serialization completed in " << time_span.count() << " seconds." << std::endl;

t_start = std::chrono::high_resolution_clock::now();

std::fstream ifs;
ifs.open(ser_path, std::ios::in | std::ios::binary);
stor::DefaultSerializer iser;
SparseNDArray<scalar_t, dims> sparr_read = iser.deserialize<SparseNDArray<scalar_t, dims>>(ifs);
ifs.close();

t_end = std::chrono::high_resolution_clock::now();
time_span = duration_cast<std::chrono::duration<double>>(t_end - t_start);
std::cout << "SparseNDArray --> Deserialization completed in " << time_span.count() << " seconds." << std::endl;

DenseNDArray<scalar_t, dims> darr_read = DenseNDArray<scalar_t, dims>::From(sparr_read);

IndexVector start_inds(dims, 0);
IndexVector end_inds = darr_read.shape();
for(IndexCounter cnt(start_inds, end_inds); cnt.running(); ++cnt) {
IndexVector cur_ind = cnt.index();
if(darr(cur_ind) != darr_read(cur_ind)) {
throw std::runtime_error("Error: mistake");
}
}

std::cout << "Test passed" << std::endl;

return 0;
}

// Entry point for the serialization tests.
// NOTE: the rendered diff showed both the removed and the added declaration
// of `ser_path`, which would be a redefinition error if compiled as-is;
// only the updated declaration is kept here.
int main(int argc, char* argv[]) {

  // Scratch file used by the round-trip tests
  std::string ser_path = "/tmp/ser_test.bin";

  // test_serialization_vector(ser_path, 1e6);
  test_serialization_sparse_array<3>(ser_path, 200);

  return 0;
}