From 2488f56cd815d4ff40c5373fd17b7349e2468e25 Mon Sep 17 00:00:00 2001 From: LTLA Date: Wed, 15 May 2024 11:25:01 -0700 Subject: [PATCH 1/5] Begin updating for changes to the underlying libs. --- include/tatami_r/UnknownMatrix.hpp | 5 +- include/tatami_r/dense_extractor.hpp | 407 ++++++++++++++++----------- include/tatami_r/dense_matrix.hpp | 12 +- 3 files changed, 250 insertions(+), 174 deletions(-) diff --git a/include/tatami_r/UnknownMatrix.hpp b/include/tatami_r/UnknownMatrix.hpp index 478dd6c..a5e3005 100644 --- a/include/tatami_r/UnknownMatrix.hpp +++ b/include/tatami_r/UnknownMatrix.hpp @@ -3,7 +3,6 @@ #include "Rcpp.h" #include "tatami/tatami.hpp" -#include "tatami_chunked/tatami_chunked.hpp" #include "dense_extractor.hpp" #include "sparse_extractor.hpp" @@ -17,9 +16,9 @@ namespace tatami_r { /** - * @brief Options for R matrix extraction. + * @brief Options for data extraction from an `UnknownMatrix`. */ -struct Options { +struct UnknownMatrixOptions { /** * Size of the cache, in bytes. * If -1, this is determined from `DelayedArray::getAutoBlockSize()`. diff --git a/include/tatami_r/dense_extractor.hpp b/include/tatami_r/dense_extractor.hpp index 12df5c5..1e8261a 100644 --- a/include/tatami_r/dense_extractor.hpp +++ b/include/tatami_r/dense_extractor.hpp @@ -8,231 +8,300 @@ #include #include +#include namespace tatami_r { namespace UnknownMatrix_internal { -template -struct DenseBase : public tatami::DenseExtractor { - DenseBase( +/******************** + *** Core classes *** + ********************/ + +template +struct SoloDenseCore { + SoloDenseBase( const Rcpp::RObject& mat, const Rcpp::Function& dense_extractor, tatami::MaybeOracle ora, - Rcpp::IntegerVector secondary_extract, + Rcpp::IntegerVector non_target_extract, bool by_column, - Index_ max_primary_chunk_length, - const std::vector& ticks, - const std::vector& map, - size_t cache_size_in_bytes, - bool require_minimum_cache) : + [[maybe_unused]] const std::vector& ticks, + [[maybe_unused]] const std::vector& map, + [[maybe_unused]] const tatami_chunked::SlabCacheStats& stats) : mat(mat), dense_extractor(dense_extractor), extract_args(2), - by_column(by_column), - chunk_ticks(ticks), - chunk_map(map), - secondary_length(secondary_extract.size()), - cache( - max_primary_chunk_length, - secondary_length, - cache_size_in_bytes / sizeof(CachedValue_), - require_minimum_cache, - std::move(ora) - ) + by_column(by_column) { - if (cache.num_slabs_in_cache == 0) { - solo.resize(secondary_length); + extract_args[static_cast(!by_column)] = non_target_extract; + } + +private: + const Rcpp::RObject& mat; + const Rcpp::Function& dense_extractor; + Rcpp::List extract_args; + bool by_column; + +public: + template + void fetch_raw(Index_ i, Value_* buffer) { + if constexpr(oracle_) { + i = cache.cache.next(); } +#ifdef TATAMI_R_PARALLELIZE_UNKNOWN + // This involves some Rcpp initializations, so we lock it just in case. + auto& mexec = executor(); + mexec.run([&]() -> void { +#endif + + extract_args[static_cast(by_column)] = Rcpp::IntegerVector::create(i + 1); + auto obj = dense_extractor(mat, extract_args); if (by_column) { - extract_args[0] = secondary_extract; + parse_dense_matrix(obj, buffer, 0, 0, non_target_length, 1); } else { - extract_args[1] = secondary_extract; + parse_dense_matrix(obj, buffer, 0, 0, 1, non_target_length); } + +#ifdef TATAMI_R_PARALLELIZE_UNKNOWN + }); +#endif } +}; - ~DenseBase() = default; +template +struct MyopicDenseCore { + MyopicDenseBase( + const Rcpp::RObject& mat, + const Rcpp::Function& dense_extractor, + [[maybe_unused]] tatami::MaybeOracle ora, + Rcpp::IntegerVector non_target_extract, + bool by_column, + const std::vector& ticks, + const std::vector& map, + const tatami_chunked::SlabCacheStats& stats) : + mat(mat), + dense_extractor(dense_extractor), + extract_args(2), + by_column(by_column), + chunk_ticks(ticks), + chunk_map(map), + non_target_length(non_target_extract.size()), + factory(stats), + cache(max_slabs) + { + extract_args[static_cast(!by_column)] = non_target_extract; + } private: - typedef std::vector Slab; - const Rcpp::RObject& mat; const Rcpp::Function& dense_extractor; Rcpp::List extract_args; - bool by_column; + const std::vector& chunk_ticks; const std::vector& chunk_map; - size_t secondary_length; + size_t non_target_length; - tatami_chunked::TypicalSlabCacheWorkspace cache; - - Slab solo; - -private: - std::pair fetch_raw(Index_ i) { - if (cache.num_slabs_in_cache == 0) { - if constexpr(oracle_) { - i = cache.cache.next(); - } + tatami_chunked::DenseSlabFactory factory; + typedef typename decltype(factory)::Slab; + tatami_chunked::LruSlabCache cache; +public: + template + void fetch_raw(Index_ i, Value_* buffer) { + auto chosen = chunk_map[i]; + + const auto& slab = cache.find( + chosen, + [&]() -> Slab { + return factory.create(); + }, + [&](Index_ id, Slab& cache) { #ifdef TATAMI_R_PARALLELIZE_UNKNOWN - // This involves some Rcpp initializations, so we lock it just in case. - auto& mexec = executor(); - mexec.run([&]() -> void { + // This involves some Rcpp initializations, so we lock it just in case. + auto& mexec = executor(); + mexec.run([&]() -> void { #endif - extract_args[static_cast(by_column)] = Rcpp::IntegerVector::create(i + 1); - auto obj = dense_extractor(mat, extract_args); - if (by_column) { - parse_dense_matrix(obj, solo, 0, 0, secondary_length, 1); - } else { - parse_dense_matrix(obj, solo, 0, 0, 1, secondary_length); - } + auto chunk_start = chunk_ticks[id]; + size_t chunk_len = chunk_ticks[id + 1] - chunk_start; + Rcpp::IntegerVector primary_extract(chunk_len); + std::iota(primary_extract.begin(), primary_extract.end(), chunk_start + 1); + extract_args[static_cast(by_column)] = primary_extract; + auto obj = dense_extractor(mat, extract_args); + if (by_column) { + parse_dense_matrix(obj, cache.data, 0, 0, non_target_length, chunk_len); + } else { + parse_dense_matrix(obj, cache.data, 0, 0, chunk_len, non_target_length); + } #ifdef TATAMI_R_PARALLELIZE_UNKNOWN - }); + }); #endif + } + ); - return std::make_pair(&solo, static_cast(0)); - - } else if constexpr(!oracle_) { - auto chosen = chunk_map[i]; + auto src = slab.data + static_cast(i - chunk_ticks[chosen]) * non_target_length; + std::copy_n(src, non_target_length, buffer); + } +}; - const auto& slab = cache.cache.find( - chosen, - [&]() -> Slab { - return Slab(); - }, - [&](Index_ id, Slab& cache) { -#ifdef TATAMI_R_PARALLELIZE_UNKNOWN - // This involves some Rcpp initializations, so we lock it just in case. - auto& mexec = executor(); - mexec.run([&]() -> void { -#endif +template +struct OracularDenseCore { + OracularDenseBase( + const Rcpp::RObject& mat, + const Rcpp::Function& dense_extractor, + tatami::MaybeOracle ora, + Rcpp::IntegerVector non_target_extract, + bool by_column, + const std::vector& ticks, + const std::vector& map, + const tatami_chunked::SlabCacheStats& stats) : + mat(mat), + dense_extractor(dense_extractor), + extract_args(2), + by_column(by_column), + chunk_ticks(ticks), + chunk_map(map), + non_target_length(non_target_extract.size()), + factory(stats), + cache(std::move(ora), max_slabs) + { + extract_args[static_cast(!by_column)] = non_target_extract; + } - auto chunk_start = chunk_ticks[id]; - size_t chunk_len = chunk_ticks[id + 1] - chunk_start; - Rcpp::IntegerVector primary_extract(chunk_len); - std::iota(primary_extract.begin(), primary_extract.end(), chunk_start + 1); - extract_args[static_cast(by_column)] = primary_extract; - auto obj = dense_extractor(mat, extract_args); - if (by_column) { - parse_dense_matrix(obj, cache, 0, 0, secondary_length, chunk_len); - } else { - parse_dense_matrix(obj, cache, 0, 0, chunk_len, secondary_length); - } +private: + const Rcpp::RObject& mat; + const Rcpp::Function& dense_extractor; + Rcpp::List extract_args; + bool by_column; -#ifdef TATAMI_R_PARALLELIZE_UNKNOWN - }); -#endif + const std::vector& chunk_ticks; + const std::vector& chunk_map; + size_t non_target_length; + + tatami_chunked::DenseSlabFactory factory; + typedef typename decltype(factory)::Slab; + tatami_chunked::OracularSlabCache cache; + +public + template + void fetch_raw(Index_, Value_* buffer) { + auto res = cache.cache.next( + [&](Index_ i) -> std::pair { + auto chosen = chunk_map[i]; + return std::make_pair(chosen, static_cast(i - chunk_ticks[chosen])); + }, + [&]() -> Slab { + return factory.create(); + }, + [&](std::vector >& to_populate) { + // Sorting them so that the indices are in order. + if (!std::is_sorted(to_populate.begin(), to_populate.end(), [&](const std::pair& left, const std::pair right) { return left.first < right.first; })) { + std::sort(to_populate.begin(), to_populate.end(), [&](const std::pair& left, const std::pair right) { return left.first < right.first; }); } - ); - return std::make_pair(&slab, static_cast(i - chunk_ticks[chosen])); - } else { - return cache.cache.next( - [&](Index_ i) -> std::pair { - auto chosen = chunk_map[i]; - return std::make_pair(chosen, static_cast(i - chunk_ticks[chosen])); - }, - [&]() -> Slab { - return Slab(); - }, - [&](std::vector >& to_populate) { - // Sorting them so that the indices are in order. - if (!std::is_sorted(to_populate.begin(), to_populate.end(), [&](const std::pair& left, const std::pair right) { return left.first < right.first; })) { - std::sort(to_populate.begin(), to_populate.end(), [&](const std::pair& left, const std::pair right) { return left.first < right.first; }); - } - - Index_ total_len = 0; - for (const auto& p : to_populate) { - total_len += chunk_ticks[p.first + 1] - chunk_ticks[p.first]; - } + Index_ total_len = 0; + for (const auto& p : to_populate) { + total_len += chunk_ticks[p.first + 1] - chunk_ticks[p.first]; + } #ifdef TATAMI_R_PARALLELIZE_UNKNOWN - // This involves some Rcpp initializations, so we lock it just in case. - auto& mexec = executor(); - mexec.run([&]() -> void { + // This involves some Rcpp initializations, so we lock it just in case. + auto& mexec = executor(); + mexec.run([&]() -> void { #endif - Rcpp::IntegerVector primary_extract(total_len); - Index_ current = 0; - for (const auto& p : to_populate) { - Index_ chunk_start = chunk_ticks[p.first]; - Index_ chunk_len = chunk_ticks[p.first + 1] - chunk_start; - auto start = primary_extract.begin() + current; - std::iota(start, start + chunk_len, chunk_ticks[p.first] + 1); - current += chunk_len; - } + Rcpp::IntegerVector primary_extract(total_len); + Index_ current = 0; + for (const auto& p : to_populate) { + Index_ chunk_start = chunk_ticks[p.first]; + Index_ chunk_len = chunk_ticks[p.first + 1] - chunk_start; + auto start = primary_extract.begin() + current; + std::iota(start, start + chunk_len, chunk_ticks[p.first] + 1); + current += chunk_len; + } + + extract_args[static_cast(by_column)] = primary_extract; + auto obj = dense_extractor(mat, extract_args); - extract_args[static_cast(by_column)] = primary_extract; - auto obj = dense_extractor(mat, extract_args); - - current = 0; - for (const auto& p : to_populate) { - auto chunk_start = chunk_ticks[p.first]; - Index_ chunk_len = chunk_ticks[p.first + 1] - chunk_start; - if (by_column) { - parse_dense_matrix(obj, *p.second, 0, current, secondary_length, chunk_len); - } else { - parse_dense_matrix(obj, *p.second, current, 0, chunk_len, secondary_length); - } - current += chunk_len; + current = 0; + for (const auto& p : to_populate) { + auto chunk_start = chunk_ticks[p.first]; + Index_ chunk_len = chunk_ticks[p.first + 1] - chunk_start; + if (by_column) { + parse_dense_matrix(obj, p.second->data, 0, current, non_target_length, chunk_len); + } else { + parse_dense_matrix(obj, p.second->data, current, 0, chunk_len, non_target_length); } + current += chunk_len; + } #ifdef TATAMI_R_PARALLELIZE_UNKNOWN - }); + }); #endif - } - ); - } - } + } + ); -public: - const Value_* fetch(Index_ i, Value_* buffer) { - auto res = fetch_raw(i); - size_t shift = this->secondary_length * static_cast(res.second); // cast to size_t to avoid overflow. - std::copy_n(res.first->data() + shift, this->secondary_length, buffer); - return buffer; + size_t shift = non_target_length * static_cast(res.second); // cast to size_t to avoid overflow. + std::copy_n(res.first->data + shift, non_target_length, buffer); } }; -template -struct DenseFull : public DenseBase { +template +using DenseCore = typename std::conditional, + typename std::conditional, + MyopicDenseCore + >::type +>::type; + +/************************* + *** Extractor classes *** + *************************/ + +template +struct DenseFull : public tatami::DenseExtractor { DenseFull( const Rcpp::RObject& mat, const Rcpp::Function& dense_extractor, tatami::MaybeOracle ora, - Index_ secondary_dim, + Index_ non_target_dim, bool by_column, - Index_ max_primary_chunk_length, const std::vector& ticks, const std::vector& map, - size_t cache_size_in_bytes, - bool require_minimum_cache) : - DenseBase( + const tatami_chunked::SlabCacheStats& stats) : + core( mat, dense_extractor, std::move(ora), [&]() { - Rcpp::IntegerVector output(secondary_dim); + Rcpp::IntegerVector output(non_target_dim); std::iota(output.begin(), output.end(), 1); return output; }(), by_column, - max_primary_chunk_length, ticks, map, - cache_size_in_bytes, - require_minimum_cache + stats ) {} + +private: + DenseCore core; + +public: + const Value_* fetch(Index_ i, Value_* buffer) { + core->fetch_raw(i, buffer); + return buffer; + } }; -template -struct DenseBlock : public DenseBase { +template +struct DenseBlock : public tatami::DenseExtractor { DenseBlock( const Rcpp::RObject& mat, const Rcpp::Function& dense_extractor, @@ -240,12 +309,10 @@ struct DenseBlock : public DenseBase { Index_ block_start, Index_ block_length, bool by_column, - Index_ max_primary_chunk_length, const std::vector& ticks, const std::vector& map, - size_t cache_size_in_bytes, - bool require_minimum_cache) : - DenseBase( + const tatami_chunked::SlabCacheStats& stats) : + core( mat, dense_extractor, std::move(ora), @@ -255,29 +322,34 @@ struct DenseBlock : public DenseBase { return output; }(), by_column, - max_primary_chunk_length, ticks, map, - cache_size_in_bytes, - require_minimum_cache + stats ) {} + +private: + DenseCore core; + +public: + const Value_* fetch(Index_ i, Value_* buffer) { + core->fetch_raw(i, buffer); + return buffer; + } }; template -struct DenseIndexed : public DenseBase { +struct DenseIndexed : public tatami::DenseExtractor { DenseIndexed( const Rcpp::RObject& mat, const Rcpp::Function& dense_extractor, tatami::MaybeOracle ora, tatami::VectorPtr indices_ptr, bool by_column, - Index_ max_primary_chunk_length, const std::vector& ticks, const std::vector& map, - size_t cache_size_in_bytes, - bool require_minimum_cache) : - DenseBase( + const tatami_chunked::SlabCacheStats& stats) : + core( mat, dense_extractor, std::move(ora), @@ -289,13 +361,20 @@ struct DenseIndexed : public DenseBase { return output; }(), by_column, - max_primary_chunk_length, ticks, map, - cache_size_in_bytes, - require_minimum_cache + stats ) {} + +private: + DenseCore core; + +public: + const Value_* fetch(Index_ i, Value_* buffer) { + core->fetch_raw(i, buffer); + return buffer; + } }; } diff --git a/include/tatami_r/dense_matrix.hpp b/include/tatami_r/dense_matrix.hpp index b93889e..dca61f4 100644 --- a/include/tatami_r/dense_matrix.hpp +++ b/include/tatami_r/dense_matrix.hpp @@ -7,26 +7,24 @@ namespace tatami_r { template -void parse_dense_matrix_internal(const InputObject_& y, std::vector& cache, size_t start_row, size_t start_col, size_t num_rows, size_t num_cols) { - cache.resize(num_rows * num_cols); +void parse_dense_matrix_internal(const InputObject_& y, CachedValue_* cache, size_t start_row, size_t start_col, size_t num_rows, size_t num_cols) { auto input = static_cast(y.begin()) + start_row + start_col * static_cast(y.rows()); - auto output = cache.data(); if constexpr(transpose_) { // y is a column-major matrix, but transpose() expects a row-major // input, so we just conceptually transpose it. - tatami::transpose(input, num_cols, num_rows, y.rows(), output, num_cols); + tatami::transpose(input, num_cols, num_rows, y.rows(), cache, num_cols); } else { for (size_t c = 0; c < num_cols; ++c) { - std::copy_n(input, num_rows, output); + std::copy_n(input, num_rows, cache); input += y.rows(); - output += num_rows; + cache += num_rows; } } } template -void parse_dense_matrix(const Rcpp::RObject& seed, std::vector& cache, size_t start_row, size_t start_col, size_t num_rows, size_t num_cols) { +void parse_dense_matrix(const Rcpp::RObject& seed, CachedValue_* cache, size_t start_row, size_t start_col, size_t num_rows, size_t num_cols) { auto stype = seed.sexp_type(); if (stype == REALSXP) { Rcpp::NumericMatrix y(seed); From 8670b70ff0153a755e66354837c0e80ad078b35e Mon Sep 17 00:00:00 2001 From: LTLA Date: Wed, 15 May 2024 14:34:49 -0700 Subject: [PATCH 2/5] Inch closer towards completion. --- include/tatami_r/UnknownMatrix.hpp | 272 +++++-------- include/tatami_r/dense_extractor.hpp | 77 ++-- include/tatami_r/sparse_extractor.hpp | 561 +++++++++++++------------- include/tatami_r/sparse_matrix.hpp | 3 + 4 files changed, 408 insertions(+), 505 deletions(-) diff --git a/include/tatami_r/UnknownMatrix.hpp b/include/tatami_r/UnknownMatrix.hpp index a5e3005..54ab53f 100644 --- a/include/tatami_r/UnknownMatrix.hpp +++ b/include/tatami_r/UnknownMatrix.hpp @@ -52,7 +52,7 @@ class UnknownMatrix : public tatami::Matrix { * @param seed A matrix-like R object. * @param opt Extraction options. */ - UnknownMatrix(Rcpp::RObject seed, const Options& opt) : + UnknownMatrix(Rcpp::RObject seed, const UnknownMatrixOptions& opt) : original_seed(seed), delayed_env(Rcpp::Environment::namespace_env("DelayedArray")), sparse_env(Rcpp::Environment::namespace_env("SparseArray")), @@ -296,90 +296,60 @@ class UnknownMatrix : public tatami::Matrix { *** Myopic dense *** ********************/ private: - template - std::unique_ptr > populate_dense(bool row, tatami::MaybeOracle ora, const tatami::Options&) const { - std::unique_ptr > output; - -#ifdef TATAMI_R_PARALLELIZE_UNKNOWN - // This involves some Rcpp initializations, so we lock it just in case. - auto& mexec = executor(); - mexec.run([&]() -> void { -#endif - - if (!internal_sparse) { - output.reset(new UnknownMatrix_internal::DenseFull( - original_seed, - dense_extractor, - std::move(ora), - secondary_dim(row), - !row, - max_primary_chunk_length(row), - chunk_ticks(row), - chunk_map(row), - cache_size_in_bytes, - require_minimum_cache - )); + template + void populate_dense_by_sparse(std::unique_ptr >& output, Args_&& ... args) { + if (!internal_sparse) { + output.reset(new UnknownMatrix_internal::DenseFull(std::forward(args)...)); } else { - output.reset(new UnknownMatrix_internal::DensifiedSparseFull( - original_seed, - sparse_extractor, - std::move(ora), - secondary_dim(row), - !row, - max_primary_chunk_length(row), - chunk_ticks(row), - chunk_map(row), - cache_size_in_bytes, - require_minimum_cache - )); + output.reset(new UnknownMatrix_internal::DensifiedSparseFull(std::forward(args)...)); } + } -#ifdef TATAMI_R_PARALLELIZE_UNKNOWN - }); -#endif + template + void populate_dense_by_solo(std::unique_ptr >& output, bool solo, Args_&& ... args) { + if (solo) { + populate_dense_by_sparse(output, std::forward(args)...); + } else { + populate_dense_by_sparse(output, std::forward(args)...); + } + } - return output; + template + void populate_dense_by_accrow(std::unique_ptr >& output, bool row, bool solo, Args_&& ... args) { + if (row) { + populate_dense_by_solo(output, solo, std::forward(args)...); + } else { + populate_dense_by_solo(output, solo, std::forward(args)...); + } } - template - std::unique_ptr > populate_dense(bool row, tatami::MaybeOracle ora, Index_ block_start, Index_ block_length, const tatami::Options&) const { + template + std::unique_ptr > populate_dense_internal(bool row, Index_ non_target_length, tatami::MaybeOracle ora, Args_&& ... args) const { std::unique_ptr > output; + Index_ max_target_chunk_length = max_primary_chunk_length(row); + tatami_chunked::SlabCacheStats stats(max_target_chunk_length, non_target_length, cache_size_in_bytes, sizeof(CachedValue_), require_minimum_cache); + #ifdef TATAMI_R_PARALLELIZE_UNKNOWN // This involves some Rcpp initializations, so we lock it just in case. auto& mexec = executor(); mexec.run([&]() -> void { #endif - if (!internal_sparse) { - output.reset(new UnknownMatrix_internal::DenseBlock( - original_seed, - dense_extractor, - std::move(ora), - block_start, - block_length, - !row, - max_primary_chunk_length(row), - chunk_ticks(row), - chunk_map(row), - cache_size_in_bytes, - require_minimum_cache - )); - } else { - output.reset(new UnknownMatrix_internal::DensifiedSparseBlock( - original_seed, - sparse_extractor, - std::move(ora), - block_start, - block_length, - !row, - max_primary_chunk_length(row), - chunk_ticks(row), - chunk_map(row), - cache_size_in_bytes, - require_minimum_cache - )); - } + populate_dense_by_accrow( + output, + row, + stats.max_slabs_in_cache == 0, + // And now all the other arguments. + original_seed, + dense_extractor, + std::move(ora), + std::forward(args)..., + max_target_chunk_length, + chunk_ticks(row), + chunk_map(row), + stats + ); #ifdef TATAMI_R_PARALLELIZE_UNKNOWN }); @@ -389,48 +359,20 @@ class UnknownMatrix : public tatami::Matrix { } template - std::unique_ptr > populate_dense(bool row, tatami::MaybeOracle ora, tatami::VectorPtr indices_ptr, const tatami::Options&) const { - std::unique_ptr > output; - -#ifdef TATAMI_R_PARALLELIZE_UNKNOWN - // This involves some Rcpp initializations, so we lock it just in case. - auto& mexec = executor(); - mexec.run([&]() -> void { -#endif - - if (!internal_sparse) { - output.reset(new UnknownMatrix_internal::DenseIndexed( - original_seed, - dense_extractor, - std::move(ora), - std::move(indices_ptr), - !row, - max_primary_chunk_length(row), - chunk_ticks(row), - chunk_map(row), - cache_size_in_bytes, - require_minimum_cache - )); - } else { - output.reset(new UnknownMatrix_internal::DensifiedSparseIndexed( - original_seed, - sparse_extractor, - std::move(ora), - std::move(indices_ptr), - !row, - max_primary_chunk_length(row), - chunk_ticks(row), - chunk_map(row), - cache_size_in_bytes, - require_minimum_cache - )); - } + std::unique_ptr > populate_dense(bool row, tatami::MaybeOracle ora, const tatami::Options&) const { + Index_ non_target_dim = secondary_dim(row); + return populate_dense_internal(row, non_target_dim, std::move(ora), non_target_dim); + } -#ifdef TATAMI_R_PARALLELIZE_UNKNOWN - }); -#endif + template + std::unique_ptr > populate_dense(bool row, tatami::MaybeOracle ora, Index_ block_start, Index_ block_length, const tatami::Options&) const { + return populate_dense_internal(row, block_length, std::move(ora), block_start, block_length); + } - return output; + template + std::unique_ptr > populate_dense(bool row, tatami::MaybeOracle ora, tatami::VectorPtr indices_ptr, const tatami::Options&) const { + Index_ nidx = indices_ptr->size(); + return populate_dense_internal(row, nidx, std::move(ora), std::move(indices_ptr)); } public: @@ -466,59 +408,56 @@ class UnknownMatrix : public tatami::Matrix { *** Myopic sparse *** *********************/ public: - template - std::unique_ptr > populate_sparse(bool row, tatami::MaybeOracle ora, const tatami::Options& opt) const { - std::unique_ptr > output; - -#ifdef TATAMI_R_PARALLELIZE_UNKNOWN - // This involves some Rcpp initializations, so we lock it just in case. - auto& mexec = executor(); - mexec.run([&]() -> void { -#endif - - output.reset(new UnknownMatrix_internal::SparseFull( - original_seed, - sparse_extractor, - std::move(ora), - secondary_dim(row), - !row, - max_primary_chunk_length(row), - chunk_ticks(row), - chunk_map(row), - cache_size_in_bytes, - require_minimum_cache, - opt.sparse_extract_value, - opt.sparse_extract_index - )); - -#ifdef TATAMI_R_PARALLELIZE_UNKNOWN - }); -#endif + template + void populate_sparse_by_solo(std::unique_ptr >& output, bool solo, Args_&& ... args) { + if (solo) { + output.reset(new UnknownMatrix_internal::SparseFull(std::forward(args)...)); + } else { + output.reset(new UnknownMatrix_internal::SparseFull(std::forward(args)...)); + } + } - return output; + template + void populate_sparse_by_accrow(std::unique_ptr >& output, bool row, bool solo, Args_&& ... args) { + if (row) { + populate_sparse_by_solo(output, solo, std::forward(args)...); + } else { + populate_sparse_by_solo(output, solo, std::forward(args)...); + } } - template - std::unique_ptr > populate_sparse(bool row, tatami::MaybeOracle ora, Index_ block_start, Index_ block_length, const tatami::Options& opt) const { + template + std::unique_ptr > populate_sparse(bool row, tatami::MaybeOracle ora, const tatami::Options& opt, Args_&& ... args) const { std::unique_ptr > output; + Index_ max_target_chunk_length = max_primary_chunk_length(row); + tatami_chunked::SlabCacheStats stats( + max_target_chunk_length, + non_target_length, + cache_size_in_bytes, + (opt.sparse_extract_index ? sizeof(CachedIndex_) : 0) + (opt.sparse_extract_value ? sizeof(CachedValue_) : 0), + require_minimum_cache + ); + #ifdef TATAMI_R_PARALLELIZE_UNKNOWN // This involves some Rcpp initializations, so we lock it just in case. auto& mexec = executor(); mexec.run([&]() -> void { #endif - output.reset(new UnknownMatrix_internal::SparseBlock( + populate_sparse_by_accrow( + output, + row, + stats.max_slabs_in_cache == 0, + // And now the rest of the arguments. original_seed, sparse_extractor, std::move(ora), - block_start, - block_length, - !row, + std::forward(args)..., max_primary_chunk_length(row), chunk_ticks(row), chunk_map(row), - cache_size_in_bytes, + stats, require_minimum_cache, opt.sparse_extract_value, opt.sparse_extract_index @@ -532,35 +471,20 @@ class UnknownMatrix : public tatami::Matrix { } template - std::unique_ptr > populate_sparse(bool row, tatami::MaybeOracle ora, tatami::VectorPtr indices_ptr, const tatami::Options& opt) const { - std::unique_ptr > output; - -#ifdef TATAMI_R_PARALLELIZE_UNKNOWN - // This involves some Rcpp initializations, so we lock it just in case. - auto& mexec = executor(); - mexec.run([&]() -> void { -#endif - - output.reset(new UnknownMatrix_internal::SparseIndexed( - original_seed, - sparse_extractor, - std::move(ora), - std::move(indices_ptr), - !row, - max_primary_chunk_length(row), - chunk_ticks(row), - chunk_map(row), - cache_size_in_bytes, - require_minimum_cache, - opt.sparse_extract_value, - opt.sparse_extract_index - )); + std::unique_ptr > populate_sparse(bool row, tatami::MaybeOracle ora, const tatami::Options& opt) const { + Index_ non_target_dim = secondary_dim(row); + return populate_sparse_internal(row, non_target_dim, std::move(ora), opt, non_target_dim); + } -#ifdef TATAMI_R_PARALLELIZE_UNKNOWN - }); -#endif + template + std::unique_ptr > populate_sparse(bool row, tatami::MaybeOracle ora, Index_ block_start, Index_ block_length, const tatami::Options& opt) const { + return populate_sparse_internal(row, block_length, std::move(ora), block_start, block_length, opt); + } - return output; + template + std::unique_ptr > populate_sparse(bool row, tatami::MaybeOracle ora, tatami::VectorPtr indices_ptr, const tatami::Options& opt) const { + Index_ nidx = indices_ptr->size(); + return populate_sparse_internal(row, nidx, std::move(ora), std::move(indices_ptr), opt); } public: diff --git a/include/tatami_r/dense_extractor.hpp b/include/tatami_r/dense_extractor.hpp index 1e8261a..704c28f 100644 --- a/include/tatami_r/dense_extractor.hpp +++ b/include/tatami_r/dense_extractor.hpp @@ -18,30 +18,27 @@ namespace UnknownMatrix_internal { *** Core classes *** ********************/ -template +template struct SoloDenseCore { SoloDenseBase( const Rcpp::RObject& mat, const Rcpp::Function& dense_extractor, tatami::MaybeOracle ora, Rcpp::IntegerVector non_target_extract, - bool by_column, - [[maybe_unused]] const std::vector& ticks, + [[maybe_unused]] const std::vector& ticks, // provided here for compatibility with the other Dense*Core classes. [[maybe_unused]] const std::vector& map, [[maybe_unused]] const tatami_chunked::SlabCacheStats& stats) : mat(mat), dense_extractor(dense_extractor), - extract_args(2), - by_column(by_column) + extract_args(2) { - extract_args[static_cast(!by_column)] = non_target_extract; + extract_args[static_cast(accrow_)] = non_target_extract; } private: const Rcpp::RObject& mat; const Rcpp::Function& dense_extractor; Rcpp::List extract_args; - bool by_column; public: template @@ -56,13 +53,9 @@ struct SoloDenseCore { mexec.run([&]() -> void { #endif - extract_args[static_cast(by_column)] = Rcpp::IntegerVector::create(i + 1); + extract_args[static_cast(!accrow_)] = Rcpp::IntegerVector::create(i + 1); auto obj = dense_extractor(mat, extract_args); - if (by_column) { - parse_dense_matrix(obj, buffer, 0, 0, non_target_length, 1); - } else { - parse_dense_matrix(obj, buffer, 0, 0, 1, non_target_length); - } + parse_dense_matrix(obj, buffer, 0, 0, non_target_length, 1); #ifdef TATAMI_R_PARALLELIZE_UNKNOWN }); @@ -70,35 +63,32 @@ struct SoloDenseCore { } }; -template +template struct MyopicDenseCore { MyopicDenseBase( const Rcpp::RObject& mat, const Rcpp::Function& dense_extractor, - [[maybe_unused]] tatami::MaybeOracle ora, + [[maybe_unused]] tatami::MaybeOracle ora, // provided here for compatibility with the other Dense*Core classes. Rcpp::IntegerVector non_target_extract, - bool by_column, const std::vector& ticks, const std::vector& map, const tatami_chunked::SlabCacheStats& stats) : mat(mat), dense_extractor(dense_extractor), extract_args(2), - by_column(by_column), chunk_ticks(ticks), chunk_map(map), non_target_length(non_target_extract.size()), factory(stats), cache(max_slabs) { - extract_args[static_cast(!by_column)] = non_target_extract; + extract_args[static_cast(accrow_)] = non_target_extract; } private: const Rcpp::RObject& mat; const Rcpp::Function& dense_extractor; Rcpp::List extract_args; - bool by_column; const std::vector& chunk_ticks; const std::vector& chunk_map; @@ -129,13 +119,9 @@ struct MyopicDenseCore { size_t chunk_len = chunk_ticks[id + 1] - chunk_start; Rcpp::IntegerVector primary_extract(chunk_len); std::iota(primary_extract.begin(), primary_extract.end(), chunk_start + 1); - extract_args[static_cast(by_column)] = primary_extract; + extract_args[static_cast(!accrow_)] = primary_extract; auto obj = dense_extractor(mat, extract_args); - if (by_column) { - parse_dense_matrix(obj, cache.data, 0, 0, non_target_length, chunk_len); - } else { - parse_dense_matrix(obj, cache.data, 0, 0, chunk_len, non_target_length); - } + parse_dense_matrix(obj, cache.data, 0, 0, non_target_length, chunk_len); #ifdef TATAMI_R_PARALLELIZE_UNKNOWN }); @@ -148,35 +134,32 @@ struct MyopicDenseCore { } }; -template +template struct OracularDenseCore { OracularDenseBase( const Rcpp::RObject& mat, const Rcpp::Function& dense_extractor, tatami::MaybeOracle ora, Rcpp::IntegerVector non_target_extract, - bool by_column, const std::vector& ticks, const std::vector& map, const tatami_chunked::SlabCacheStats& stats) : mat(mat), dense_extractor(dense_extractor), extract_args(2), - by_column(by_column), chunk_ticks(ticks), chunk_map(map), non_target_length(non_target_extract.size()), factory(stats), cache(std::move(ora), max_slabs) { - extract_args[static_cast(!by_column)] = non_target_extract; + extract_args[static_cast(accrow_)] = non_target_extract; } private: const Rcpp::RObject& mat; const Rcpp::Function& dense_extractor; Rcpp::List extract_args; - bool by_column; const std::vector& chunk_ticks; const std::vector& chunk_map; @@ -224,18 +207,14 @@ public current += chunk_len; } - extract_args[static_cast(by_column)] = primary_extract; + extract_args[static_cast(!accrow_)] = primary_extract; auto obj = dense_extractor(mat, extract_args); current = 0; for (const auto& p : to_populate) { auto chunk_start = chunk_ticks[p.first]; Index_ chunk_len = chunk_ticks[p.first + 1] - chunk_start; - if (by_column) { - parse_dense_matrix(obj, p.second->data, 0, current, non_target_length, chunk_len); - } else { - parse_dense_matrix(obj, p.second->data, current, 0, chunk_len, non_target_length); - } + parse_dense_matrix(obj, p.second->data, 0, current, non_target_length, chunk_len); current += chunk_len; } @@ -250,12 +229,12 @@ public } }; -template +template using DenseCore = typename std::conditional, + SoloDenseCore, typename std::conditional, - MyopicDenseCore + OracularDenseCore, + MyopicDenseCore >::type >::type; @@ -263,14 +242,13 @@ using DenseCore = typename std::conditional +template struct DenseFull : public tatami::DenseExtractor { DenseFull( const Rcpp::RObject& mat, const Rcpp::Function& dense_extractor, tatami::MaybeOracle ora, Index_ non_target_dim, - bool by_column, const std::vector& ticks, const std::vector& map, const tatami_chunked::SlabCacheStats& stats) : @@ -283,7 +261,6 @@ struct DenseFull : public tatami::DenseExtractor { std::iota(output.begin(), output.end(), 1); return output; }(), - by_column, ticks, map, stats @@ -291,7 +268,7 @@ struct DenseFull : public tatami::DenseExtractor { {} private: - DenseCore core; + DenseCore core; public: const Value_* fetch(Index_ i, Value_* buffer) { @@ -300,7 +277,7 @@ struct DenseFull : public tatami::DenseExtractor { } }; -template +template struct DenseBlock : public tatami::DenseExtractor { DenseBlock( const Rcpp::RObject& mat, @@ -308,7 +285,6 @@ struct DenseBlock : public tatami::DenseExtractor { tatami::MaybeOracle ora, Index_ block_start, Index_ block_length, - bool by_column, const std::vector& ticks, const std::vector& map, const tatami_chunked::SlabCacheStats& stats) : @@ -321,7 +297,6 @@ struct DenseBlock : public tatami::DenseExtractor { std::iota(output.begin(), output.end(), block_start + 1); return output; }(), - by_column, ticks, map, stats @@ -329,7 +304,7 @@ struct DenseBlock : public tatami::DenseExtractor { {} private: - DenseCore core; + DenseCore core; public: const Value_* fetch(Index_ i, Value_* buffer) { @@ -338,14 +313,13 @@ struct DenseBlock : public tatami::DenseExtractor { } }; -template +template struct DenseIndexed : public tatami::DenseExtractor { DenseIndexed( const Rcpp::RObject& mat, const Rcpp::Function& dense_extractor, tatami::MaybeOracle ora, tatami::VectorPtr indices_ptr, - bool by_column, const std::vector& ticks, const std::vector& map, const tatami_chunked::SlabCacheStats& stats) : @@ -360,7 +334,6 @@ struct DenseIndexed : public tatami::DenseExtractor { } return output; }(), - by_column, ticks, map, stats @@ -368,7 +341,7 @@ struct DenseIndexed : public tatami::DenseExtractor { {} private: - DenseCore core; + DenseCore core; public: const Value_* fetch(Index_ i, Value_* buffer) { diff --git a/include/tatami_r/sparse_extractor.hpp b/include/tatami_r/sparse_extractor.hpp index cac086e..7d9f0d3 100644 --- a/include/tatami_r/sparse_extractor.hpp +++ b/include/tatami_r/sparse_extractor.hpp @@ -13,320 +13,323 @@ namespace tatami_r { namespace UnknownMatrix_internal { -template -struct SparseBase { - SparseBase( +/******************** + *** Core classes *** + ********************/ + +template +struct SparseSoloCore { + SparseSoloCore( const Rcpp::RObject& mat, const Rcpp::Function& sparse_extractor, tatami::MaybeOracle ora, - Rcpp::IntegerVector secondary_extract, - bool by_column, - Index_ max_primary_chunk_length, - const std::vector& ticks, - const std::vector& map, - size_t cache_size_in_bytes, - bool require_minimum_cache, + Rcpp::IntegerVector non_target_extract, + [[maybe_unused]] Index_ max_target_chunk_length, // provided here for compatibility with the other Sparse*Core classes. + [[maybe_unused]] const std::vector& ticks, + [[maybe_unused]] const std::vector& map, + [[maybe_unused]] const tatami_chunked::SlabCacheStats& stats, bool needs_value, bool needs_index) : - mat(std::move(mat)), - sparse_extractor(std::move(sparse_extractor)), + mat(mat), + sparse_extractor(sparse_extractor), extract_args(2), - secondary_indices(std::move(secondary_extract)), - by_column(by_column), - chunk_ticks(ticks), - chunk_map(map), - max_primary_chunk_length(max_primary_chunk_length), - secondary_length(secondary_indices.size()), - needs_value(needs_value), - needs_index(needs_index), - cache( - max_primary_chunk_length, - secondary_length, - cache_size_in_bytes / std::max(static_cast(1), static_cast(sizeof(CachedValue_) * needs_value + sizeof(CachedIndex_) * needs_index)), - require_minimum_cache, - std::move(ora) - ) + factory(1, non_target_extract.size(), 1, needs_value, needs_index), + solo(factory.create()) { - if (cache.num_slabs_in_cache == 0) { - solo = Slab(1, secondary_length, needs_value, needs_index); - } - - if (by_column) { - extract_args[0] = secondary_indices; - } else { - extract_args[1] = secondary_indices; - } + extract_args[static_cast(accrow_)] = non_target_extract; } - ~SparseBase() = default; +private + const Rcpp::RObject& mat; + const Rcpp::Function& sparse_extractor; + Rcpp::List extract_args; + + tatami_chunked::SparseSlabFactory factory; + typedef typename decltype(factory)::Slab Slab; + Slab solo; -public: - struct Slab { - Slab() = default; - Slab(size_t max_primary_chunk_length, size_t secondary_length, bool needs_value, bool needs_index) { - if (needs_value) { - value.reserve(max_primary_chunk_length * secondary_length); - } - if (needs_index) { - index.reserve(max_primary_chunk_length * secondary_length); - } - count.reserve(max_primary_chunk_length); +protected: + std::pair fetch_raw(Index_, Value_* vbuffer, Index_* ibuffer) { + if constexpr(oracle_) { + i = cache.cache.next(); } + solo.number[0] = 0; - std::vector value; - std::vector index; - std::vector count; - }; +#ifdef TATAMI_R_PARALLELIZE_UNKNOWN + // This involves some Rcpp initializations, so we lock it just in case. + auto& mexec = executor(); + mexec.run([&]() -> void { +#endif -protected: + extract_args[static_cast(!accrow_)] = Rcpp::IntegerVector::create(i + 1); + auto obj = sparse_extractor(mat, extract_args); + parse_sparse_matrix(obj, solo.values, solo.indices, solo.number); + +#ifdef TATAMI_R_PARALLELIZE_UNKNOWN + }); +#endif + + return std::make_pair(&solo, static_cast(0)); + } +}; + +template +struct MyopicSparseCore { + MyopicSparseCore( + const Rcpp::RObject& mat, + const Rcpp::Function& sparse_extractor, + [[maybe_unused]] tatami::MaybeOracle ora, // provided here for compatibility with the other Sparse*Core classes. + Rcpp::IntegerVector non_target_extract, + Index_ max_target_chunk_length, + const std::vector& ticks, + const std::vector& map, + const tatami_chunked::SlabCacheStats& stats, + bool needs_value, + bool needs_index) : + mat(mat), + sparse_extractor(sparse_extractor), + extract_args(2), + chunk_ticks(ticks), + chunk_map(map), + factory(max_target_chunk_length, non_target_extract.size(), stats, needs_value, needs_index), + cache(stats) + { + extract_args[static_cast(accrow_)] = non_target_extract; + } + +private: const Rcpp::RObject& mat; const Rcpp::Function& sparse_extractor; Rcpp::List extract_args; - Rcpp::IntegerVector secondary_indices; - bool by_column; const std::vector& chunk_ticks; const std::vector& chunk_map; - size_t max_primary_chunk_length; - size_t secondary_length; - bool needs_value; - bool needs_index; - - std::vector chunk_value_ptrs; - std::vector chunk_index_ptrs; - std::vector chunk_counts; - - tatami_chunked::TypicalSlabCacheWorkspace cache; - Slab solo; + tatami_chunked::SparseSlabFactory factory; + typedef typename decltype(factory)::Slab Slab; + tatami_chunked::LruSlabCache cache; protected: - std::pair fetch_raw(Index_ i) { - if (cache.num_slabs_in_cache == 0) { - if constexpr(oracle_) { - i = cache.cache.next(); - } - - if (needs_value) { - chunk_value_ptrs.clear(); - chunk_value_ptrs.push_back(solo.value.data()); - } - if (needs_index) { - chunk_index_ptrs.clear(); - chunk_index_ptrs.push_back(solo.index.data()); - } - solo.count[0] = 0; + std::pair fetch_raw(Index_, Value_* vbuffer, Index_* ibuffer) { + auto chosen = chunk_map[i]; + + const auto& slab = cache.cache.find( + chosen, + [&]() -> Slab { + return factory.create(); + }, + [&](Index_ id, Slab& cache) { + auto chunk_start = chunk_ticks[id], chunk_end = chunk_ticks[id + 1]; + size_t chunk_len = chunk_end - chunk_start; + std::fill_n(cache.number, chunk_len, 0); #ifdef TATAMI_R_PARALLELIZE_UNKNOWN - // This involves some Rcpp initializations, so we lock it just in case. - auto& mexec = executor(); - mexec.run([&]() -> void { + // This involves some Rcpp initializations, so we lock it just in case. + auto& mexec = executor(); + mexec.run([&]() -> void { #endif - extract_args[static_cast(by_column)] = Rcpp::IntegerVector::create(i + 1); - auto obj = sparse_extractor(mat, extract_args); - - if (by_column) { - parse_sparse_matrix(obj, chunk_value_ptrs, chunk_index_ptrs, solo.count); - } else { - parse_sparse_matrix(obj, chunk_value_ptrs, chunk_index_ptrs, solo.count); - } + Rcpp::IntegerVector primary_extract(chunk_len); + std::iota(primary_extract.begin(), primary_extract.end(), chunk_start + 1); + extract_args[static_cast(!accrow_)] = primary_extract; + auto obj = sparse_extractor(mat, extract_args); + parse_sparse_matrix(obj, cache.values, cache.indices, cache.number); #ifdef TATAMI_R_PARALLELIZE_UNKNOWN - }); + }); #endif + } + ); - return std::make_pair(&solo, static_cast(0)); + Index_ offset = i - chunk_ticks[chosen]; + return std::make_pair(&slab, offset); + } +}; - } else if constexpr(!oracle_) { - auto chosen = chunk_map[i]; +template +struct OracularSparseCore { + OracularSparseCore( + const Rcpp::RObject& mat, + const Rcpp::Function& sparse_extractor, + tatami::MaybeOracle ora, + Rcpp::IntegerVector non_target_extract, + Index_ max_target_chunk_length, + const std::vector& ticks, + const std::vector& map, + const tatami_chunked::SlabCacheStats& stats, + bool needs_value, + bool needs_index) : + mat(mat), + sparse_extractor(sparse_extractor), + extract_args(2), + chunk_ticks(ticks), + chunk_map(map), + factory(max_target_chunk_length, non_target_extract.size(), stats, needs_value, needs_index), + cache(std::move(ora), stats), + needs_value(needs_value), + needs_index(needs_index) + { + extract_args[static_cast(accrow_)] = non_target_extract; + } - const auto& slab = cache.cache.find( - chosen, - [&]() -> Slab { - return Slab(max_primary_chunk_length, secondary_length, needs_value, needs_index); - }, - [&](Index_ id, Slab& cache) { - auto chunk_start = chunk_ticks[id], chunk_end = chunk_ticks[id + 1]; - size_t chunk_len = chunk_end - chunk_start; +private: + const Rcpp::RObject& mat; + const Rcpp::Function& sparse_extractor; + Rcpp::List extract_args; - if (needs_value) { - chunk_value_ptrs.clear(); - auto ptr = cache.value.data(); - for (size_t i = 0; i < chunk_len; ++i, ptr += secondary_length) { - chunk_value_ptrs.push_back(ptr); - } - } - if (needs_index) { - chunk_index_ptrs.clear(); - auto ptr = cache.index.data(); - for (size_t i = 0; i < chunk_len; ++i, ptr += secondary_length) { - chunk_index_ptrs.push_back(ptr); - } - } - cache.count.clear(); - cache.count.resize(chunk_len); + const std::vector& chunk_ticks; + const std::vector& chunk_map; -#ifdef TATAMI_R_PARALLELIZE_UNKNOWN - // This involves some Rcpp initializations, so we lock it just in case. - auto& mexec = executor(); - mexec.run([&]() -> void { -#endif + tatami_chunked::SparseSlabFactory factory; + typedef typename decltype(factory)::Slab; + tatami_chunked::OracularSlabCache cache; - Rcpp::IntegerVector primary_extract(chunk_len); - std::iota(primary_extract.begin(), primary_extract.end(), chunk_start + 1); - extract_args[static_cast(by_column)] = primary_extract; - auto obj = sparse_extractor(mat, extract_args); + std::vector chunk_value_ptrs; + std::vector chunk_index_ptrs; + std::vector chunk_numbers; - if (by_column) { - parse_sparse_matrix(obj, chunk_value_ptrs, chunk_index_ptrs, cache.count); - } else { - parse_sparse_matrix(obj, chunk_value_ptrs, chunk_index_ptrs, cache.count); - } + bool needs_value; + bool needs_index; -#ifdef TATAMI_R_PARALLELIZE_UNKNOWN - }); -#endif +public: + std::pair fetch_raw(Index_) { + auto res = cache.next( + [&](Index_ i) -> std::pair { + auto chosen = chunk_map[i]; + return std::make_pair(chosen, static_cast(i - chunk_ticks[chosen])); + }, + [&]() -> Slab { + return factory.create(); + }, + [&](std::vector >& to_populate) { + // Sorting them so that the indices are in order. + if (!std::is_sorted(to_populate.begin(), to_populate.end(), [&](const std::pair& left, const std::pair right) { return left.first < right.first; })) { + std::sort(to_populate.begin(), to_populate.end(), [&](const std::pair& left, const std::pair right) { return left.first < right.first; }); + } + + if (needs_value) { + chunk_value_ptrs.clear(); + } + if (needs_index) { + chunk_index_ptrs.clear(); } - ); - return std::make_pair(&slab, static_cast(i - chunk_ticks[chosen])); - - } else { - return cache.cache.next( - [&](Index_ i) -> std::pair { - auto chosen = chunk_map[i]; - return std::make_pair(chosen, static_cast(i - chunk_ticks[chosen])); - }, - [&]() -> Slab { - return Slab(max_primary_chunk_length, secondary_length, needs_value, needs_index); - }, - [&](std::vector >& to_populate) { - // Sorting them so that the indices are in order. - if (!std::is_sorted(to_populate.begin(), to_populate.end(), [&](const std::pair& left, const std::pair right) { return left.first < right.first; })) { - std::sort(to_populate.begin(), to_populate.end(), [&](const std::pair& left, const std::pair right) { return left.first < right.first; }); - } + Index_ total_len = 0; + for (const auto& p : to_populate) { + Index_ chunk_len = chunk_ticks[p.first + 1] - chunk_ticks[p.first]; + total_len += chunk_len; if (needs_value) { - chunk_value_ptrs.clear(); + chunk_value_ptrs.insert(chunk_value_ptrs.end(), p.second->values.begin(), p.second->values.end()); } if (needs_index) { - chunk_index_ptrs.clear(); - } - - Index_ total_len = 0; - for (const auto& p : to_populate) { - Index_ chunk_len = chunk_ticks[p.first + 1] - chunk_ticks[p.first]; - total_len += chunk_len; - if (needs_value) { - auto ptr = p.second->value.data(); - for (Index_ i = 0; i < chunk_len; ++i, ptr += secondary_length) { - chunk_value_ptrs.push_back(ptr); - } - } - if (needs_index) { - auto ptr = p.second->index.data(); - for (Index_ i = 0; i < chunk_len; ++i, ptr += secondary_length) { - chunk_index_ptrs.push_back(ptr); - } - } + chunk_index_ptrs.insert(chunk_index_ptrs.end(), p.second->indices.begin(), p.second->indices.end()); } + } - chunk_counts.clear(); - chunk_counts.resize(total_len); + chunk_numbers.clear(); + chunk_numbers.resize(total_len); #ifdef TATAMI_R_PARALLELIZE_UNKNOWN - // This involves some Rcpp initializations, so we lock it just in case. - auto& mexec = executor(); - mexec.run([&]() -> void { + // This involves some Rcpp initializations, so we lock it just in case. + auto& mexec = executor(); + mexec.run([&]() -> void { #endif - Rcpp::IntegerVector primary_extract(total_len); - Index_ current = 0; - for (const auto& p : to_populate) { - Index_ chunk_start = chunk_ticks[p.first]; - Index_ chunk_len = chunk_ticks[p.first + 1] - chunk_start; - auto start = primary_extract.begin() + current; - std::iota(start, start + chunk_len, chunk_start + 1); - current += chunk_len; - } + Rcpp::IntegerVector primary_extract(total_len); + Index_ current = 0; + for (const auto& p : to_populate) { + Index_ chunk_start = chunk_ticks[p.first]; + Index_ chunk_len = chunk_ticks[p.first + 1] - chunk_start; + auto start = primary_extract.begin() + current; + std::iota(start, start + chunk_len, chunk_start + 1); + current += chunk_len; + } - extract_args[static_cast(by_column)] = primary_extract; - auto obj = sparse_extractor(mat, extract_args); - if (by_column) { - parse_sparse_matrix(obj, chunk_value_ptrs, chunk_index_ptrs, chunk_counts); - } else { - parse_sparse_matrix(obj, chunk_value_ptrs, chunk_index_ptrs, chunk_counts); - } + extract_args[static_cast(by_column)] = primary_extract; + auto obj = sparse_extractor(mat, extract_args); + parse_sparse_matrix(obj, chunk_value_ptrs, chunk_index_ptrs, chunk_numbers.data()); - current = 0; - for (const auto& p : to_populate) { - Index_ chunk_len = chunk_ticks[p.first + 1] - chunk_ticks[p.first]; - p.second->count.resize(chunk_len); - std::copy_n(chunk_counts.begin() + current, chunk_len, p.second->count.begin()); - current += chunk_len; - } + current = 0; + for (const auto& p : to_populate) { + Index_ chunk_len = chunk_ticks[p.first + 1] - chunk_ticks[p.first]; + std::copy_n(chunk_numbers.begin() + current, chunk_len, p.second->number); + current += chunk_len; + } #ifdef TATAMI_R_PARALLELIZE_UNKNOWN - }); + }); #endif - } - ); - } + } + ); } }; +template +using SparseCore = typename std::conditional, + typename std::conditional, + MyopicSparseCore + >::type +>::type; + /****************************** *** Pure sparse extractors *** ******************************/ -template -struct SparseFull : public SparseBase, public tatami::SparseExtractor { +template +struct SparseFull : public tatami::SparseExtractor { SparseFull( const Rcpp::RObject& mat, const Rcpp::Function& sparse_extractor, tatami::MaybeOracle ora, - Index_ secondary_dim, - bool by_column, - Index_ max_primary_chunk_length, + Index_ non_target_dim, + Index_ max_target_chunk_length, const std::vector& ticks, const std::vector& map, - size_t cache_size_in_bytes, - bool require_minimum_cache, + const tatami_chunked::SlabCacheStats& stats, bool needs_value, bool needs_index) : - SparseBase( - std::move(mat), - std::move(sparse_extractor), + core( + mat, + sparse_extractor, std::move(ora), [&]() { - Rcpp::IntegerVector output(secondary_dim); + Rcpp::IntegerVector output(non_target_dim); std::iota(output.begin(), output.end(), 1); return output; }(), - by_column, - max_primary_chunk_length, + max_target_chunk_length, ticks, map, - cache_size_in_bytes, - require_minimum_cache, + stats, needs_value, needs_index - ) + ), + non_target_dim(non_target_dim), + needs_value(needs_value), + needs_index(needs_index) {} +private: + SparseCore core; + Index_ non_target_dim; + bool needs_value, needs_index; + public: tatami::SparseRange fetch(Index_ i, Value_* vbuffer, Index_* ibuffer) { - auto res = this->fetch_raw(i); + auto res = core->fetch_raw(i); const auto& slab = *(res.first); Index_ offset = res.second; - tatami::SparseRange output(slab.count[offset]); - if (this->needs_value) { - std::copy_n(slab.value.data() + static_cast(offset) * this->secondary_length, output.number, vbuffer); // cast to size_t to avoid overflow. + tatami::SparseRange output(slab.number[offset]); + if (needs_value) { + std::copy_n(slab.values[offset], non_target_dim, vbuffer); output.value = vbuffer; } - if (this->needs_index) { - std::copy_n(slab.index.data() + static_cast(offset) * this->secondary_length, output.number, ibuffer); + + if (needs_index) { + std::copy_n(slab.indices[offset], non_target_dim, ibuffer); output.index = ibuffer; } @@ -334,7 +337,7 @@ struct SparseFull : public SparseBase +template struct SparseBlock : public SparseBase, public tatami::SparseExtractor { SparseBlock( const Rcpp::RObject& mat, @@ -342,35 +345,38 @@ struct SparseBlock : public SparseBase ora, Index_ block_start, Index_ block_length, - bool by_column, - Index_ max_primary_chunk_length, + Index_ max_target_chunk_length, const std::vector& ticks, const std::vector& map, - size_t cache_size_in_bytes, - bool require_minimum_cache, + const tatami_chunked::SlabCacheStats& stats, bool needs_value, bool needs_index) : SparseBase( - std::move(mat), - std::move(sparse_extractor), + mat, + sparse_extractor, std::move(ora), [&]() { Rcpp::IntegerVector output(block_length); std::iota(output.begin(), output.end(), block_start + 1); return output; }(), - by_column, - max_primary_chunk_length, + max_target_chunk_length, ticks, map, - cache_size_in_bytes, - require_minimum_cache, + stats, needs_value, needs_index ), - block_start(block_start) + block_start(block_start), + needs_value(needs_value), + needs_index(needs_index) {} +private: + SparseCore core; + Index_ block_start; + bool needs_value, needs_index; + public: tatami::SparseRange fetch(Index_ i, Value_* vbuffer, Index_* ibuffer) { auto res = this->fetch_raw(i); @@ -378,12 +384,13 @@ struct SparseBlock : public SparseBase output(slab.count[offset]); - if (this->needs_value) { - std::copy_n(slab.value.data() + static_cast(offset) * this->secondary_length, output.number, vbuffer); // cast to size_t to avoid overflow. + if (needs_value) { + std::copy_n(slab.values[offset], output.number, vbuffer); output.value = vbuffer; } - if (this->needs_index) { - auto iptr = slab.index.data() + static_cast(offset) * this->secondary_length; + + if (needs_index) { + auto iptr = slab.index[offset]; for (Index_ i = 0; i < output.number; ++i) { ibuffer[i] = static_cast(iptr[i]) + block_start; } @@ -392,29 +399,24 @@ struct SparseBlock : public SparseBase +template struct SparseIndexed : public SparseBase, public tatami::SparseExtractor { SparseIndexed( const Rcpp::RObject& mat, const Rcpp::Function& sparse_extractor, tatami::MaybeOracle ora, tatami::VectorPtr idx_ptr, - bool by_column, - Index_ max_primary_chunk_length, + Index_ max_target_chunk_length, const std::vector& ticks, const std::vector& map, - size_t cache_size_in_bytes, - bool require_minimum_cache, + const tatami_chunked::SlabCacheStats& stats, bool needs_value, bool needs_index) : SparseBase( - std::move(mat), - std::move(sparse_extractor), + mat, + sparse_extractor, std::move(ora), [&]() { Rcpp::IntegerVector output(idx_ptr->begin(), idx_ptr->end()); @@ -423,33 +425,37 @@ struct SparseIndexed : public SparseBase core; + tatami::VectorPtr indices_ptr; + bool needs_value, needs_index; + public: tatami::SparseRange fetch(Index_ i, Value_* vbuffer, Index_* ibuffer) { - auto res = this->fetch_raw(i); + auto res = core->fetch_raw(i); const auto& slab = *(res.first); Index_ offset = res.second; tatami::SparseRange output(slab.count[offset]); if (this->needs_value) { - std::copy_n(slab.value.data() + static_cast(offset) * this->secondary_length, output.number, vbuffer); // cast to size_t to avoid overflow. + std::copy_n(slab.values[offset], output.number, vbuffer); output.value = vbuffer; } + if (this->needs_index) { - auto iptr = slab.index.data() + static_cast(offset) * this->secondary_length; + auto iptr = slab.indices[offset]; const auto& indices = *indices_ptr; - for (Index_ i = 0; i < output.number; ++i) { + for (CachedIndex_ i = 0; i < output.number; ++i) { ibuffer[i] = indices[iptr[i]]; } output.index = ibuffer; @@ -457,9 +463,6 @@ struct SparseIndexed : public SparseBase indices_ptr; }; /*********************************** @@ -467,12 +470,12 @@ struct SparseIndexed : public SparseBase -const Value_* densify(const Slab_& slab, Index_ offset, size_t secondary_length, Value_* buffer) { - size_t shift = static_cast(offset) * secondary_length; // cast to size_t to avoid overflow. +const Value_* densify(const Slab_& slab, Index_ offset, size_t non_target_length, Value_* buffer) { + size_t shift = static_cast(offset) * non_target_length; // cast to size_t to avoid overflow. auto vptr = slab.value.data() + shift; auto iptr = slab.index.data() + shift; - std::fill_n(buffer, secondary_length, 0); + std::fill_n(buffer, non_target_length, 0); for (Index_ i = 0, end = slab.count[offset]; i < end; ++i, ++vptr, ++iptr) { buffer[*iptr] = *vptr; } @@ -485,9 +488,9 @@ struct DensifiedSparseFull : public SparseBase ora, - Index_ secondary_dim, + Index_ non_target_dim, bool by_column, - Index_ max_primary_chunk_length, + Index_ max_target_chunk_length, const std::vector& ticks, const std::vector& map, size_t cache_size_in_bytes, @@ -497,12 +500,12 @@ struct DensifiedSparseFull : public SparseBasefetch_raw(i); - return densify(*(res.first), res.second, this->secondary_length, buffer); + return densify(*(res.first), res.second, this->non_target_length, buffer); } }; @@ -528,7 +531,7 @@ struct DensifiedSparseBlock : public SparseBase& ticks, const std::vector& map, size_t cache_size_in_bytes, @@ -543,7 +546,7 @@ struct DensifiedSparseBlock : public SparseBasefetch_raw(i); - return densify(*(res.first), res.second, this->secondary_length, buffer); + return densify(*(res.first), res.second, this->non_target_length, buffer); } }; @@ -568,7 +571,7 @@ struct DensifiedSparseIndexed : public SparseBase ora, tatami::VectorPtr idx_ptr, bool by_column, - Index_ max_primary_chunk_length, + Index_ max_target_chunk_length, const std::vector& ticks, const std::vector& map, size_t cache_size_in_bytes, @@ -585,7 +588,7 @@ struct DensifiedSparseIndexed : public SparseBasefetch_raw(i); - return densify(*(res.first), res.second, this->secondary_length, buffer); + return densify(*(res.first), res.second, this->non_target_length, buffer); } }; diff --git a/include/tatami_r/sparse_matrix.hpp b/include/tatami_r/sparse_matrix.hpp index 53adf0d..7998a4a 100644 --- a/include/tatami_r/sparse_matrix.hpp +++ b/include/tatami_r/sparse_matrix.hpp @@ -24,6 +24,9 @@ void parse_sparse_matrix_internal( bool needs_value = !value_ptrs.empty(); bool needs_index = !index_ptrs.empty(); + // Note that non-empty value_ptrs and index_ptrs may be longer than the + // number of rows/columns in the SVT matrix, due to the reuse of slabs. + for (int c = 0; c < NC; ++c) { Rcpp::RObject raw_inner(svt[c]); if (raw_inner == R_NilValue) { From 2e70c3aed6f72c84dd27c578b85c7ef1b1cab963 Mon Sep 17 00:00:00 2001 From: LTLA Date: Wed, 15 May 2024 15:32:32 -0700 Subject: [PATCH 3/5] Got the entire thing to compile. --- include/tatami_r/UnknownMatrix.hpp | 180 +++++++++++++------------- include/tatami_r/dense_extractor.hpp | 41 +++--- include/tatami_r/sparse_extractor.hpp | 165 +++++++++++------------ include/tatami_r/sparse_matrix.hpp | 4 +- tests/src/bindings.cpp | 2 +- 5 files changed, 204 insertions(+), 188 deletions(-) diff --git a/include/tatami_r/UnknownMatrix.hpp b/include/tatami_r/UnknownMatrix.hpp index 54ab53f..587e4fc 100644 --- a/include/tatami_r/UnknownMatrix.hpp +++ b/include/tatami_r/UnknownMatrix.hpp @@ -212,7 +212,7 @@ class UnknownMatrix : public tatami::Matrix { * * @param seed A matrix-like R object. */ - UnknownMatrix(Rcpp::RObject seed) : UnknownMatrix(std::move(seed), Options()) {} + UnknownMatrix(Rcpp::RObject seed) : UnknownMatrix(std::move(seed), UnknownMatrixOptions()) {} private: Index_ internal_nrow, internal_ncol; @@ -247,11 +247,11 @@ class UnknownMatrix : public tatami::Matrix { return internal_ncol; } - bool sparse() const { + bool is_sparse() const { return internal_sparse; } - double sparse_proportion() const { + double is_sparse_proportion() const { return static_cast(internal_sparse); } @@ -296,60 +296,66 @@ class UnknownMatrix : public tatami::Matrix { *** Myopic dense *** ********************/ private: - template - void populate_dense_by_sparse(std::unique_ptr >& output, Args_&& ... args) { - if (!internal_sparse) { - output.reset(new UnknownMatrix_internal::DenseFull(std::forward(args)...)); - } else { - output.reset(new UnknownMatrix_internal::DensifiedSparseFull(std::forward(args)...)); - } - } - - template - void populate_dense_by_solo(std::unique_ptr >& output, bool solo, Args_&& ... args) { - if (solo) { - populate_dense_by_sparse(output, std::forward(args)...); - } else { - populate_dense_by_sparse(output, std::forward(args)...); - } - } - - template - void populate_dense_by_accrow(std::unique_ptr >& output, bool row, bool solo, Args_&& ... args) { - if (row) { - populate_dense_by_solo(output, solo, std::forward(args)...); - } else { - populate_dense_by_solo(output, solo, std::forward(args)...); - } - } - - template + template< + bool oracle_, + template class FromDense_, + template class FromSparse_, + typename ... Args_ + > std::unique_ptr > populate_dense_internal(bool row, Index_ non_target_length, tatami::MaybeOracle ora, Args_&& ... args) const { std::unique_ptr > output; Index_ max_target_chunk_length = max_primary_chunk_length(row); tatami_chunked::SlabCacheStats stats(max_target_chunk_length, non_target_length, cache_size_in_bytes, sizeof(CachedValue_), require_minimum_cache); + const auto& map = chunk_map(row); + const auto& ticks = chunk_ticks(row); + bool solo = stats.max_slabs_in_cache == 0; + #ifdef TATAMI_R_PARALLELIZE_UNKNOWN // This involves some Rcpp initializations, so we lock it just in case. auto& mexec = executor(); mexec.run([&]() -> void { #endif - populate_dense_by_accrow( - output, - row, - stats.max_slabs_in_cache == 0, - // And now all the other arguments. - original_seed, - dense_extractor, - std::move(ora), - std::forward(args)..., - max_target_chunk_length, - chunk_ticks(row), - chunk_map(row), - stats - ); + if (internal_sparse) { + if (row) { + if (solo) { + typedef FromDense_ ShortDense; + output.reset(new ShortDense(original_seed, dense_extractor, std::move(ora), std::forward(args)..., ticks, map, stats)); + } else { + typedef FromDense_ ShortDense; + output.reset(new ShortDense(original_seed, dense_extractor, std::move(ora), std::forward(args)..., ticks, map, stats)); + } + } else { + if (solo) { + typedef FromDense_ ShortDense; + output.reset(new ShortDense(original_seed, dense_extractor, std::move(ora), std::forward(args)..., ticks, map, stats)); + } else { + typedef FromDense_ ShortDense; + output.reset(new ShortDense(original_seed, dense_extractor, std::move(ora), std::forward(args)..., ticks, map, stats)); + } + } + + } else { + if (row) { + if (solo) { + typedef FromSparse_ ShortSparse; + output.reset(new ShortSparse(original_seed, sparse_extractor, std::move(ora), std::forward(args)..., max_target_chunk_length, ticks, map, stats)); + } else { + typedef FromSparse_ ShortSparse; + output.reset(new ShortSparse(original_seed, sparse_extractor, std::move(ora), std::forward(args)..., max_target_chunk_length, ticks, map, stats)); + } + } else { + if (solo) { + typedef FromSparse_ ShortSparse; + output.reset(new ShortSparse(original_seed, sparse_extractor, std::move(ora), std::forward(args)..., max_target_chunk_length, ticks, map, stats)); + } else { + typedef FromSparse_ ShortSparse; + output.reset(new ShortSparse(original_seed, sparse_extractor, std::move(ora), std::forward(args)..., max_target_chunk_length, ticks, map, stats)); + } + } + } #ifdef TATAMI_R_PARALLELIZE_UNKNOWN }); @@ -361,18 +367,18 @@ class UnknownMatrix : public tatami::Matrix { template std::unique_ptr > populate_dense(bool row, tatami::MaybeOracle ora, const tatami::Options&) const { Index_ non_target_dim = secondary_dim(row); - return populate_dense_internal(row, non_target_dim, std::move(ora), non_target_dim); + return populate_dense_internal(row, non_target_dim, std::move(ora), non_target_dim); } template std::unique_ptr > populate_dense(bool row, tatami::MaybeOracle ora, Index_ block_start, Index_ block_length, const tatami::Options&) const { - return populate_dense_internal(row, block_length, std::move(ora), block_start, block_length); + return populate_dense_internal(row, block_length, std::move(ora), block_start, block_length); } template std::unique_ptr > populate_dense(bool row, tatami::MaybeOracle ora, tatami::VectorPtr indices_ptr, const tatami::Options&) const { Index_ nidx = indices_ptr->size(); - return populate_dense_internal(row, nidx, std::move(ora), std::move(indices_ptr)); + return populate_dense_internal(row, nidx, std::move(ora), std::move(indices_ptr)); } public: @@ -408,26 +414,18 @@ class UnknownMatrix : public tatami::Matrix { *** Myopic sparse *** *********************/ public: - template - void populate_sparse_by_solo(std::unique_ptr >& output, bool solo, Args_&& ... args) { - if (solo) { - output.reset(new UnknownMatrix_internal::SparseFull(std::forward(args)...)); - } else { - output.reset(new UnknownMatrix_internal::SparseFull(std::forward(args)...)); - } - } - - template - void populate_sparse_by_accrow(std::unique_ptr >& output, bool row, bool solo, Args_&& ... args) { - if (row) { - populate_sparse_by_solo(output, solo, std::forward(args)...); - } else { - populate_sparse_by_solo(output, solo, std::forward(args)...); - } - } - - template - std::unique_ptr > populate_sparse(bool row, tatami::MaybeOracle ora, const tatami::Options& opt, Args_&& ... args) const { + template< + bool oracle_, + template class FromSparse_, + typename ... Args_ + > + std::unique_ptr > populate_sparse_internal( + bool row, + Index_ non_target_length, + tatami::MaybeOracle ora, + const tatami::Options& opt, + Args_&& ... args) + const { std::unique_ptr > output; Index_ max_target_chunk_length = max_primary_chunk_length(row); @@ -439,29 +437,35 @@ class UnknownMatrix : public tatami::Matrix { require_minimum_cache ); + const auto& map = chunk_map(row); + const auto& ticks = chunk_ticks(row); + bool needs_value = opt.sparse_extract_value; + bool needs_index = opt.sparse_extract_index; + bool solo = stats.max_slabs_in_cache == 0; + #ifdef TATAMI_R_PARALLELIZE_UNKNOWN // This involves some Rcpp initializations, so we lock it just in case. auto& mexec = executor(); mexec.run([&]() -> void { #endif - populate_sparse_by_accrow( - output, - row, - stats.max_slabs_in_cache == 0, - // And now the rest of the arguments. - original_seed, - sparse_extractor, - std::move(ora), - std::forward(args)..., - max_primary_chunk_length(row), - chunk_ticks(row), - chunk_map(row), - stats, - require_minimum_cache, - opt.sparse_extract_value, - opt.sparse_extract_index - )); + if (row) { + if (solo) { + typedef FromSparse_ ShortSparse; + output.reset(new ShortSparse(original_seed, sparse_extractor, std::move(ora), std::forward(args)..., max_target_chunk_length, ticks, map, stats, needs_value, needs_index)); + } else { + typedef FromSparse_ ShortSparse; + output.reset(new ShortSparse(original_seed, sparse_extractor, std::move(ora), std::forward(args)..., max_target_chunk_length, ticks, map, stats, needs_value, needs_index)); + } + } else { + if (solo) { + typedef FromSparse_ ShortSparse; + output.reset(new ShortSparse(original_seed, sparse_extractor, std::move(ora), std::forward(args)..., max_target_chunk_length, ticks, map, stats, needs_value, needs_index)); + } else { + typedef FromSparse_ ShortSparse; + output.reset(new ShortSparse(original_seed, sparse_extractor, std::move(ora), std::forward(args)..., max_target_chunk_length, ticks, map, stats, needs_value, needs_index)); + } + } #ifdef TATAMI_R_PARALLELIZE_UNKNOWN }); @@ -473,18 +477,18 @@ class UnknownMatrix : public tatami::Matrix { template std::unique_ptr > populate_sparse(bool row, tatami::MaybeOracle ora, const tatami::Options& opt) const { Index_ non_target_dim = secondary_dim(row); - return populate_sparse_internal(row, non_target_dim, std::move(ora), opt, non_target_dim); + return populate_sparse_internal(row, non_target_dim, std::move(ora), opt, non_target_dim); } template std::unique_ptr > populate_sparse(bool row, tatami::MaybeOracle ora, Index_ block_start, Index_ block_length, const tatami::Options& opt) const { - return populate_sparse_internal(row, block_length, std::move(ora), block_start, block_length, opt); + return populate_sparse_internal(row, block_length, std::move(ora), opt, block_start, block_length); } template std::unique_ptr > populate_sparse(bool row, tatami::MaybeOracle ora, tatami::VectorPtr indices_ptr, const tatami::Options& opt) const { Index_ nidx = indices_ptr->size(); - return populate_sparse_internal(row, nidx, std::move(ora), std::move(indices_ptr), opt); + return populate_sparse_internal(row, nidx, std::move(ora), opt, std::move(indices_ptr)); } public: diff --git a/include/tatami_r/dense_extractor.hpp b/include/tatami_r/dense_extractor.hpp index 704c28f..5e84e7f 100644 --- a/include/tatami_r/dense_extractor.hpp +++ b/include/tatami_r/dense_extractor.hpp @@ -20,7 +20,7 @@ namespace UnknownMatrix_internal { template struct SoloDenseCore { - SoloDenseBase( + SoloDenseCore( const Rcpp::RObject& mat, const Rcpp::Function& dense_extractor, tatami::MaybeOracle ora, @@ -30,7 +30,9 @@ struct SoloDenseCore { [[maybe_unused]] const tatami_chunked::SlabCacheStats& stats) : mat(mat), dense_extractor(dense_extractor), - extract_args(2) + extract_args(2), + non_target_length(non_target_extract.size()), + oracle(std::move(ora)) { extract_args[static_cast(accrow_)] = non_target_extract; } @@ -40,11 +42,16 @@ struct SoloDenseCore { const Rcpp::Function& dense_extractor; Rcpp::List extract_args; + size_t non_target_length; + + tatami::MaybeOracle oracle; + size_t counter = 0; + public: template void fetch_raw(Index_ i, Value_* buffer) { if constexpr(oracle_) { - i = cache.cache.next(); + i = oracle->get(counter++); } #ifdef TATAMI_R_PARALLELIZE_UNKNOWN @@ -65,10 +72,10 @@ struct SoloDenseCore { template struct MyopicDenseCore { - MyopicDenseBase( + MyopicDenseCore( const Rcpp::RObject& mat, const Rcpp::Function& dense_extractor, - [[maybe_unused]] tatami::MaybeOracle ora, // provided here for compatibility with the other Dense*Core classes. + [[maybe_unused]] tatami::MaybeOracle ora, // provided here for compatibility with the other Dense*Core classes. Rcpp::IntegerVector non_target_extract, const std::vector& ticks, const std::vector& map, @@ -80,7 +87,7 @@ struct MyopicDenseCore { chunk_map(map), non_target_length(non_target_extract.size()), factory(stats), - cache(max_slabs) + cache(stats.max_slabs_in_cache) { extract_args[static_cast(accrow_)] = non_target_extract; } @@ -95,7 +102,7 @@ struct MyopicDenseCore { size_t non_target_length; tatami_chunked::DenseSlabFactory factory; - typedef typename decltype(factory)::Slab; + typedef typename decltype(factory)::Slab Slab; tatami_chunked::LruSlabCache cache; public: @@ -136,10 +143,10 @@ struct MyopicDenseCore { template struct OracularDenseCore { - OracularDenseBase( + OracularDenseCore( const Rcpp::RObject& mat, const Rcpp::Function& dense_extractor, - tatami::MaybeOracle ora, + tatami::MaybeOracle ora, Rcpp::IntegerVector non_target_extract, const std::vector& ticks, const std::vector& map, @@ -151,7 +158,7 @@ struct OracularDenseCore { chunk_map(map), non_target_length(non_target_extract.size()), factory(stats), - cache(std::move(ora), max_slabs) + cache(std::move(ora), stats.max_slabs_in_cache) { extract_args[static_cast(accrow_)] = non_target_extract; } @@ -166,13 +173,13 @@ struct OracularDenseCore { size_t non_target_length; tatami_chunked::DenseSlabFactory factory; - typedef typename decltype(factory)::Slab; + typedef typename decltype(factory)::Slab Slab; tatami_chunked::OracularSlabCache cache; -public +public: template void fetch_raw(Index_, Value_* buffer) { - auto res = cache.cache.next( + auto res = cache.next( [&](Index_ i) -> std::pair { auto chosen = chunk_map[i]; return std::make_pair(chosen, static_cast(i - chunk_ticks[chosen])); @@ -272,7 +279,7 @@ struct DenseFull : public tatami::DenseExtractor { public: const Value_* fetch(Index_ i, Value_* buffer) { - core->fetch_raw(i, buffer); + core.fetch_raw(i, buffer); return buffer; } }; @@ -308,12 +315,12 @@ struct DenseBlock : public tatami::DenseExtractor { public: const Value_* fetch(Index_ i, Value_* buffer) { - core->fetch_raw(i, buffer); + core.fetch_raw(i, buffer); return buffer; } }; -template +template struct DenseIndexed : public tatami::DenseExtractor { DenseIndexed( const Rcpp::RObject& mat, @@ -345,7 +352,7 @@ struct DenseIndexed : public tatami::DenseExtractor { public: const Value_* fetch(Index_ i, Value_* buffer) { - core->fetch_raw(i, buffer); + core.fetch_raw(i, buffer); return buffer; } }; diff --git a/include/tatami_r/sparse_extractor.hpp b/include/tatami_r/sparse_extractor.hpp index 7d9f0d3..2e66296 100644 --- a/include/tatami_r/sparse_extractor.hpp +++ b/include/tatami_r/sparse_extractor.hpp @@ -17,9 +17,9 @@ namespace UnknownMatrix_internal { *** Core classes *** ********************/ -template -struct SparseSoloCore { - SparseSoloCore( +template +struct SoloSparseCore { + SoloSparseCore( const Rcpp::RObject& mat, const Rcpp::Function& sparse_extractor, tatami::MaybeOracle ora, @@ -34,12 +34,13 @@ struct SparseSoloCore { sparse_extractor(sparse_extractor), extract_args(2), factory(1, non_target_extract.size(), 1, needs_value, needs_index), - solo(factory.create()) + solo(factory.create()), + oracle(std::move(ora)) { extract_args[static_cast(accrow_)] = non_target_extract; } -private +private: const Rcpp::RObject& mat; const Rcpp::Function& sparse_extractor; Rcpp::List extract_args; @@ -48,10 +49,13 @@ private typedef typename decltype(factory)::Slab Slab; Slab solo; -protected: - std::pair fetch_raw(Index_, Value_* vbuffer, Index_* ibuffer) { + tatami::MaybeOracle oracle; + size_t counter = 0; + +public: + std::pair fetch_raw(Index_ i) { if constexpr(oracle_) { - i = cache.cache.next(); + i = oracle->get(counter++); } solo.number[0] = 0; @@ -73,12 +77,12 @@ private } }; -template +template struct MyopicSparseCore { MyopicSparseCore( const Rcpp::RObject& mat, const Rcpp::Function& sparse_extractor, - [[maybe_unused]] tatami::MaybeOracle ora, // provided here for compatibility with the other Sparse*Core classes. + [[maybe_unused]] tatami::MaybeOracle ora, // provided here for compatibility with the other Sparse*Core classes. Rcpp::IntegerVector non_target_extract, Index_ max_target_chunk_length, const std::vector& ticks, @@ -92,7 +96,7 @@ struct MyopicSparseCore { chunk_ticks(ticks), chunk_map(map), factory(max_target_chunk_length, non_target_extract.size(), stats, needs_value, needs_index), - cache(stats) + cache(stats.max_slabs_in_cache) { extract_args[static_cast(accrow_)] = non_target_extract; } @@ -109,11 +113,11 @@ struct MyopicSparseCore { typedef typename decltype(factory)::Slab Slab; tatami_chunked::LruSlabCache cache; -protected: - std::pair fetch_raw(Index_, Value_* vbuffer, Index_* ibuffer) { +public: + std::pair fetch_raw(Index_ i) { auto chosen = chunk_map[i]; - const auto& slab = cache.cache.find( + const auto& slab = cache.find( chosen, [&]() -> Slab { return factory.create(); @@ -151,7 +155,7 @@ struct OracularSparseCore { OracularSparseCore( const Rcpp::RObject& mat, const Rcpp::Function& sparse_extractor, - tatami::MaybeOracle ora, + tatami::MaybeOracle ora, Rcpp::IntegerVector non_target_extract, Index_ max_target_chunk_length, const std::vector& ticks, @@ -165,7 +169,7 @@ struct OracularSparseCore { chunk_ticks(ticks), chunk_map(map), factory(max_target_chunk_length, non_target_extract.size(), stats, needs_value, needs_index), - cache(std::move(ora), stats), + cache(std::move(ora), stats.max_slabs_in_cache), needs_value(needs_value), needs_index(needs_index) { @@ -181,8 +185,8 @@ struct OracularSparseCore { const std::vector& chunk_map; tatami_chunked::SparseSlabFactory factory; - typedef typename decltype(factory)::Slab; - tatami_chunked::OracularSlabCache cache; + typedef typename decltype(factory)::Slab Slab; + tatami_chunked::OracularSlabCache cache; std::vector chunk_value_ptrs; std::vector chunk_index_ptrs; @@ -193,7 +197,7 @@ struct OracularSparseCore { public: std::pair fetch_raw(Index_) { - auto res = cache.next( + return cache.next( [&](Index_ i) -> std::pair { auto chosen = chunk_map[i]; return std::make_pair(chosen, static_cast(i - chunk_ticks[chosen])); @@ -245,7 +249,7 @@ struct OracularSparseCore { current += chunk_len; } - extract_args[static_cast(by_column)] = primary_extract; + extract_args[static_cast(!accrow_)] = primary_extract; auto obj = sparse_extractor(mat, extract_args); parse_sparse_matrix(obj, chunk_value_ptrs, chunk_index_ptrs, chunk_numbers.data()); @@ -264,9 +268,9 @@ struct OracularSparseCore { } }; -template +template using SparseCore = typename std::conditional, + SoloSparseCore, typename std::conditional, MyopicSparseCore @@ -312,13 +316,13 @@ struct SparseFull : public tatami::SparseExtractor { {} private: - SparseCore core; + SparseCore core; Index_ non_target_dim; bool needs_value, needs_index; public: tatami::SparseRange fetch(Index_ i, Value_* vbuffer, Index_* ibuffer) { - auto res = core->fetch_raw(i); + auto res = core.fetch_raw(i); const auto& slab = *(res.first); Index_ offset = res.second; @@ -337,8 +341,8 @@ struct SparseFull : public tatami::SparseExtractor { } }; -template -struct SparseBlock : public SparseBase, public tatami::SparseExtractor { +template +struct SparseBlock : public tatami::SparseExtractor { SparseBlock( const Rcpp::RObject& mat, const Rcpp::Function& sparse_extractor, @@ -351,7 +355,7 @@ struct SparseBlock : public SparseBase( + core( mat, sparse_extractor, std::move(ora), @@ -373,24 +377,24 @@ struct SparseBlock : public SparseBase core; + SparseCore core; Index_ block_start; bool needs_value, needs_index; public: tatami::SparseRange fetch(Index_ i, Value_* vbuffer, Index_* ibuffer) { - auto res = this->fetch_raw(i); + auto res = core.fetch_raw(i); const auto& slab = *(res.first); Index_ offset = res.second; - tatami::SparseRange output(slab.count[offset]); + tatami::SparseRange output(slab.number[offset]); if (needs_value) { std::copy_n(slab.values[offset], output.number, vbuffer); output.value = vbuffer; } if (needs_index) { - auto iptr = slab.index[offset]; + auto iptr = slab.indices[offset]; for (Index_ i = 0; i < output.number; ++i) { ibuffer[i] = static_cast(iptr[i]) + block_start; } @@ -401,8 +405,8 @@ struct SparseBlock : public SparseBase -struct SparseIndexed : public SparseBase, public tatami::SparseExtractor { +template +struct SparseIndexed : public tatami::SparseExtractor { SparseIndexed( const Rcpp::RObject& mat, const Rcpp::Function& sparse_extractor, @@ -414,7 +418,7 @@ struct SparseIndexed : public SparseBase( + core( mat, sparse_extractor, std::move(ora), @@ -436,23 +440,23 @@ struct SparseIndexed : public SparseBase core; + SparseCore core; tatami::VectorPtr indices_ptr; bool needs_value, needs_index; public: tatami::SparseRange fetch(Index_ i, Value_* vbuffer, Index_* ibuffer) { - auto res = core->fetch_raw(i); + auto res = core.fetch_raw(i); const auto& slab = *(res.first); Index_ offset = res.second; - tatami::SparseRange output(slab.count[offset]); - if (this->needs_value) { + tatami::SparseRange output(slab.number[offset]); + if (needs_value) { std::copy_n(slab.values[offset], output.number, vbuffer); output.value = vbuffer; } - if (this->needs_index) { + if (needs_index) { auto iptr = slab.indices[offset]; const auto& indices = *indices_ptr; for (CachedIndex_ i = 0; i < output.number; ++i) { @@ -471,31 +475,27 @@ struct SparseIndexed : public SparseBase const Value_* densify(const Slab_& slab, Index_ offset, size_t non_target_length, Value_* buffer) { - size_t shift = static_cast(offset) * non_target_length; // cast to size_t to avoid overflow. - auto vptr = slab.value.data() + shift; - auto iptr = slab.index.data() + shift; - + auto vptr = slab.values[offset]; + auto iptr = slab.indices[offset]; std::fill_n(buffer, non_target_length, 0); - for (Index_ i = 0, end = slab.count[offset]; i < end; ++i, ++vptr, ++iptr) { + for (Index_ i = 0, end = slab.number[offset]; i < end; ++i, ++vptr, ++iptr) { buffer[*iptr] = *vptr; } return buffer; } -template -struct DensifiedSparseFull : public SparseBase, public tatami::DenseExtractor { +template +struct DensifiedSparseFull : public tatami::DenseExtractor { DensifiedSparseFull( const Rcpp::RObject& mat, const Rcpp::Function& sparse_extractor, tatami::MaybeOracle ora, Index_ non_target_dim, - bool by_column, Index_ max_target_chunk_length, const std::vector& ticks, const std::vector& map, - size_t cache_size_in_bytes, - bool require_minimum_cache) : - SparseBase( + const tatami_chunked::SlabCacheStats& stats) : + core( mat, sparse_extractor, std::move(ora), @@ -504,39 +504,40 @@ struct DensifiedSparseFull : public SparseBase core; + size_t non_target_dim; + public: const Value_* fetch(Index_ i, Value_* buffer) { - auto res = this->fetch_raw(i); - return densify(*(res.first), res.second, this->non_target_length, buffer); + auto res = core.fetch_raw(i); + return densify(*(res.first), res.second, non_target_dim, buffer); } }; -template -struct DensifiedSparseBlock : public SparseBase, public tatami::DenseExtractor { +template +struct DensifiedSparseBlock : public tatami::DenseExtractor { DensifiedSparseBlock( const Rcpp::RObject& mat, const Rcpp::Function& sparse_extractor, tatami::MaybeOracle ora, Index_ block_start, Index_ block_length, - bool by_column, Index_ max_target_chunk_length, const std::vector& ticks, const std::vector& map, - size_t cache_size_in_bytes, - bool require_minimum_cache) : - SparseBase( + const tatami_chunked::SlabCacheStats& stats) : + core( mat, sparse_extractor, std::move(ora), @@ -545,38 +546,39 @@ struct DensifiedSparseBlock : public SparseBase core; + size_t block_length; + public: const Value_* fetch(Index_ i, Value_* buffer) { - auto res = this->fetch_raw(i); - return densify(*(res.first), res.second, this->non_target_length, buffer); + auto res = core.fetch_raw(i); + return densify(*(res.first), res.second, block_length, buffer); } }; -template -struct DensifiedSparseIndexed : public SparseBase, public tatami::DenseExtractor { +template +struct DensifiedSparseIndexed : public tatami::DenseExtractor { DensifiedSparseIndexed( const Rcpp::RObject& mat, const Rcpp::Function& sparse_extractor, tatami::MaybeOracle ora, tatami::VectorPtr idx_ptr, - bool by_column, Index_ max_target_chunk_length, const std::vector& ticks, const std::vector& map, - size_t cache_size_in_bytes, - bool require_minimum_cache) : - SparseBase( + const tatami_chunked::SlabCacheStats& stats) : + core( mat, sparse_extractor, std::move(ora), @@ -587,21 +589,24 @@ struct DensifiedSparseIndexed : public SparseBasesize()) {} +private: + SparseCore core; + size_t num_indices; + public: const Value_* fetch(Index_ i, Value_* buffer) { - auto res = this->fetch_raw(i); - return densify(*(res.first), res.second, this->non_target_length, buffer); + auto res = core.fetch_raw(i); + return densify(*(res.first), res.second, num_indices, buffer); } }; diff --git a/include/tatami_r/sparse_matrix.hpp b/include/tatami_r/sparse_matrix.hpp index 7998a4a..6ac4360 100644 --- a/include/tatami_r/sparse_matrix.hpp +++ b/include/tatami_r/sparse_matrix.hpp @@ -12,7 +12,7 @@ void parse_sparse_matrix_internal( Rcpp::RObject seed, std::vector& value_ptrs, std::vector& index_ptrs, - std::vector& counts) + Index_* counts) { Rcpp::RObject raw_svt = seed.slot("SVT"); if (raw_svt == R_NilValue) { @@ -90,7 +90,7 @@ void parse_sparse_matrix( Rcpp::RObject seed, std::vector& value_ptrs, std::vector& index_ptrs, - std::vector& counts) + Index_* counts) { auto ctype = get_class_name(seed); if (ctype != "SVT_SparseMatrix") { diff --git a/tests/src/bindings.cpp b/tests/src/bindings.cpp index ebfa125..30c9a20 100644 --- a/tests/src/bindings.cpp +++ b/tests/src/bindings.cpp @@ -19,7 +19,7 @@ typedef Rcpp::XPtr > RatXPtr; //[[Rcpp::export(rng=false)]] SEXP parse(Rcpp::RObject seed, double cache_size, bool require_min) { if (cache_size < 0) { - tatami_r::Options opt; + tatami_r::UnknownMatrixOptions opt; opt.maximum_cache_size = cache_size; opt.require_minimum_cache = require_min; return RatXPtr(new tatami_r::UnknownMatrix(seed, opt)); From bdb2aa7488a19df20940fd48c46bcfba1934f91c Mon Sep 17 00:00:00 2001 From: LTLA Date: Wed, 15 May 2024 23:58:27 -0700 Subject: [PATCH 4/5] More fixes to pass check. --- include/tatami_r/UnknownMatrix.hpp | 4 ++-- include/tatami_r/dense_extractor.hpp | 20 +++++++++++++++++--- include/tatami_r/sparse_extractor.hpp | 12 ++++++++---- tests/src/bindings.cpp | 2 +- 4 files changed, 28 insertions(+), 10 deletions(-) diff --git a/include/tatami_r/UnknownMatrix.hpp b/include/tatami_r/UnknownMatrix.hpp index 587e4fc..3514a95 100644 --- a/include/tatami_r/UnknownMatrix.hpp +++ b/include/tatami_r/UnknownMatrix.hpp @@ -310,7 +310,7 @@ class UnknownMatrix : public tatami::Matrix { const auto& map = chunk_map(row); const auto& ticks = chunk_ticks(row); - bool solo = stats.max_slabs_in_cache == 0; + bool solo = (stats.max_slabs_in_cache == 0); #ifdef TATAMI_R_PARALLELIZE_UNKNOWN // This involves some Rcpp initializations, so we lock it just in case. @@ -318,7 +318,7 @@ class UnknownMatrix : public tatami::Matrix { mexec.run([&]() -> void { #endif - if (internal_sparse) { + if (!internal_sparse) { if (row) { if (solo) { typedef FromDense_ ShortDense; diff --git a/include/tatami_r/dense_extractor.hpp b/include/tatami_r/dense_extractor.hpp index 5e84e7f..dfd9525 100644 --- a/include/tatami_r/dense_extractor.hpp +++ b/include/tatami_r/dense_extractor.hpp @@ -62,7 +62,11 @@ struct SoloDenseCore { extract_args[static_cast(!accrow_)] = Rcpp::IntegerVector::create(i + 1); auto obj = dense_extractor(mat, extract_args); - parse_dense_matrix(obj, buffer, 0, 0, non_target_length, 1); + if constexpr(accrow_) { + parse_dense_matrix(obj, buffer, 0, 0, 1, non_target_length); + } else { + parse_dense_matrix(obj, buffer, 0, 0, non_target_length, 1); + } #ifdef TATAMI_R_PARALLELIZE_UNKNOWN }); @@ -127,8 +131,14 @@ struct MyopicDenseCore { Rcpp::IntegerVector primary_extract(chunk_len); std::iota(primary_extract.begin(), primary_extract.end(), chunk_start + 1); extract_args[static_cast(!accrow_)] = primary_extract; + auto obj = dense_extractor(mat, extract_args); - parse_dense_matrix(obj, cache.data, 0, 0, non_target_length, chunk_len); + if constexpr(accrow_) { + parse_dense_matrix(obj, cache.data, 0, 0, chunk_len, non_target_length); + } else { + parse_dense_matrix(obj, cache.data, 0, 0, non_target_length, chunk_len); + + } #ifdef TATAMI_R_PARALLELIZE_UNKNOWN }); @@ -221,7 +231,11 @@ struct OracularDenseCore { for (const auto& p : to_populate) { auto chunk_start = chunk_ticks[p.first]; Index_ chunk_len = chunk_ticks[p.first + 1] - chunk_start; - parse_dense_matrix(obj, p.second->data, 0, current, non_target_length, chunk_len); + if constexpr(accrow_) { + parse_dense_matrix(obj, p.second->data, current, 0, chunk_len, non_target_length); + } else { + parse_dense_matrix(obj, p.second->data, 0, current, non_target_length, chunk_len); + } current += chunk_len; } diff --git a/include/tatami_r/sparse_extractor.hpp b/include/tatami_r/sparse_extractor.hpp index 2e66296..d95ebb2 100644 --- a/include/tatami_r/sparse_extractor.hpp +++ b/include/tatami_r/sparse_extractor.hpp @@ -223,10 +223,12 @@ struct OracularSparseCore { Index_ chunk_len = chunk_ticks[p.first + 1] - chunk_ticks[p.first]; total_len += chunk_len; if (needs_value) { - chunk_value_ptrs.insert(chunk_value_ptrs.end(), p.second->values.begin(), p.second->values.end()); + auto vIt = p.second->values.begin(); + chunk_value_ptrs.insert(chunk_value_ptrs.end(), vIt, vIt + chunk_len); } if (needs_index) { - chunk_index_ptrs.insert(chunk_index_ptrs.end(), p.second->indices.begin(), p.second->indices.end()); + auto iIt = p.second->indices.begin(); + chunk_index_ptrs.insert(chunk_index_ptrs.end(), iIt, iIt + chunk_len); } } @@ -436,7 +438,9 @@ struct SparseIndexed : public tatami::SparseExtractor { needs_value, needs_index ), - indices_ptr(std::move(idx_ptr)) + indices_ptr(std::move(idx_ptr)), + needs_value(needs_value), + needs_index(needs_index) {} private: @@ -459,7 +463,7 @@ struct SparseIndexed : public tatami::SparseExtractor { if (needs_index) { auto iptr = slab.indices[offset]; const auto& indices = *indices_ptr; - for (CachedIndex_ i = 0; i < output.number; ++i) { + for (Index_ i = 0; i < output.number; ++i) { ibuffer[i] = indices[iptr[i]]; } output.index = ibuffer; diff --git a/tests/src/bindings.cpp b/tests/src/bindings.cpp index 30c9a20..fde9278 100644 --- a/tests/src/bindings.cpp +++ b/tests/src/bindings.cpp @@ -18,7 +18,7 @@ typedef Rcpp::XPtr > RatXPtr; //' @export //[[Rcpp::export(rng=false)]] SEXP parse(Rcpp::RObject seed, double cache_size, bool require_min) { - if (cache_size < 0) { + if (cache_size >= 0) { tatami_r::UnknownMatrixOptions opt; opt.maximum_cache_size = cache_size; opt.require_minimum_cache = require_min; From f65ebb54a8cac014843e08827a60106f43f06a73 Mon Sep 17 00:00:00 2001 From: LTLA Date: Thu, 16 May 2024 08:13:52 -0700 Subject: [PATCH 5/5] Moved the row back to being a runtime parameter. --- include/tatami_r/UnknownMatrix.hpp | 67 ++++++---------------- include/tatami_r/dense_extractor.hpp | 82 ++++++++++++++++----------- include/tatami_r/dense_matrix.hpp | 16 +++--- include/tatami_r/sparse_extractor.hpp | 80 +++++++++++++++++--------- include/tatami_r/sparse_matrix.hpp | 34 ++++++----- 5 files changed, 147 insertions(+), 132 deletions(-) diff --git a/include/tatami_r/UnknownMatrix.hpp b/include/tatami_r/UnknownMatrix.hpp index 3514a95..c83f995 100644 --- a/include/tatami_r/UnknownMatrix.hpp +++ b/include/tatami_r/UnknownMatrix.hpp @@ -298,8 +298,8 @@ class UnknownMatrix : public tatami::Matrix { private: template< bool oracle_, - template class FromDense_, - template class FromSparse_, + template class FromDense_, + template class FromSparse_, typename ... Args_ > std::unique_ptr > populate_dense_internal(bool row, Index_ non_target_length, tatami::MaybeOracle ora, Args_&& ... args) const { @@ -319,41 +319,20 @@ class UnknownMatrix : public tatami::Matrix { #endif if (!internal_sparse) { - if (row) { - if (solo) { - typedef FromDense_ ShortDense; - output.reset(new ShortDense(original_seed, dense_extractor, std::move(ora), std::forward(args)..., ticks, map, stats)); - } else { - typedef FromDense_ ShortDense; - output.reset(new ShortDense(original_seed, dense_extractor, std::move(ora), std::forward(args)..., ticks, map, stats)); - } + if (solo) { + typedef FromDense_ ShortDense; + output.reset(new ShortDense(original_seed, dense_extractor, row, std::move(ora), std::forward(args)..., ticks, map, stats)); } else { - if (solo) { - typedef FromDense_ ShortDense; - output.reset(new ShortDense(original_seed, dense_extractor, std::move(ora), std::forward(args)..., ticks, map, stats)); - } else { - typedef FromDense_ ShortDense; - output.reset(new ShortDense(original_seed, dense_extractor, std::move(ora), std::forward(args)..., ticks, map, stats)); - } + typedef FromDense_ ShortDense; + output.reset(new ShortDense(original_seed, dense_extractor, row, std::move(ora), std::forward(args)..., ticks, map, stats)); } - } else { - if (row) { - if (solo) { - typedef FromSparse_ ShortSparse; - output.reset(new ShortSparse(original_seed, sparse_extractor, std::move(ora), std::forward(args)..., max_target_chunk_length, ticks, map, stats)); - } else { - typedef FromSparse_ ShortSparse; - output.reset(new ShortSparse(original_seed, sparse_extractor, std::move(ora), std::forward(args)..., max_target_chunk_length, ticks, map, stats)); - } + if (solo) { + typedef FromSparse_ ShortSparse; + output.reset(new ShortSparse(original_seed, sparse_extractor, row, std::move(ora), std::forward(args)..., max_target_chunk_length, ticks, map, stats)); } else { - if (solo) { - typedef FromSparse_ ShortSparse; - output.reset(new ShortSparse(original_seed, sparse_extractor, std::move(ora), std::forward(args)..., max_target_chunk_length, ticks, map, stats)); - } else { - typedef FromSparse_ ShortSparse; - output.reset(new ShortSparse(original_seed, sparse_extractor, std::move(ora), std::forward(args)..., max_target_chunk_length, ticks, map, stats)); - } + typedef FromSparse_ ShortSparse; + output.reset(new ShortSparse(original_seed, sparse_extractor, row, std::move(ora), std::forward(args)..., max_target_chunk_length, ticks, map, stats)); } } @@ -416,7 +395,7 @@ class UnknownMatrix : public tatami::Matrix { public: template< bool oracle_, - template class FromSparse_, + template class FromSparse_, typename ... Args_ > std::unique_ptr > populate_sparse_internal( @@ -449,22 +428,12 @@ class UnknownMatrix : public tatami::Matrix { mexec.run([&]() -> void { #endif - if (row) { - if (solo) { - typedef FromSparse_ ShortSparse; - output.reset(new ShortSparse(original_seed, sparse_extractor, std::move(ora), std::forward(args)..., max_target_chunk_length, ticks, map, stats, needs_value, needs_index)); - } else { - typedef FromSparse_ ShortSparse; - output.reset(new ShortSparse(original_seed, sparse_extractor, std::move(ora), std::forward(args)..., max_target_chunk_length, ticks, map, stats, needs_value, needs_index)); - } + if (solo) { + typedef FromSparse_ ShortSparse; + output.reset(new ShortSparse(original_seed, sparse_extractor, row, std::move(ora), std::forward(args)..., max_target_chunk_length, ticks, map, stats, needs_value, needs_index)); } else { - if (solo) { - typedef FromSparse_ ShortSparse; - output.reset(new ShortSparse(original_seed, sparse_extractor, std::move(ora), std::forward(args)..., max_target_chunk_length, ticks, map, stats, needs_value, needs_index)); - } else { - typedef FromSparse_ ShortSparse; - output.reset(new ShortSparse(original_seed, sparse_extractor, std::move(ora), std::forward(args)..., max_target_chunk_length, ticks, map, stats, needs_value, needs_index)); - } + typedef FromSparse_ ShortSparse; + output.reset(new ShortSparse(original_seed, sparse_extractor, row, std::move(ora), std::forward(args)..., max_target_chunk_length, ticks, map, stats, needs_value, needs_index)); } #ifdef TATAMI_R_PARALLELIZE_UNKNOWN diff --git a/include/tatami_r/dense_extractor.hpp b/include/tatami_r/dense_extractor.hpp index dfd9525..883bc50 100644 --- a/include/tatami_r/dense_extractor.hpp +++ b/include/tatami_r/dense_extractor.hpp @@ -18,11 +18,12 @@ namespace UnknownMatrix_internal { *** Core classes *** ********************/ -template +template struct SoloDenseCore { SoloDenseCore( const Rcpp::RObject& mat, const Rcpp::Function& dense_extractor, + bool row, tatami::MaybeOracle ora, Rcpp::IntegerVector non_target_extract, [[maybe_unused]] const std::vector& ticks, // provided here for compatibility with the other Dense*Core classes. @@ -31,10 +32,11 @@ struct SoloDenseCore { mat(mat), dense_extractor(dense_extractor), extract_args(2), + row(row), non_target_length(non_target_extract.size()), oracle(std::move(ora)) { - extract_args[static_cast(accrow_)] = non_target_extract; + extract_args[static_cast(row)] = non_target_extract; } private: @@ -42,6 +44,7 @@ struct SoloDenseCore { const Rcpp::Function& dense_extractor; Rcpp::List extract_args; + bool row; size_t non_target_length; tatami::MaybeOracle oracle; @@ -60,12 +63,12 @@ struct SoloDenseCore { mexec.run([&]() -> void { #endif - extract_args[static_cast(!accrow_)] = Rcpp::IntegerVector::create(i + 1); + extract_args[static_cast(!row)] = Rcpp::IntegerVector::create(i + 1); auto obj = dense_extractor(mat, extract_args); - if constexpr(accrow_) { - parse_dense_matrix(obj, buffer, 0, 0, 1, non_target_length); + if (row) { + parse_dense_matrix(obj, true, buffer, 0, 0, 1, non_target_length); } else { - parse_dense_matrix(obj, buffer, 0, 0, non_target_length, 1); + parse_dense_matrix(obj, false, buffer, 0, 0, non_target_length, 1); } #ifdef TATAMI_R_PARALLELIZE_UNKNOWN @@ -74,11 +77,12 @@ struct SoloDenseCore { } }; -template +template struct MyopicDenseCore { MyopicDenseCore( const Rcpp::RObject& mat, const Rcpp::Function& dense_extractor, + bool row, [[maybe_unused]] tatami::MaybeOracle ora, // provided here for compatibility with the other Dense*Core classes. Rcpp::IntegerVector non_target_extract, const std::vector& ticks, @@ -87,13 +91,14 @@ struct MyopicDenseCore { mat(mat), dense_extractor(dense_extractor), extract_args(2), + row(row), + non_target_length(non_target_extract.size()), chunk_ticks(ticks), chunk_map(map), - non_target_length(non_target_extract.size()), factory(stats), cache(stats.max_slabs_in_cache) { - extract_args[static_cast(accrow_)] = non_target_extract; + extract_args[static_cast(row)] = non_target_extract; } private: @@ -101,9 +106,11 @@ struct MyopicDenseCore { const Rcpp::Function& dense_extractor; Rcpp::List extract_args; + bool row; + size_t non_target_length; + const std::vector& chunk_ticks; const std::vector& chunk_map; - size_t non_target_length; tatami_chunked::DenseSlabFactory factory; typedef typename decltype(factory)::Slab Slab; @@ -130,14 +137,13 @@ struct MyopicDenseCore { size_t chunk_len = chunk_ticks[id + 1] - chunk_start; Rcpp::IntegerVector primary_extract(chunk_len); std::iota(primary_extract.begin(), primary_extract.end(), chunk_start + 1); - extract_args[static_cast(!accrow_)] = primary_extract; + extract_args[static_cast(!row)] = primary_extract; auto obj = dense_extractor(mat, extract_args); - if constexpr(accrow_) { - parse_dense_matrix(obj, cache.data, 0, 0, chunk_len, non_target_length); + if (row) { + parse_dense_matrix(obj, true, cache.data, 0, 0, chunk_len, non_target_length); } else { - parse_dense_matrix(obj, cache.data, 0, 0, non_target_length, chunk_len); - + parse_dense_matrix(obj, false, cache.data, 0, 0, non_target_length, chunk_len); } #ifdef TATAMI_R_PARALLELIZE_UNKNOWN @@ -151,11 +157,12 @@ struct MyopicDenseCore { } }; -template +template struct OracularDenseCore { OracularDenseCore( const Rcpp::RObject& mat, const Rcpp::Function& dense_extractor, + bool row, tatami::MaybeOracle ora, Rcpp::IntegerVector non_target_extract, const std::vector& ticks, @@ -164,13 +171,14 @@ struct OracularDenseCore { mat(mat), dense_extractor(dense_extractor), extract_args(2), + row(row), + non_target_length(non_target_extract.size()), chunk_ticks(ticks), chunk_map(map), - non_target_length(non_target_extract.size()), factory(stats), cache(std::move(ora), stats.max_slabs_in_cache) { - extract_args[static_cast(accrow_)] = non_target_extract; + extract_args[static_cast(row)] = non_target_extract; } private: @@ -178,9 +186,11 @@ struct OracularDenseCore { const Rcpp::Function& dense_extractor; Rcpp::List extract_args; + bool row; + size_t non_target_length; + const std::vector& chunk_ticks; const std::vector& chunk_map; - size_t non_target_length; tatami_chunked::DenseSlabFactory factory; typedef typename decltype(factory)::Slab Slab; @@ -224,17 +234,17 @@ struct OracularDenseCore { current += chunk_len; } - extract_args[static_cast(!accrow_)] = primary_extract; + extract_args[static_cast(!row)] = primary_extract; auto obj = dense_extractor(mat, extract_args); current = 0; for (const auto& p : to_populate) { auto chunk_start = chunk_ticks[p.first]; Index_ chunk_len = chunk_ticks[p.first + 1] - chunk_start; - if constexpr(accrow_) { - parse_dense_matrix(obj, p.second->data, current, 0, chunk_len, non_target_length); + if (row) { + parse_dense_matrix(obj, true, p.second->data, current, 0, chunk_len, non_target_length); } else { - parse_dense_matrix(obj, p.second->data, 0, current, non_target_length, chunk_len); + parse_dense_matrix(obj, false, p.second->data, 0, current, non_target_length, chunk_len); } current += chunk_len; } @@ -250,12 +260,12 @@ struct OracularDenseCore { } }; -template +template using DenseCore = typename std::conditional, + SoloDenseCore, typename std::conditional, - MyopicDenseCore + OracularDenseCore, + MyopicDenseCore >::type >::type; @@ -263,11 +273,12 @@ using DenseCore = typename std::conditional +template struct DenseFull : public tatami::DenseExtractor { DenseFull( const Rcpp::RObject& mat, const Rcpp::Function& dense_extractor, + bool row, tatami::MaybeOracle ora, Index_ non_target_dim, const std::vector& ticks, @@ -276,6 +287,7 @@ struct DenseFull : public tatami::DenseExtractor { core( mat, dense_extractor, + row, std::move(ora), [&]() { Rcpp::IntegerVector output(non_target_dim); @@ -289,7 +301,7 @@ struct DenseFull : public tatami::DenseExtractor { {} private: - DenseCore core; + DenseCore core; public: const Value_* fetch(Index_ i, Value_* buffer) { @@ -298,11 +310,12 @@ struct DenseFull : public tatami::DenseExtractor { } }; -template +template struct DenseBlock : public tatami::DenseExtractor { DenseBlock( const Rcpp::RObject& mat, const Rcpp::Function& dense_extractor, + bool row, tatami::MaybeOracle ora, Index_ block_start, Index_ block_length, @@ -312,6 +325,7 @@ struct DenseBlock : public tatami::DenseExtractor { core( mat, dense_extractor, + row, std::move(ora), [&]() { Rcpp::IntegerVector output(block_length); @@ -325,7 +339,7 @@ struct DenseBlock : public tatami::DenseExtractor { {} private: - DenseCore core; + DenseCore core; public: const Value_* fetch(Index_ i, Value_* buffer) { @@ -334,11 +348,12 @@ struct DenseBlock : public tatami::DenseExtractor { } }; -template +template struct DenseIndexed : public tatami::DenseExtractor { DenseIndexed( const Rcpp::RObject& mat, const Rcpp::Function& dense_extractor, + bool row, tatami::MaybeOracle ora, tatami::VectorPtr indices_ptr, const std::vector& ticks, @@ -347,6 +362,7 @@ struct DenseIndexed : public tatami::DenseExtractor { core( mat, dense_extractor, + row, std::move(ora), [&]() { Rcpp::IntegerVector output(indices_ptr->begin(), indices_ptr->end()); @@ -362,7 +378,7 @@ struct DenseIndexed : public tatami::DenseExtractor { {} private: - DenseCore core; + DenseCore core; public: const Value_* fetch(Index_ i, Value_* buffer) { diff --git a/include/tatami_r/dense_matrix.hpp b/include/tatami_r/dense_matrix.hpp index dca61f4..6f8e82f 100644 --- a/include/tatami_r/dense_matrix.hpp +++ b/include/tatami_r/dense_matrix.hpp @@ -6,11 +6,11 @@ namespace tatami_r { -template -void parse_dense_matrix_internal(const InputObject_& y, CachedValue_* cache, size_t start_row, size_t start_col, size_t num_rows, size_t num_cols) { +template +void parse_dense_matrix_internal(const InputObject_& y, bool row, CachedValue_* cache, size_t start_row, size_t start_col, size_t num_rows, size_t num_cols) { auto input = static_cast(y.begin()) + start_row + start_col * static_cast(y.rows()); - if constexpr(transpose_) { + if (row) { // y is a column-major matrix, but transpose() expects a row-major // input, so we just conceptually transpose it. tatami::transpose(input, num_cols, num_rows, y.rows(), cache, num_cols); @@ -23,18 +23,18 @@ void parse_dense_matrix_internal(const InputObject_& y, CachedValue_* cache, siz } } -template -void parse_dense_matrix(const Rcpp::RObject& seed, CachedValue_* cache, size_t start_row, size_t start_col, size_t num_rows, size_t num_cols) { +template +void parse_dense_matrix(const Rcpp::RObject& seed, bool row, CachedValue_* cache, size_t start_row, size_t start_col, size_t num_rows, size_t num_cols) { auto stype = seed.sexp_type(); if (stype == REALSXP) { Rcpp::NumericMatrix y(seed); - parse_dense_matrix_internal(y, cache, start_row, start_col, num_rows, num_cols); + parse_dense_matrix_internal(y, row, cache, start_row, start_col, num_rows, num_cols); } else if (stype == INTSXP) { Rcpp::IntegerMatrix y(seed); - parse_dense_matrix_internal(y, cache, start_row, start_col, num_rows, num_cols); + parse_dense_matrix_internal(y, row, cache, start_row, start_col, num_rows, num_cols); } else if (stype == LGLSXP) { Rcpp::LogicalMatrix y(seed); - parse_dense_matrix_internal(y, cache, start_row, start_col, num_rows, num_cols); + parse_dense_matrix_internal(y, row, cache, start_row, start_col, num_rows, num_cols); } else { throw std::runtime_error("unsupported SEXP type (" + std::to_string(stype) + ") from the matrix returned by 'extract_array'"); } diff --git a/include/tatami_r/sparse_extractor.hpp b/include/tatami_r/sparse_extractor.hpp index d95ebb2..a1f756b 100644 --- a/include/tatami_r/sparse_extractor.hpp +++ b/include/tatami_r/sparse_extractor.hpp @@ -17,11 +17,12 @@ namespace UnknownMatrix_internal { *** Core classes *** ********************/ -template +template struct SoloSparseCore { SoloSparseCore( const Rcpp::RObject& mat, const Rcpp::Function& sparse_extractor, + bool row, tatami::MaybeOracle ora, Rcpp::IntegerVector non_target_extract, [[maybe_unused]] Index_ max_target_chunk_length, // provided here for compatibility with the other Sparse*Core classes. @@ -33,11 +34,12 @@ struct SoloSparseCore { mat(mat), sparse_extractor(sparse_extractor), extract_args(2), + row(row), factory(1, non_target_extract.size(), 1, needs_value, needs_index), solo(factory.create()), oracle(std::move(ora)) { - extract_args[static_cast(accrow_)] = non_target_extract; + extract_args[static_cast(row)] = non_target_extract; } private: @@ -45,6 +47,8 @@ struct SoloSparseCore { const Rcpp::Function& sparse_extractor; Rcpp::List extract_args; + bool row; + tatami_chunked::SparseSlabFactory factory; typedef typename decltype(factory)::Slab Slab; Slab solo; @@ -65,9 +69,9 @@ struct SoloSparseCore { mexec.run([&]() -> void { #endif - extract_args[static_cast(!accrow_)] = Rcpp::IntegerVector::create(i + 1); + extract_args[static_cast(!row)] = Rcpp::IntegerVector::create(i + 1); auto obj = sparse_extractor(mat, extract_args); - parse_sparse_matrix(obj, solo.values, solo.indices, solo.number); + parse_sparse_matrix(obj, row, solo.values, solo.indices, solo.number); #ifdef TATAMI_R_PARALLELIZE_UNKNOWN }); @@ -77,11 +81,12 @@ struct SoloSparseCore { } }; -template +template struct MyopicSparseCore { MyopicSparseCore( const Rcpp::RObject& mat, const Rcpp::Function& sparse_extractor, + bool row, [[maybe_unused]] tatami::MaybeOracle ora, // provided here for compatibility with the other Sparse*Core classes. Rcpp::IntegerVector non_target_extract, Index_ max_target_chunk_length, @@ -93,12 +98,13 @@ struct MyopicSparseCore { mat(mat), sparse_extractor(sparse_extractor), extract_args(2), + row(row), chunk_ticks(ticks), chunk_map(map), factory(max_target_chunk_length, non_target_extract.size(), stats, needs_value, needs_index), cache(stats.max_slabs_in_cache) { - extract_args[static_cast(accrow_)] = non_target_extract; + extract_args[static_cast(row)] = non_target_extract; } private: @@ -106,6 +112,8 @@ struct MyopicSparseCore { const Rcpp::Function& sparse_extractor; Rcpp::List extract_args; + bool row; + const std::vector& chunk_ticks; const std::vector& chunk_map; @@ -135,9 +143,9 @@ struct MyopicSparseCore { Rcpp::IntegerVector primary_extract(chunk_len); std::iota(primary_extract.begin(), primary_extract.end(), chunk_start + 1); - extract_args[static_cast(!accrow_)] = primary_extract; + extract_args[static_cast(!row)] = primary_extract; auto obj = sparse_extractor(mat, extract_args); - parse_sparse_matrix(obj, cache.values, cache.indices, cache.number); + parse_sparse_matrix(obj, row, cache.values, cache.indices, cache.number); #ifdef TATAMI_R_PARALLELIZE_UNKNOWN }); @@ -150,11 +158,12 @@ struct MyopicSparseCore { } }; -template +template struct OracularSparseCore { OracularSparseCore( const Rcpp::RObject& mat, const Rcpp::Function& sparse_extractor, + bool row, tatami::MaybeOracle ora, Rcpp::IntegerVector non_target_extract, Index_ max_target_chunk_length, @@ -166,6 +175,7 @@ struct OracularSparseCore { mat(mat), sparse_extractor(sparse_extractor), extract_args(2), + row(row), chunk_ticks(ticks), chunk_map(map), factory(max_target_chunk_length, non_target_extract.size(), stats, needs_value, needs_index), @@ -173,7 +183,7 @@ struct OracularSparseCore { needs_value(needs_value), needs_index(needs_index) { - extract_args[static_cast(accrow_)] = non_target_extract; + extract_args[static_cast(row)] = non_target_extract; } private: @@ -181,6 +191,8 @@ struct OracularSparseCore { const Rcpp::Function& sparse_extractor; Rcpp::List extract_args; + bool row; + const std::vector& chunk_ticks; const std::vector& chunk_map; @@ -251,9 +263,9 @@ struct OracularSparseCore { current += chunk_len; } - extract_args[static_cast(!accrow_)] = primary_extract; + extract_args[static_cast(!row)] = primary_extract; auto obj = sparse_extractor(mat, extract_args); - parse_sparse_matrix(obj, chunk_value_ptrs, chunk_index_ptrs, chunk_numbers.data()); + parse_sparse_matrix(obj, row, chunk_value_ptrs, chunk_index_ptrs, chunk_numbers.data()); current = 0; for (const auto& p : to_populate) { @@ -270,12 +282,12 @@ struct OracularSparseCore { } }; -template +template using SparseCore = typename std::conditional, + SoloSparseCore, typename std::conditional, - MyopicSparseCore + OracularSparseCore, + MyopicSparseCore >::type >::type; @@ -283,11 +295,12 @@ using SparseCore = typename std::conditional +template struct SparseFull : public tatami::SparseExtractor { SparseFull( const Rcpp::RObject& mat, const Rcpp::Function& sparse_extractor, + bool row, tatami::MaybeOracle ora, Index_ non_target_dim, Index_ max_target_chunk_length, @@ -299,6 +312,7 @@ struct SparseFull : public tatami::SparseExtractor { core( mat, sparse_extractor, + row, std::move(ora), [&]() { Rcpp::IntegerVector output(non_target_dim); @@ -318,7 +332,7 @@ struct SparseFull : public tatami::SparseExtractor { {} private: - SparseCore core; + SparseCore core; Index_ non_target_dim; bool needs_value, needs_index; @@ -343,11 +357,12 @@ struct SparseFull : public tatami::SparseExtractor { } }; -template +template struct SparseBlock : public tatami::SparseExtractor { SparseBlock( const Rcpp::RObject& mat, const Rcpp::Function& sparse_extractor, + bool row, tatami::MaybeOracle ora, Index_ block_start, Index_ block_length, @@ -360,6 +375,7 @@ struct SparseBlock : public tatami::SparseExtractor { core( mat, sparse_extractor, + row, std::move(ora), [&]() { Rcpp::IntegerVector output(block_length); @@ -379,7 +395,7 @@ struct SparseBlock : public tatami::SparseExtractor { {} private: - SparseCore core; + SparseCore core; Index_ block_start; bool needs_value, needs_index; @@ -407,11 +423,12 @@ struct SparseBlock : public tatami::SparseExtractor { } }; -template +template struct SparseIndexed : public tatami::SparseExtractor { SparseIndexed( const Rcpp::RObject& mat, const Rcpp::Function& sparse_extractor, + bool row, tatami::MaybeOracle ora, tatami::VectorPtr idx_ptr, Index_ max_target_chunk_length, @@ -423,6 +440,7 @@ struct SparseIndexed : public tatami::SparseExtractor { core( mat, sparse_extractor, + row, std::move(ora), [&]() { Rcpp::IntegerVector output(idx_ptr->begin(), idx_ptr->end()); @@ -444,7 +462,7 @@ struct SparseIndexed : public tatami::SparseExtractor { {} private: - SparseCore core; + SparseCore core; tatami::VectorPtr indices_ptr; bool needs_value, needs_index; @@ -488,11 +506,12 @@ const Value_* densify(const Slab_& slab, Index_ offset, size_t non_target_length return buffer; } -template +template struct DensifiedSparseFull : public tatami::DenseExtractor { DensifiedSparseFull( const Rcpp::RObject& mat, const Rcpp::Function& sparse_extractor, + bool row, tatami::MaybeOracle ora, Index_ non_target_dim, Index_ max_target_chunk_length, @@ -502,6 +521,7 @@ struct DensifiedSparseFull : public tatami::DenseExtractor core; + SparseCore core; size_t non_target_dim; public: @@ -529,11 +549,12 @@ struct DensifiedSparseFull : public tatami::DenseExtractor +template struct DensifiedSparseBlock : public tatami::DenseExtractor { DensifiedSparseBlock( const Rcpp::RObject& mat, const Rcpp::Function& sparse_extractor, + bool row, tatami::MaybeOracle ora, Index_ block_start, Index_ block_length, @@ -544,6 +565,7 @@ struct DensifiedSparseBlock : public tatami::DenseExtractor core; + SparseCore core; size_t block_length; public: @@ -571,11 +593,12 @@ struct DensifiedSparseBlock : public tatami::DenseExtractor +template struct DensifiedSparseIndexed : public tatami::DenseExtractor { DensifiedSparseIndexed( const Rcpp::RObject& mat, const Rcpp::Function& sparse_extractor, + bool row, tatami::MaybeOracle ora, tatami::VectorPtr idx_ptr, Index_ max_target_chunk_length, @@ -585,6 +608,7 @@ struct DensifiedSparseIndexed : public tatami::DenseExtractorbegin(), idx_ptr->end()); @@ -604,7 +628,7 @@ struct DensifiedSparseIndexed : public tatami::DenseExtractor core; + SparseCore core; size_t num_indices; public: diff --git a/include/tatami_r/sparse_matrix.hpp b/include/tatami_r/sparse_matrix.hpp index 6ac4360..41c62d3 100644 --- a/include/tatami_r/sparse_matrix.hpp +++ b/include/tatami_r/sparse_matrix.hpp @@ -7,9 +7,10 @@ namespace tatami_r { -template +template void parse_sparse_matrix_internal( Rcpp::RObject seed, + bool row, std::vector& value_ptrs, std::vector& index_ptrs, Index_* counts) @@ -60,17 +61,21 @@ void parse_sparse_matrix_internal( throw std::runtime_error("both vectors of an element of the 'SVT' slot in a " + ctype + " object should have the same length"); } - if constexpr(transpose_) { - for (size_t i = 0; i < nnz; ++i) { - auto ix = curindices[i]; - auto& shift = counts[ix]; - if (needs_value) { - value_ptrs[ix][shift] = curvalues[i]; + if (row) { + if (needs_value) { + for (size_t i = 0; i < nnz; ++i) { + auto ix = curindices[i]; + value_ptrs[ix][counts[ix]] = curvalues[i]; } - if (needs_index) { - index_ptrs[ix][shift] = c; + } + if (needs_index) { + for (size_t i = 0; i < nnz; ++i) { + auto ix = curindices[i]; + index_ptrs[ix][counts[ix]] = c; } - ++shift; + } + for (size_t i = 0; i < nnz; ++i) { + ++(counts[curindices[i]]); } } else { @@ -85,9 +90,10 @@ void parse_sparse_matrix_internal( } } -template +template void parse_sparse_matrix( Rcpp::RObject seed, + bool row, std::vector& value_ptrs, std::vector& index_ptrs, Index_* counts) @@ -104,11 +110,11 @@ void parse_sparse_matrix( std::string type = Rcpp::as(seed.slot("type")); if (type == "double") { - parse_sparse_matrix_internal(seed, value_ptrs, index_ptrs, counts); + parse_sparse_matrix_internal(seed, row, value_ptrs, index_ptrs, counts); } else if (type == "integer") { - parse_sparse_matrix_internal(seed, value_ptrs, index_ptrs, counts); + parse_sparse_matrix_internal(seed, row, value_ptrs, index_ptrs, counts); } else if (type == "logical") { - parse_sparse_matrix_internal(seed, value_ptrs, index_ptrs, counts); + parse_sparse_matrix_internal(seed, row, value_ptrs, index_ptrs, counts); } else { throw std::runtime_error("unsupported type '" + type + "' for a " + ctype); }