Skip to content

Commit

Permalink
Switch to the new caching utilities in tatami_chunked. (#11)
Browse files Browse the repository at this point in the history
This uses the new slab factories, and removes references to TypicalSlabCache.
We instead generate a separate subclass for each caching policy, which improves
compartmentalization and enables smaller fetch_raw() definitions.

We also update the sparsity-checking method names for the UnknownMatrix, and we
align with the new policy of avoiding non-type-related templated parameters.
This should reduce compile time and binary size without much perf loss.

Finally, a minor bugfix for the cache configuration in the tests.
  • Loading branch information
LTLA authored May 16, 2024
1 parent 4199455 commit e067922
Show file tree
Hide file tree
Showing 6 changed files with 746 additions and 718 deletions.
278 changes: 87 additions & 191 deletions include/tatami_r/UnknownMatrix.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@

#include "Rcpp.h"
#include "tatami/tatami.hpp"
#include "tatami_chunked/tatami_chunked.hpp"
#include "dense_extractor.hpp"
#include "sparse_extractor.hpp"

Expand All @@ -17,9 +16,9 @@
namespace tatami_r {

/**
* @brief Options for R matrix extraction.
* @brief Options for data extraction from an `UnknownMatrix`.
*/
struct Options {
struct UnknownMatrixOptions {
/**
* Size of the cache, in bytes.
* If -1, this is determined from `DelayedArray::getAutoBlockSize()`.
Expand Down Expand Up @@ -53,7 +52,7 @@ class UnknownMatrix : public tatami::Matrix<Value_, Index_> {
* @param seed A matrix-like R object.
* @param opt Extraction options.
*/
UnknownMatrix(Rcpp::RObject seed, const Options& opt) :
UnknownMatrix(Rcpp::RObject seed, const UnknownMatrixOptions& opt) :
original_seed(seed),
delayed_env(Rcpp::Environment::namespace_env("DelayedArray")),
sparse_env(Rcpp::Environment::namespace_env("SparseArray")),
Expand Down Expand Up @@ -213,7 +212,7 @@ class UnknownMatrix : public tatami::Matrix<Value_, Index_> {
*
* @param seed A matrix-like R object.
*/
UnknownMatrix(Rcpp::RObject seed) : UnknownMatrix(std::move(seed), Options()) {}
UnknownMatrix(Rcpp::RObject seed) : UnknownMatrix(std::move(seed), UnknownMatrixOptions()) {}

private:
Index_ internal_nrow, internal_ncol;
Expand Down Expand Up @@ -248,11 +247,11 @@ class UnknownMatrix : public tatami::Matrix<Value_, Index_> {
return internal_ncol;
}

bool sparse() const {
bool is_sparse() const {
return internal_sparse;
}

double sparse_proportion() const {
double is_sparse_proportion() const {
return static_cast<double>(internal_sparse);
}

Expand Down Expand Up @@ -297,42 +296,44 @@ class UnknownMatrix : public tatami::Matrix<Value_, Index_> {
*** Myopic dense ***
********************/
private:
template<bool oracle_>
std::unique_ptr<tatami::DenseExtractor<oracle_, Value_, Index_> > populate_dense(bool row, tatami::MaybeOracle<oracle_, Index_> ora, const tatami::Options&) const {
template<
bool oracle_,
template <bool, bool, typename, typename, typename> class FromDense_,
template <bool, bool, typename, typename, typename, typename> class FromSparse_,
typename ... Args_
>
std::unique_ptr<tatami::DenseExtractor<oracle_, Value_, Index_> > populate_dense_internal(bool row, Index_ non_target_length, tatami::MaybeOracle<oracle_, Index_> ora, Args_&& ... args) const {
std::unique_ptr<tatami::DenseExtractor<oracle_, Value_, Index_> > output;

Index_ max_target_chunk_length = max_primary_chunk_length(row);
tatami_chunked::SlabCacheStats stats(max_target_chunk_length, non_target_length, cache_size_in_bytes, sizeof(CachedValue_), require_minimum_cache);

const auto& map = chunk_map(row);
const auto& ticks = chunk_ticks(row);
bool solo = (stats.max_slabs_in_cache == 0);

#ifdef TATAMI_R_PARALLELIZE_UNKNOWN
// This involves some Rcpp initializations, so we lock it just in case.
auto& mexec = executor();
mexec.run([&]() -> void {
#endif

if (!internal_sparse) {
output.reset(new UnknownMatrix_internal::DenseFull<oracle_, Value_, Index_, CachedValue_>(
original_seed,
dense_extractor,
std::move(ora),
secondary_dim(row),
!row,
max_primary_chunk_length(row),
chunk_ticks(row),
chunk_map(row),
cache_size_in_bytes,
require_minimum_cache
));
if (!internal_sparse) {
if (solo) {
typedef FromDense_<true, oracle_, Value_, Index_, CachedValue_> ShortDense;
output.reset(new ShortDense(original_seed, dense_extractor, row, std::move(ora), std::forward<Args_>(args)..., ticks, map, stats));
} else {
typedef FromDense_<false, oracle_, Value_, Index_, CachedValue_> ShortDense;
output.reset(new ShortDense(original_seed, dense_extractor, row, std::move(ora), std::forward<Args_>(args)..., ticks, map, stats));
}
} else {
output.reset(new UnknownMatrix_internal::DensifiedSparseFull<oracle_, Value_, Index_, CachedValue_, CachedIndex_>(
original_seed,
sparse_extractor,
std::move(ora),
secondary_dim(row),
!row,
max_primary_chunk_length(row),
chunk_ticks(row),
chunk_map(row),
cache_size_in_bytes,
require_minimum_cache
));
if (solo) {
typedef FromSparse_<true, oracle_, Value_, Index_, CachedValue_, CachedIndex_> ShortSparse;
output.reset(new ShortSparse(original_seed, sparse_extractor, row, std::move(ora), std::forward<Args_>(args)..., max_target_chunk_length, ticks, map, stats));
} else {
typedef FromSparse_<false, oracle_, Value_, Index_, CachedValue_, CachedIndex_> ShortSparse;
output.reset(new ShortSparse(original_seed, sparse_extractor, row, std::move(ora), std::forward<Args_>(args)..., max_target_chunk_length, ticks, map, stats));
}
}

#ifdef TATAMI_R_PARALLELIZE_UNKNOWN
Expand All @@ -343,95 +344,20 @@ class UnknownMatrix : public tatami::Matrix<Value_, Index_> {
}

template<bool oracle_>
std::unique_ptr<tatami::DenseExtractor<oracle_, Value_, Index_> > populate_dense(bool row, tatami::MaybeOracle<oracle_, Index_> ora, Index_ block_start, Index_ block_length, const tatami::Options&) const {
std::unique_ptr<tatami::DenseExtractor<oracle_, Value_, Index_> > output;

#ifdef TATAMI_R_PARALLELIZE_UNKNOWN
// This involves some Rcpp initializations, so we lock it just in case.
auto& mexec = executor();
mexec.run([&]() -> void {
#endif

if (!internal_sparse) {
output.reset(new UnknownMatrix_internal::DenseBlock<oracle_, Value_, Index_, CachedValue_>(
original_seed,
dense_extractor,
std::move(ora),
block_start,
block_length,
!row,
max_primary_chunk_length(row),
chunk_ticks(row),
chunk_map(row),
cache_size_in_bytes,
require_minimum_cache
));
} else {
output.reset(new UnknownMatrix_internal::DensifiedSparseBlock<oracle_, Value_, Index_, CachedValue_, CachedIndex_>(
original_seed,
sparse_extractor,
std::move(ora),
block_start,
block_length,
!row,
max_primary_chunk_length(row),
chunk_ticks(row),
chunk_map(row),
cache_size_in_bytes,
require_minimum_cache
));
}

#ifdef TATAMI_R_PARALLELIZE_UNKNOWN
});
#endif
std::unique_ptr<tatami::DenseExtractor<oracle_, Value_, Index_> > populate_dense(bool row, tatami::MaybeOracle<oracle_, Index_> ora, const tatami::Options&) const {
Index_ non_target_dim = secondary_dim(row);
return populate_dense_internal<oracle_, UnknownMatrix_internal::DenseFull, UnknownMatrix_internal::DensifiedSparseFull>(row, non_target_dim, std::move(ora), non_target_dim);
}

return output;
template<bool oracle_>
std::unique_ptr<tatami::DenseExtractor<oracle_, Value_, Index_> > populate_dense(bool row, tatami::MaybeOracle<oracle_, Index_> ora, Index_ block_start, Index_ block_length, const tatami::Options&) const {
return populate_dense_internal<oracle_, UnknownMatrix_internal::DenseBlock, UnknownMatrix_internal::DensifiedSparseBlock>(row, block_length, std::move(ora), block_start, block_length);
}

template<bool oracle_>
std::unique_ptr<tatami::DenseExtractor<oracle_, Value_, Index_> > populate_dense(bool row, tatami::MaybeOracle<oracle_, Index_> ora, tatami::VectorPtr<Index_> indices_ptr, const tatami::Options&) const {
std::unique_ptr<tatami::DenseExtractor<oracle_, Value_, Index_> > output;

#ifdef TATAMI_R_PARALLELIZE_UNKNOWN
// This involves some Rcpp initializations, so we lock it just in case.
auto& mexec = executor();
mexec.run([&]() -> void {
#endif

if (!internal_sparse) {
output.reset(new UnknownMatrix_internal::DenseIndexed<oracle_, Value_, Index_, CachedValue_>(
original_seed,
dense_extractor,
std::move(ora),
std::move(indices_ptr),
!row,
max_primary_chunk_length(row),
chunk_ticks(row),
chunk_map(row),
cache_size_in_bytes,
require_minimum_cache
));
} else {
output.reset(new UnknownMatrix_internal::DensifiedSparseIndexed<oracle_, Value_, Index_, CachedValue_, CachedIndex_>(
original_seed,
sparse_extractor,
std::move(ora),
std::move(indices_ptr),
!row,
max_primary_chunk_length(row),
chunk_ticks(row),
chunk_map(row),
cache_size_in_bytes,
require_minimum_cache
));
}

#ifdef TATAMI_R_PARALLELIZE_UNKNOWN
});
#endif

return output;
Index_ nidx = indices_ptr->size();
return populate_dense_internal<oracle_, UnknownMatrix_internal::DenseIndexed, UnknownMatrix_internal::DensifiedSparseIndexed>(row, nidx, std::move(ora), std::move(indices_ptr));
}

public:
Expand Down Expand Up @@ -467,63 +393,48 @@ class UnknownMatrix : public tatami::Matrix<Value_, Index_> {
*** Myopic sparse ***
*********************/
public:
template<bool oracle_>
std::unique_ptr<tatami::SparseExtractor<oracle_, Value_, Index_> > populate_sparse(bool row, tatami::MaybeOracle<oracle_, Index_> ora, const tatami::Options& opt) const {
template<
bool oracle_,
template<bool, bool, typename, typename, typename, typename> class FromSparse_,
typename ... Args_
>
std::unique_ptr<tatami::SparseExtractor<oracle_, Value_, Index_> > populate_sparse_internal(
bool row,
Index_ non_target_length,
tatami::MaybeOracle<oracle_, Index_> ora,
const tatami::Options& opt,
Args_&& ... args)
const {
std::unique_ptr<tatami::SparseExtractor<oracle_, Value_, Index_> > output;

#ifdef TATAMI_R_PARALLELIZE_UNKNOWN
// This involves some Rcpp initializations, so we lock it just in case.
auto& mexec = executor();
mexec.run([&]() -> void {
#endif

output.reset(new UnknownMatrix_internal::SparseFull<oracle_, Value_, Index_, CachedValue_, CachedIndex_>(
original_seed,
sparse_extractor,
std::move(ora),
secondary_dim(row),
!row,
max_primary_chunk_length(row),
chunk_ticks(row),
chunk_map(row),
Index_ max_target_chunk_length = max_primary_chunk_length(row);
tatami_chunked::SlabCacheStats stats(
max_target_chunk_length,
non_target_length,
cache_size_in_bytes,
require_minimum_cache,
opt.sparse_extract_value,
opt.sparse_extract_index
));

#ifdef TATAMI_R_PARALLELIZE_UNKNOWN
});
#endif
(opt.sparse_extract_index ? sizeof(CachedIndex_) : 0) + (opt.sparse_extract_value ? sizeof(CachedValue_) : 0),
require_minimum_cache
);

return output;
}

template<bool oracle_>
std::unique_ptr<tatami::SparseExtractor<oracle_, Value_, Index_> > populate_sparse(bool row, tatami::MaybeOracle<oracle_, Index_> ora, Index_ block_start, Index_ block_length, const tatami::Options& opt) const {
std::unique_ptr<tatami::SparseExtractor<oracle_, Value_, Index_> > output;
const auto& map = chunk_map(row);
const auto& ticks = chunk_ticks(row);
bool needs_value = opt.sparse_extract_value;
bool needs_index = opt.sparse_extract_index;
bool solo = stats.max_slabs_in_cache == 0;

#ifdef TATAMI_R_PARALLELIZE_UNKNOWN
// This involves some Rcpp initializations, so we lock it just in case.
auto& mexec = executor();
mexec.run([&]() -> void {
#endif

output.reset(new UnknownMatrix_internal::SparseBlock<oracle_, Value_, Index_, CachedValue_, CachedIndex_>(
original_seed,
sparse_extractor,
std::move(ora),
block_start,
block_length,
!row,
max_primary_chunk_length(row),
chunk_ticks(row),
chunk_map(row),
cache_size_in_bytes,
require_minimum_cache,
opt.sparse_extract_value,
opt.sparse_extract_index
));
if (solo) {
typedef FromSparse_<true, oracle_, Value_, Index_, CachedValue_, CachedIndex_> ShortSparse;
output.reset(new ShortSparse(original_seed, sparse_extractor, row, std::move(ora), std::forward<Args_>(args)..., max_target_chunk_length, ticks, map, stats, needs_value, needs_index));
} else {
typedef FromSparse_<false, oracle_, Value_, Index_, CachedValue_, CachedIndex_> ShortSparse;
output.reset(new ShortSparse(original_seed, sparse_extractor, row, std::move(ora), std::forward<Args_>(args)..., max_target_chunk_length, ticks, map, stats, needs_value, needs_index));
}

#ifdef TATAMI_R_PARALLELIZE_UNKNOWN
});
Expand All @@ -533,35 +444,20 @@ class UnknownMatrix : public tatami::Matrix<Value_, Index_> {
}

template<bool oracle_>
std::unique_ptr<tatami::SparseExtractor<oracle_, Value_, Index_> > populate_sparse(bool row, tatami::MaybeOracle<oracle_, Index_> ora, tatami::VectorPtr<Index_> indices_ptr, const tatami::Options& opt) const {
std::unique_ptr<tatami::SparseExtractor<oracle_, Value_, Index_> > output;

#ifdef TATAMI_R_PARALLELIZE_UNKNOWN
// This involves some Rcpp initializations, so we lock it just in case.
auto& mexec = executor();
mexec.run([&]() -> void {
#endif

output.reset(new UnknownMatrix_internal::SparseIndexed<oracle_, Value_, Index_, CachedValue_, CachedIndex_>(
original_seed,
sparse_extractor,
std::move(ora),
std::move(indices_ptr),
!row,
max_primary_chunk_length(row),
chunk_ticks(row),
chunk_map(row),
cache_size_in_bytes,
require_minimum_cache,
opt.sparse_extract_value,
opt.sparse_extract_index
));
std::unique_ptr<tatami::SparseExtractor<oracle_, Value_, Index_> > populate_sparse(bool row, tatami::MaybeOracle<oracle_, Index_> ora, const tatami::Options& opt) const {
Index_ non_target_dim = secondary_dim(row);
return populate_sparse_internal<oracle_, UnknownMatrix_internal::SparseFull>(row, non_target_dim, std::move(ora), opt, non_target_dim);
}

#ifdef TATAMI_R_PARALLELIZE_UNKNOWN
});
#endif
template<bool oracle_>
std::unique_ptr<tatami::SparseExtractor<oracle_, Value_, Index_> > populate_sparse(bool row, tatami::MaybeOracle<oracle_, Index_> ora, Index_ block_start, Index_ block_length, const tatami::Options& opt) const {
return populate_sparse_internal<oracle_, UnknownMatrix_internal::SparseBlock>(row, block_length, std::move(ora), opt, block_start, block_length);
}

return output;
template<bool oracle_>
std::unique_ptr<tatami::SparseExtractor<oracle_, Value_, Index_> > populate_sparse(bool row, tatami::MaybeOracle<oracle_, Index_> ora, tatami::VectorPtr<Index_> indices_ptr, const tatami::Options& opt) const {
Index_ nidx = indices_ptr->size();
return populate_sparse_internal<oracle_, UnknownMatrix_internal::SparseIndexed>(row, nidx, std::move(ora), opt, std::move(indices_ptr));
}

public:
Expand Down
Loading

0 comments on commit e067922

Please sign in to comment.