Switch to the new caching utilities in tatami_chunked. #11

Merged · 5 commits · May 16, 2024
278 changes: 87 additions & 191 deletions include/tatami_r/UnknownMatrix.hpp
@@ -3,7 +3,6 @@

#include "Rcpp.h"
#include "tatami/tatami.hpp"
#include "tatami_chunked/tatami_chunked.hpp"
#include "dense_extractor.hpp"
#include "sparse_extractor.hpp"

@@ -17,9 +16,9 @@
namespace tatami_r {

/**
* @brief Options for R matrix extraction.
* @brief Options for data extraction from an `UnknownMatrix`.
*/
struct Options {
struct UnknownMatrixOptions {
/**
* Size of the cache, in bytes.
* If -1, this is determined from `DelayedArray::getAutoBlockSize()`.
@@ -53,7 +52,7 @@ class UnknownMatrix : public tatami::Matrix<Value_, Index_> {
* @param seed A matrix-like R object.
* @param opt Extraction options.
*/
UnknownMatrix(Rcpp::RObject seed, const Options& opt) :
UnknownMatrix(Rcpp::RObject seed, const UnknownMatrixOptions& opt) :
original_seed(seed),
delayed_env(Rcpp::Environment::namespace_env("DelayedArray")),
sparse_env(Rcpp::Environment::namespace_env("SparseArray")),
@@ -213,7 +212,7 @@ class UnknownMatrix : public tatami::Matrix<Value_, Index_> {
*
* @param seed A matrix-like R object.
*/
UnknownMatrix(Rcpp::RObject seed) : UnknownMatrix(std::move(seed), Options()) {}
UnknownMatrix(Rcpp::RObject seed) : UnknownMatrix(std::move(seed), UnknownMatrixOptions()) {}

private:
Index_ internal_nrow, internal_ncol;
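
For reviewers skimming the `Options` → `UnknownMatrixOptions` rename: a minimal construction sketch, under stated assumptions. The `maximum_cache_size` field name, the `require_minimum_cache` option field, and the `<double, int>` template arguments are guesses for illustration (only the cache-size doc comment and the private class members are visible in this diff), not something this PR pins down.

```cpp
#include <memory>
#include "Rcpp.h"
#include "tatami_r/UnknownMatrix.hpp"

// Hypothetical usage sketch; option field names and template arguments are assumptions,
// and the CachedValue_/CachedIndex_ parameters may need to be supplied explicitly.
std::shared_ptr<tatami::Matrix<double, int> > make_unknown(Rcpp::RObject seed) {
    tatami_r::UnknownMatrixOptions opt;
    opt.maximum_cache_size = 100000000; // bytes; -1 would defer to DelayedArray::getAutoBlockSize()
    opt.require_minimum_cache = true;   // assumed to mirror the class member of the same name
    return std::make_shared<tatami_r::UnknownMatrix<double, int> >(seed, opt);
    // The one-argument constructor shown above falls back to default UnknownMatrixOptions.
}
```
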
@@ -248,11 +247,11 @@ class UnknownMatrix : public tatami::Matrix<Value_, Index_> {
return internal_ncol;
}

bool sparse() const {
bool is_sparse() const {
return internal_sparse;
}

double sparse_proportion() const {
double is_sparse_proportion() const {
return static_cast<double>(internal_sparse);
}

@@ -297,42 +296,44 @@ class UnknownMatrix : public tatami::Matrix<Value_, Index_> {
*** Myopic dense ***
********************/
private:
template<bool oracle_>
std::unique_ptr<tatami::DenseExtractor<oracle_, Value_, Index_> > populate_dense(bool row, tatami::MaybeOracle<oracle_, Index_> ora, const tatami::Options&) const {
template<
bool oracle_,
template <bool, bool, typename, typename, typename> class FromDense_,
template <bool, bool, typename, typename, typename, typename> class FromSparse_,
typename ... Args_
>
std::unique_ptr<tatami::DenseExtractor<oracle_, Value_, Index_> > populate_dense_internal(bool row, Index_ non_target_length, tatami::MaybeOracle<oracle_, Index_> ora, Args_&& ... args) const {
std::unique_ptr<tatami::DenseExtractor<oracle_, Value_, Index_> > output;

Index_ max_target_chunk_length = max_primary_chunk_length(row);
tatami_chunked::SlabCacheStats stats(max_target_chunk_length, non_target_length, cache_size_in_bytes, sizeof(CachedValue_), require_minimum_cache);

const auto& map = chunk_map(row);
const auto& ticks = chunk_ticks(row);
bool solo = (stats.max_slabs_in_cache == 0);

#ifdef TATAMI_R_PARALLELIZE_UNKNOWN
// This involves some Rcpp initializations, so we lock it just in case.
auto& mexec = executor();
mexec.run([&]() -> void {
#endif

if (!internal_sparse) {
output.reset(new UnknownMatrix_internal::DenseFull<oracle_, Value_, Index_, CachedValue_>(
original_seed,
dense_extractor,
std::move(ora),
secondary_dim(row),
!row,
max_primary_chunk_length(row),
chunk_ticks(row),
chunk_map(row),
cache_size_in_bytes,
require_minimum_cache
));
if (!internal_sparse) {
if (solo) {
typedef FromDense_<true, oracle_, Value_, Index_, CachedValue_> ShortDense;
output.reset(new ShortDense(original_seed, dense_extractor, row, std::move(ora), std::forward<Args_>(args)..., ticks, map, stats));
} else {
typedef FromDense_<false, oracle_, Value_, Index_, CachedValue_> ShortDense;
output.reset(new ShortDense(original_seed, dense_extractor, row, std::move(ora), std::forward<Args_>(args)..., ticks, map, stats));
}
} else {
output.reset(new UnknownMatrix_internal::DensifiedSparseFull<oracle_, Value_, Index_, CachedValue_, CachedIndex_>(
original_seed,
sparse_extractor,
std::move(ora),
secondary_dim(row),
!row,
max_primary_chunk_length(row),
chunk_ticks(row),
chunk_map(row),
cache_size_in_bytes,
require_minimum_cache
));
if (solo) {
typedef FromSparse_<true, oracle_, Value_, Index_, CachedValue_, CachedIndex_> ShortSparse;
output.reset(new ShortSparse(original_seed, sparse_extractor, row, std::move(ora), std::forward<Args_>(args)..., max_target_chunk_length, ticks, map, stats));
} else {
typedef FromSparse_<false, oracle_, Value_, Index_, CachedValue_, CachedIndex_> ShortSparse;
output.reset(new ShortSparse(original_seed, sparse_extractor, row, std::move(ora), std::forward<Args_>(args)..., max_target_chunk_length, ticks, map, stats));
}
}

#ifdef TATAMI_R_PARALLELIZE_UNKNOWN
@@ -343,95 +344,20 @@
}
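
A note on the new `solo` dispatch above: roughly speaking, `tatami_chunked::SlabCacheStats` works out how many slabs of `max_target_chunk_length * non_target_length` cached elements fit into `cache_size_in_bytes`, and `max_slabs_in_cache == 0` sends us down the "solo" specializations. The sketch below is an approximation for intuition only; the function name is invented and the authoritative computation lives in tatami_chunked.

```cpp
#include <cstddef>

// Approximate sketch (assumption) of what drives the 'solo' dispatch above;
// the real logic is tatami_chunked::SlabCacheStats and may differ in details.
bool would_use_solo_extractor(
    std::size_t max_target_chunk_length,
    std::size_t non_target_length,
    std::size_t cache_size_in_bytes,
    std::size_t bytes_per_cached_element,
    bool require_minimum_cache)
{
    // One slab holds a full chunk's worth of target vectors across the requested non-target extent.
    std::size_t slab_bytes = max_target_chunk_length * non_target_length * bytes_per_cached_element;
    std::size_t max_slabs = (slab_bytes > 0 ? cache_size_in_bytes / slab_bytes : 0);
    if (require_minimum_cache && max_slabs == 0) {
        max_slabs = 1; // keep at least one slab so extraction is not pathologically slow
    }
    return max_slabs == 0; // no room for any slab: use the solo specializations (first template argument = true)
}
```
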

template<bool oracle_>
std::unique_ptr<tatami::DenseExtractor<oracle_, Value_, Index_> > populate_dense(bool row, tatami::MaybeOracle<oracle_, Index_> ora, Index_ block_start, Index_ block_length, const tatami::Options&) const {
std::unique_ptr<tatami::DenseExtractor<oracle_, Value_, Index_> > output;

#ifdef TATAMI_R_PARALLELIZE_UNKNOWN
// This involves some Rcpp initializations, so we lock it just in case.
auto& mexec = executor();
mexec.run([&]() -> void {
#endif

if (!internal_sparse) {
output.reset(new UnknownMatrix_internal::DenseBlock<oracle_, Value_, Index_, CachedValue_>(
original_seed,
dense_extractor,
std::move(ora),
block_start,
block_length,
!row,
max_primary_chunk_length(row),
chunk_ticks(row),
chunk_map(row),
cache_size_in_bytes,
require_minimum_cache
));
} else {
output.reset(new UnknownMatrix_internal::DensifiedSparseBlock<oracle_, Value_, Index_, CachedValue_, CachedIndex_>(
original_seed,
sparse_extractor,
std::move(ora),
block_start,
block_length,
!row,
max_primary_chunk_length(row),
chunk_ticks(row),
chunk_map(row),
cache_size_in_bytes,
require_minimum_cache
));
}

#ifdef TATAMI_R_PARALLELIZE_UNKNOWN
});
#endif
std::unique_ptr<tatami::DenseExtractor<oracle_, Value_, Index_> > populate_dense(bool row, tatami::MaybeOracle<oracle_, Index_> ora, const tatami::Options&) const {
Index_ non_target_dim = secondary_dim(row);
return populate_dense_internal<oracle_, UnknownMatrix_internal::DenseFull, UnknownMatrix_internal::DensifiedSparseFull>(row, non_target_dim, std::move(ora), non_target_dim);
}

return output;
template<bool oracle_>
std::unique_ptr<tatami::DenseExtractor<oracle_, Value_, Index_> > populate_dense(bool row, tatami::MaybeOracle<oracle_, Index_> ora, Index_ block_start, Index_ block_length, const tatami::Options&) const {
return populate_dense_internal<oracle_, UnknownMatrix_internal::DenseBlock, UnknownMatrix_internal::DensifiedSparseBlock>(row, block_length, std::move(ora), block_start, block_length);
}

template<bool oracle_>
std::unique_ptr<tatami::DenseExtractor<oracle_, Value_, Index_> > populate_dense(bool row, tatami::MaybeOracle<oracle_, Index_> ora, tatami::VectorPtr<Index_> indices_ptr, const tatami::Options&) const {
std::unique_ptr<tatami::DenseExtractor<oracle_, Value_, Index_> > output;

#ifdef TATAMI_R_PARALLELIZE_UNKNOWN
// This involves some Rcpp initializations, so we lock it just in case.
auto& mexec = executor();
mexec.run([&]() -> void {
#endif

if (!internal_sparse) {
output.reset(new UnknownMatrix_internal::DenseIndexed<oracle_, Value_, Index_, CachedValue_>(
original_seed,
dense_extractor,
std::move(ora),
std::move(indices_ptr),
!row,
max_primary_chunk_length(row),
chunk_ticks(row),
chunk_map(row),
cache_size_in_bytes,
require_minimum_cache
));
} else {
output.reset(new UnknownMatrix_internal::DensifiedSparseIndexed<oracle_, Value_, Index_, CachedValue_, CachedIndex_>(
original_seed,
sparse_extractor,
std::move(ora),
std::move(indices_ptr),
!row,
max_primary_chunk_length(row),
chunk_ticks(row),
chunk_map(row),
cache_size_in_bytes,
require_minimum_cache
));
}

#ifdef TATAMI_R_PARALLELIZE_UNKNOWN
});
#endif

return output;
Index_ nidx = indices_ptr->size();
return populate_dense_internal<oracle_, UnknownMatrix_internal::DenseIndexed, UnknownMatrix_internal::DensifiedSparseIndexed>(row, nidx, std::move(ora), std::move(indices_ptr));
}
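
The wrappers above collapse what used to be three near-identical method bodies into calls to `populate_dense_internal()`, parameterized by the extractor class templates. For readers less familiar with template-template parameters, here is a self-contained toy version of that dispatch pattern; the `Extractor`, `MockDense`, and `make_extractor` names are invented for illustration.

```cpp
#include <iostream>
#include <memory>

// Stand-alone toy (not from this PR) of the template-template dispatch used by
// populate_dense_internal(): the concrete extractor class template is itself a template
// parameter, and the compile-time 'solo' flag is chosen from a run-time condition.
struct Extractor {
    virtual ~Extractor() = default;
    virtual const char* name() const = 0;
};

template<bool solo_, bool oracle_, typename Value_, typename Index_, typename CachedValue_>
struct MockDense : public Extractor {
    const char* name() const override { return solo_ ? "solo" : "cached"; }
};

template<bool oracle_, template<bool, bool, typename, typename, typename> class FromDense_>
std::unique_ptr<Extractor> make_extractor(bool solo) {
    if (solo) {
        return std::make_unique<FromDense_<true, oracle_, double, int, double> >();
    } else {
        return std::make_unique<FromDense_<false, oracle_, double, int, double> >();
    }
}

int main() {
    auto ext = make_extractor<false, MockDense>(true); // no oracle, cache too small for any slab
    std::cout << ext->name() << "\n"; // prints "solo"
    return 0;
}
```
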

public:
@@ -467,63 +393,48 @@ class UnknownMatrix : public tatami::Matrix<Value_, Index_> {
*** Myopic sparse ***
*********************/
public:
template<bool oracle_>
std::unique_ptr<tatami::SparseExtractor<oracle_, Value_, Index_> > populate_sparse(bool row, tatami::MaybeOracle<oracle_, Index_> ora, const tatami::Options& opt) const {
template<
bool oracle_,
template<bool, bool, typename, typename, typename, typename> class FromSparse_,
typename ... Args_
>
std::unique_ptr<tatami::SparseExtractor<oracle_, Value_, Index_> > populate_sparse_internal(
bool row,
Index_ non_target_length,
tatami::MaybeOracle<oracle_, Index_> ora,
const tatami::Options& opt,
Args_&& ... args)
const {
std::unique_ptr<tatami::SparseExtractor<oracle_, Value_, Index_> > output;

#ifdef TATAMI_R_PARALLELIZE_UNKNOWN
// This involves some Rcpp initializations, so we lock it just in case.
auto& mexec = executor();
mexec.run([&]() -> void {
#endif

output.reset(new UnknownMatrix_internal::SparseFull<oracle_, Value_, Index_, CachedValue_, CachedIndex_>(
original_seed,
sparse_extractor,
std::move(ora),
secondary_dim(row),
!row,
max_primary_chunk_length(row),
chunk_ticks(row),
chunk_map(row),
Index_ max_target_chunk_length = max_primary_chunk_length(row);
tatami_chunked::SlabCacheStats stats(
max_target_chunk_length,
non_target_length,
cache_size_in_bytes,
require_minimum_cache,
opt.sparse_extract_value,
opt.sparse_extract_index
));

#ifdef TATAMI_R_PARALLELIZE_UNKNOWN
});
#endif
(opt.sparse_extract_index ? sizeof(CachedIndex_) : 0) + (opt.sparse_extract_value ? sizeof(CachedValue_) : 0),
require_minimum_cache
);

return output;
}

template<bool oracle_>
std::unique_ptr<tatami::SparseExtractor<oracle_, Value_, Index_> > populate_sparse(bool row, tatami::MaybeOracle<oracle_, Index_> ora, Index_ block_start, Index_ block_length, const tatami::Options& opt) const {
std::unique_ptr<tatami::SparseExtractor<oracle_, Value_, Index_> > output;
const auto& map = chunk_map(row);
const auto& ticks = chunk_ticks(row);
bool needs_value = opt.sparse_extract_value;
bool needs_index = opt.sparse_extract_index;
bool solo = stats.max_slabs_in_cache == 0;

#ifdef TATAMI_R_PARALLELIZE_UNKNOWN
// This involves some Rcpp initializations, so we lock it just in case.
auto& mexec = executor();
mexec.run([&]() -> void {
#endif

output.reset(new UnknownMatrix_internal::SparseBlock<oracle_, Value_, Index_, CachedValue_, CachedIndex_>(
original_seed,
sparse_extractor,
std::move(ora),
block_start,
block_length,
!row,
max_primary_chunk_length(row),
chunk_ticks(row),
chunk_map(row),
cache_size_in_bytes,
require_minimum_cache,
opt.sparse_extract_value,
opt.sparse_extract_index
));
if (solo) {
typedef FromSparse_<true, oracle_, Value_, Index_, CachedValue_, CachedIndex_> ShortSparse;
output.reset(new ShortSparse(original_seed, sparse_extractor, row, std::move(ora), std::forward<Args_>(args)..., max_target_chunk_length, ticks, map, stats, needs_value, needs_index));
} else {
typedef FromSparse_<false, oracle_, Value_, Index_, CachedValue_, CachedIndex_> ShortSparse;
output.reset(new ShortSparse(original_seed, sparse_extractor, row, std::move(ora), std::forward<Args_>(args)..., max_target_chunk_length, ticks, map, stats, needs_value, needs_index));
}

#ifdef TATAMI_R_PARALLELIZE_UNKNOWN
});
@@ -533,35 +444,20 @@ class UnknownMatrix : public tatami::Matrix<Value_, Index_> {
}
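
One detail of the refactored sparse path worth calling out: the per-element byte count handed to `SlabCacheStats` only includes the components that will actually be cached, so an index-only or value-only extraction is costed more cheaply. A small worked example, with stand-in types for `CachedValue_` and `CachedIndex_`:

```cpp
#include <cstddef>
#include <cstdint>
#include <iostream>

// Worked example (for intuition only) of the sparse per-element costing above.
int main() {
    bool sparse_extract_value = true;
    bool sparse_extract_index = false; // e.g. the caller only wants the values

    std::size_t per_element =
        (sparse_extract_index ? sizeof(std::uint32_t) : 0) + // stand-in for sizeof(CachedIndex_)
        (sparse_extract_value ? sizeof(double) : 0);         // stand-in for sizeof(CachedValue_)

    std::size_t cache_size_in_bytes = 1 << 20; // 1 MiB cache
    std::size_t slab_elements = 100 * 1000;    // max_target_chunk_length * non_target_length

    std::size_t slab_bytes = slab_elements * per_element;
    std::cout << "slabs that fit: " << (slab_bytes ? cache_size_in_bytes / slab_bytes : 0) << "\n";
    return 0;
}
```
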

template<bool oracle_>
std::unique_ptr<tatami::SparseExtractor<oracle_, Value_, Index_> > populate_sparse(bool row, tatami::MaybeOracle<oracle_, Index_> ora, tatami::VectorPtr<Index_> indices_ptr, const tatami::Options& opt) const {
std::unique_ptr<tatami::SparseExtractor<oracle_, Value_, Index_> > output;

#ifdef TATAMI_R_PARALLELIZE_UNKNOWN
// This involves some Rcpp initializations, so we lock it just in case.
auto& mexec = executor();
mexec.run([&]() -> void {
#endif

output.reset(new UnknownMatrix_internal::SparseIndexed<oracle_, Value_, Index_, CachedValue_, CachedIndex_>(
original_seed,
sparse_extractor,
std::move(ora),
std::move(indices_ptr),
!row,
max_primary_chunk_length(row),
chunk_ticks(row),
chunk_map(row),
cache_size_in_bytes,
require_minimum_cache,
opt.sparse_extract_value,
opt.sparse_extract_index
));
std::unique_ptr<tatami::SparseExtractor<oracle_, Value_, Index_> > populate_sparse(bool row, tatami::MaybeOracle<oracle_, Index_> ora, const tatami::Options& opt) const {
Index_ non_target_dim = secondary_dim(row);
return populate_sparse_internal<oracle_, UnknownMatrix_internal::SparseFull>(row, non_target_dim, std::move(ora), opt, non_target_dim);
}

#ifdef TATAMI_R_PARALLELIZE_UNKNOWN
});
#endif
template<bool oracle_>
std::unique_ptr<tatami::SparseExtractor<oracle_, Value_, Index_> > populate_sparse(bool row, tatami::MaybeOracle<oracle_, Index_> ora, Index_ block_start, Index_ block_length, const tatami::Options& opt) const {
return populate_sparse_internal<oracle_, UnknownMatrix_internal::SparseBlock>(row, block_length, std::move(ora), opt, block_start, block_length);
}

return output;
template<bool oracle_>
std::unique_ptr<tatami::SparseExtractor<oracle_, Value_, Index_> > populate_sparse(bool row, tatami::MaybeOracle<oracle_, Index_> ora, tatami::VectorPtr<Index_> indices_ptr, const tatami::Options& opt) const {
Index_ nidx = indices_ptr->size();
return populate_sparse_internal<oracle_, UnknownMatrix_internal::SparseIndexed>(row, nidx, std::move(ora), opt, std::move(indices_ptr));
}

public: