-
Notifications
You must be signed in to change notification settings - Fork 11
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
76 changed files
with
1,277 additions
and
4,356 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -32,3 +32,4 @@ LinkingTo: | |
VignetteBuilder: knitr | ||
SystemRequirements: C++17 | ||
RoxygenNote: 7.3.2 | ||
Encoding: UTF-8 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,96 +1,30 @@ | ||
# Generated by roxygen2: do not edit by hand | ||
|
||
export(AnnoyIndex) | ||
export(AnnoyIndex_path) | ||
export(AnnoyIndex_search_mult) | ||
export(AnnoyParam) | ||
export(AnnoyParam_directory) | ||
export(AnnoyParam_ntrees) | ||
export(AnnoyParam_search_mult) | ||
export(ExhaustiveIndex) | ||
export(ExhaustiveParam) | ||
export(HnswIndex) | ||
export(HnswIndex_ef_search) | ||
export(HnswIndex_path) | ||
export(HnswParam) | ||
export(HnswParam_directory) | ||
export(HnswParam_ef_construction) | ||
export(HnswParam_ef_search) | ||
export(HnswParam_nlinks) | ||
export(KmknnIndex) | ||
export(KmknnIndex_cluster_centers) | ||
export(KmknnIndex_cluster_info) | ||
export(KmknnParam) | ||
export(KmknnParam_kmeans_args) | ||
export(VptreeIndex) | ||
export(VptreeIndex_nodes) | ||
export(VptreeParam) | ||
export(bndata) | ||
export(bndistance) | ||
export(bnorder) | ||
export(buildAnnoy) | ||
export(buildExhaustive) | ||
export(buildHnsw) | ||
export(buildIndex) | ||
export(buildKmknn) | ||
export(buildVptree) | ||
export(findAnnoy) | ||
export(findExhaustive) | ||
export(findHnsw) | ||
export(findKNN) | ||
export(findKmknn) | ||
export(findMutualNN) | ||
export(findNeighbors) | ||
export(findVptree) | ||
export(queryAnnoy) | ||
export(queryExhaustive) | ||
export(queryHnsw) | ||
export(queryKNN) | ||
export(queryKmknn) | ||
export(queryNeighbors) | ||
export(queryVptree) | ||
export(rangeFindExhaustive) | ||
export(rangeFindKmknn) | ||
export(rangeFindVptree) | ||
export(rangeQueryExhaustive) | ||
export(rangeQueryKmknn) | ||
export(rangeQueryVptree) | ||
exportClasses(AnnoyIndex) | ||
exportClasses(AnnoyParam) | ||
exportClasses(BiocNeighborIndex) | ||
exportClasses(BiocNeighborParam) | ||
exportClasses(ExhaustiveIndex) | ||
exportClasses(ExhaustiveParam) | ||
exportClasses(HnswIndex) | ||
exportClasses(HnswParam) | ||
exportClasses(KmknnIndex) | ||
exportClasses(KmknnParam) | ||
exportClasses(VptreeIndex) | ||
exportClasses(VptreeParam) | ||
exportMethods("[[") | ||
exportMethods("[[<-") | ||
exportMethods(bndata) | ||
exportMethods(bndistance) | ||
exportMethods(bnorder) | ||
exportMethods(buildIndex) | ||
exportMethods(dim) | ||
exportMethods(dimnames) | ||
exportMethods(findKNN) | ||
exportMethods(findNeighbors) | ||
exportMethods(queryKNN) | ||
exportMethods(queryNeighbors) | ||
exportMethods(show) | ||
import(BiocParallel) | ||
import(methods) | ||
importClassesFrom(S4Vectors,character_OR_NULL) | ||
importFrom(BiocParallel,SerialParam) | ||
importFrom(BiocParallel,bpmapply) | ||
importFrom(BiocParallel,bpnworkers) | ||
importFrom(Matrix,t) | ||
importFrom(Rcpp,sourceCpp) | ||
importFrom(S4Vectors,setValidity2) | ||
importFrom(methods,is) | ||
importFrom(methods,new) | ||
importFrom(methods,show) | ||
importFrom(stats,kmeans) | ||
useDynLib(BiocNeighbors) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,22 +1,33 @@ | ||
#' Build a nearest-neighbor index | ||
#' | ||
#' Build indices for nearest-neighbor searching with different algorithms. | ||
#' | ||
#' @param X A numeric matrix where rows correspond to data points and columns correspond to variables (i.e., dimensions). | ||
#' @param transposed Logical scalar indicating whether \code{X} is transposed, i.e., rows are variables and columns are data points. | ||
#' @param ... Further arguments to be passed to individual methods. | ||
#' @param BNPARAM A \linkS4class{BiocNeighborParam} object specifying the type of index to be constructed. | ||
#' This defaults to a \linkS4class{KmknnParam} object if no argument is supplied. | ||
#' | ||
#' @return | ||
#' An external pointer that can be used in \code{\link{findKNN}} and related functions. | ||
#' This is strictly for use within the same R session, as it cannot be serialized for use in other sessions or processes. | ||
#' | ||
#' @author | ||
#' Aaron Lun | ||
#' | ||
#' @seealso | ||
#' \code{\link{buildIndex,KmknnParam-method}}, | ||
#' \code{\link{buildIndex,VptreeParam-method}}, | ||
#' \code{\link{buildIndex,AnnoyParam-method}} | ||
#' and \code{\link{buildIndex,HnswParam-method}} for specific methods. | ||
#' | ||
#' @examples | ||
#' Y <- matrix(rnorm(100000), ncol=20) | ||
#' (k.out <- buildIndex(Y)) | ||
#' (a.out <- buildIndex(Y, BNPARAM=AnnoyParam())) | ||
#' | ||
#' @aliases | ||
#' buildIndex,missing-method | ||
#' | ||
#' @export | ||
#' @rdname buildIndex | ||
setGeneric("buildIndex", signature=c("BNPARAM"), function(X, ..., BNPARAM) standardGeneric("buildIndex")) | ||
|
||
#' @export | ||
#' @rdname findKNN-methods | ||
setGeneric("findKNN", signature=c("X", "BNPARAM"), function(X, k, ..., BNPARAM) standardGeneric("findKNN")) | ||
|
||
#' @export | ||
#' @rdname queryKNN-methods | ||
setGeneric("queryKNN", signature=c("X", "BNPARAM"), function(X, query, k, ..., BNPARAM) standardGeneric("queryKNN")) | ||
|
||
#' @export | ||
#' @rdname findNeighbors-methods | ||
setGeneric("findNeighbors", signature=c("X", "BNPARAM"), function(X, threshold, ..., BNPARAM) standardGeneric("findNeighbors")) | ||
|
||
#' @export | ||
#' @rdname queryNeighbors-methods | ||
setGeneric("queryNeighbors", signature=c("X", "BNPARAM"), function(X, query, threshold, ..., BNINDEX, BNPARAM) standardGeneric("queryNeighbors")) | ||
|
||
#' @export | ||
setGeneric("bndistance", function(x) standardGeneric("bndistance")) | ||
setGeneric(
    "buildIndex",
    signature=c("BNPARAM"),
    # Dispatch happens on 'BNPARAM' only; 'X' and 'transposed' are handed
    # to whichever method is selected.
    def=function(X, transposed=FALSE, ..., BNPARAM) standardGeneric("buildIndex")
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,92 @@ | ||
#' The AnnoyParam class | ||
#' | ||
#' A class to hold parameters for the Annoy algorithm for approximate nearest neighbor identification. | ||
#' | ||
#' @param ntrees Integer scalar, number of trees to use for index generation. | ||
#' @param search.mult Numeric scalar, multiplier for the number of points to search. | ||
#' @inheritParams ExhaustiveParam | ||
#' @param BNPARAM An AnnoyParam instance. | ||
#' | ||
#' @details | ||
#' The Approximate nearest neighbors Oh Yeah (Annoy) algorithm is based on recursive hyperplane partitions. | ||
#' Briefly, a tree is constructed where a random hyperplane splits the points into two subsets at each internal node. | ||
#' Leaf nodes are defined when the number of points in a subset falls below a threshold (close to twice the number of dimensions for the settings used here). | ||
#' Multiple trees are constructed in this manner, each of which is different due to the random choice of hyperplanes. | ||
#' For a given query point, each tree is searched to identify the subset of all points in the same leaf node as the query point. | ||
#' The union of these subsets across all trees is exhaustively searched to identify the actual nearest neighbors to the query. | ||
#' | ||
#' The \code{ntrees} parameter controls the trade-off between accuracy and computational work. | ||
#' More trees provide greater accuracy at the cost of more computational work (both in terms of the indexing time and search speed in downstream functions). | ||
#' | ||
#' The \code{search.mult} controls the parameter known as \code{search_k} in the original Annoy documentation. | ||
#' Specifically, \code{search_k} is defined as \code{k * search.mult} where \code{k} is the number of nearest neighbors to identify in downstream functions. | ||
#' This represents the number of points to search exhaustively and determines the run-time balance between speed and accuracy. | ||
#' The default \code{search.mult=ntrees} is based on the Annoy library defaults. | ||
#' Note that this parameter is not actually used in the index construction itself, and is only included here so that the output index fully parametrizes the search. | ||
#' | ||
#' Technically, the index construction algorithm is stochastic but, for various logistical reasons, the seed is hard-coded into the C++ code. | ||
#' This means that the results of the Annoy neighbor searches will be fully deterministic for the same inputs, even though the theory provides no such guarantees. | ||
#' | ||
#' @return | ||
#' The \code{AnnoyParam} constructor returns an instance of the AnnoyParam class. | ||
#' | ||
#' The \code{\link{buildIndex}} method returns an external pointer to an Annoy index. | ||
#' | ||
#' @author | ||
#' Aaron Lun | ||
#' | ||
#' @seealso | ||
#' \linkS4class{BiocNeighborParam}, for the parent class and its available methods. | ||
#' | ||
#' \url{https://github.com/spotify/annoy}, for details on the underlying algorithm. | ||
#' | ||
#' @examples | ||
#' (out <- AnnoyParam()) | ||
#' out[['ntrees']] | ||
#' | ||
#' out[['ntrees']] <- 20L | ||
#' out | ||
#' | ||
#' @aliases | ||
#' AnnoyParam-class | ||
#' show,AnnoyParam-method | ||
#' | ||
#' @docType class | ||
#' | ||
#' @export | ||
#' @importFrom methods new | ||
AnnoyParam <- function(ntrees=50, search.mult=ntrees, distance="Euclidean") {
    # Only 'ntrees' is coerced (to integer); 'search.mult' and 'distance'
    # are stored exactly as supplied. When 'search.mult' is omitted, its
    # default resolves to the caller's 'ntrees' argument.
    tree.count <- as.integer(ntrees)
    new("AnnoyParam", ntrees=tree.count, distance=distance, search.mult=search.mult)
}
|
||
#' @importFrom S4Vectors setValidity2 | ||
setValidity2("AnnoyParam", function(object) {
    # Validity check for AnnoyParam objects.
    # Returns TRUE when the object is valid, otherwise a character vector
    # holding one message per problem so that all of them are reported.
    msg <- character(0)

    # NA has to be tested before the comparison: 'NA <= 0L' evaluates to NA,
    # and an NA condition makes if() throw an unrelated error instead of
    # yielding the validity message below.
    ntrees <- object[['ntrees']]
    if (length(ntrees) != 1L || is.na(ntrees) || ntrees <= 0L) {
        msg <- c(msg, "'ntrees' should be a positive integer scalar")
    }

    search.mult <- object[['search.mult']]
    if (length(search.mult) != 1L || is.na(search.mult) || search.mult <= 1) {
        msg <- c(msg, "'search.mult' should be a numeric scalar greater than 1")
    }

    if (length(msg) > 0L) {
        return(msg)
    }
    TRUE
})
|
||
#' @export | ||
setMethod("show", "AnnoyParam", function(object) {
    # Display method for AnnoyParam objects: the inherited show() method
    # runs first, then the Annoy-specific settings are printed one per line.
    callNextMethod()
    cat(sprintf("ntrees: %i\n", object[['ntrees']]))

    # 'search.mult' is a numeric scalar and need not be a whole number.
    # sprintf() refuses "%i" for fractional doubles, so the value is turned
    # into text first; scientific=FALSE keeps large whole numbers in the
    # same plain-digit form that "%i" produced.
    mult <- format(object[['search.mult']], scientific=FALSE)
    cat(sprintf("search.mult: %s\n", mult))
})
|
||
#' @export | ||
#' @rdname AnnoyParam | ||
setMethod("buildIndex", "AnnoyParam", function(X, transposed = FALSE, ..., BNPARAM) {
    # Hand the input to the shared coercion helper, then pass the result
    # to the Annoy builder along with the tree count and distance metric
    # stored in 'BNPARAM'. Arguments in '...' are not used here.
    mat <- .coerce_matrix_build(X, transposed)
    build_annoy(
        mat,
        num_trees=BNPARAM@ntrees,
        distance=BNPARAM@distance
    )
})
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
#' The ExhaustiveParam class | ||
#' | ||
#' A class to hold parameters for the exhaustive algorithm for exact nearest neighbor identification. | ||
#' | ||
#' @param distance A string specifying the distance metric to use. | ||
#' @param X A numeric matrix where rows correspond to data points and columns correspond to variables (i.e., dimensions). | ||
#' @param transposed Logical scalar indicating whether \code{X} is transposed, i.e., rows are variables and columns are data points. | ||
#' @param ... Further arguments, ignored. | ||
#' @param BNPARAM An ExhaustiveParam instance. | ||
#' | ||
#' @details | ||
#' The exhaustive search computes all pairwise distances between data and query points to identify nearest neighbors of the latter. | ||
#' It has quadratic complexity and is theoretically the worst-performing method; | ||
#' however, it has effectively no overhead from constructing or querying indexing structures, | ||
#' making it faster in situations where indexing provides little benefit. | ||
#' This includes queries against datasets with few data points or very high dimensionality. | ||
#' | ||
#' All that said, this algorithm is largely provided as a baseline for comparing against the other algorithms. | ||
#' | ||
#' @return | ||
#' The \code{ExhaustiveParam} constructor returns an instance of the ExhaustiveParam class. | ||
#' | ||
#' The \code{\link{buildIndex}} method returns an external pointer to an exhaustive index. | ||
#' | ||
#' @author | ||
#' Allison Vuong | ||
#' | ||
#' @seealso | ||
#' \linkS4class{BiocNeighborParam}, for the parent class and its available methods. | ||
#' | ||
#' @examples | ||
#' (out <- ExhaustiveParam()) | ||
#' | ||
#' @aliases ExhaustiveParam-class | ||
#' @docType class | ||
#' | ||
#' @export | ||
#' @importFrom methods new | ||
ExhaustiveParam <- function(distance="Euclidean") {
    # The distance metric is the only setting; it is stored as supplied.
    param <- new("ExhaustiveParam", distance=distance)
    param
}
|
||
#' @export | ||
#' @rdname ExhaustiveParam | ||
setMethod("buildIndex", "ExhaustiveParam", function(X, transposed = FALSE, ..., BNPARAM) {
    # Hand the input to the shared coercion helper, then pass the result
    # to the exhaustive builder with the distance metric stored in
    # 'BNPARAM'. Arguments in '...' are not used here.
    mat <- .coerce_matrix_build(X, transposed)
    build_exhaustive(mat, distance=BNPARAM@distance)
})
Oops, something went wrong.