Skip to content

Commit

Permalink
Basic queries are working
Browse files Browse the repository at this point in the history
  • Loading branch information
tjgreen42 committed Jan 28, 2025
1 parent 3a96c32 commit 5308c15
Show file tree
Hide file tree
Showing 8 changed files with 131 additions and 81 deletions.
21 changes: 7 additions & 14 deletions pgvectorscale/src/access_method/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,6 @@ pub extern "C" fn ambuild(
indexrel: pg_sys::Relation,
index_info: *mut pg_sys::IndexInfo,
) -> *mut pg_sys::IndexBuildResult {
info!("ambuild");
let heap_relation = unsafe { PgRelation::from_pg(heaprel) };
let index_relation = unsafe { PgRelation::from_pg(indexrel) };
let opt = TSVIndexOptions::from_relation(&index_relation);
Expand Down Expand Up @@ -125,7 +124,7 @@ pub extern "C" fn ambuild(
);
}

info!("meta_page: {:?}", meta_page);
debug1!("meta_page: {:?}", meta_page);

let ntuples = do_heap_scan(index_info, &heap_relation, &index_relation, meta_page);

Expand Down Expand Up @@ -257,8 +256,6 @@ fn do_heap_scan(
index_relation: &PgRelation,
meta_page: MetaPage,
) -> usize {
info!("do_heap_scan");

let storage = meta_page.get_storage_type();

let mut mp2 = meta_page.clone();
Expand Down Expand Up @@ -385,26 +382,25 @@ fn finalize_index_build<S: Storage>(
}
}

info!("finalize_index_build done");
state.graph.debug_dump();
// state.graph.debug_dump();
state.graph.get_meta_page().debug_dump();

info!("write done");
debug1!("write done");
assert_eq!(write_stats.num_nodes, state.ntuples);

let writing_took = Instant::now()
.duration_since(write_stats.started)
.as_secs_f64();
if write_stats.num_nodes > 0 {
info!(
debug1!(
"Writing took {}s or {}s/tuple. Avg neighbors: {}",
writing_took,
writing_took / write_stats.num_nodes as f64,
write_stats.num_neighbors / write_stats.num_nodes
);
}
if write_stats.prune_stats.calls > 0 {
info!(
debug1!(
"When pruned for cleanup: avg neighbors before/after {}/{} of {} prunes",
write_stats.prune_stats.num_neighbors_before_prune / write_stats.prune_stats.calls,
write_stats.prune_stats.num_neighbors_after_prune / write_stats.prune_stats.calls,
Expand All @@ -427,7 +423,6 @@ unsafe extern "C" fn build_callback_bq_train(
_tuple_is_alive: bool,
state: *mut std::os::raw::c_void,
) {
info!("build_callback_bq_train");
let state = (state as *mut StorageBuildState).as_mut().unwrap();
match state {
StorageBuildState::SbqSpeedup(bq, state) => {
Expand All @@ -451,7 +446,6 @@ unsafe extern "C" fn build_callback(
_tuple_is_alive: bool,
state: *mut std::os::raw::c_void,
) {
info!("build_callback");
let index_relation = unsafe { PgRelation::from_pg(index) };
let heap_pointer = ItemPointer::with_item_pointer_data(*ctid);
let state = (state as *mut StorageBuildState).as_mut().unwrap();
Expand Down Expand Up @@ -502,13 +496,12 @@ fn build_callback_internal<S: Storage>(
state: &mut BuildState,
storage: &mut S,
) {
info!("build_callback_internal");
check_for_interrupts!();

state.ntuples += 1;

if state.ntuples % 1000 == 0 {
info!(
debug1!(
"Processed {} tuples in {}s which is {}s/tuple. Dist/tuple: Prune: {} search: {}. Stats: {:?}",
state.ntuples,
Instant::now().duration_since(state.started).as_secs_f64(),
Expand All @@ -528,7 +521,7 @@ fn build_callback_internal<S: Storage>(
&mut state.stats,
);

info!("inserting into graph, index_pointer: {:?}", index_pointer);
debug1!("inserting into graph, index_pointer: {:?}", index_pointer);

state
.graph
Expand Down
23 changes: 7 additions & 16 deletions pgvectorscale/src/access_method/graph.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use std::cmp::Reverse;
use std::collections::BinaryHeap;
use std::{cmp::Ordering, collections::HashSet};

use pgrx::{info, PgRelation};
use pgrx::PgRelation;

use crate::access_method::storage::NodeDistanceMeasure;
use crate::util::{HeapPointer, IndexPointer, ItemPointer};
Expand Down Expand Up @@ -97,8 +97,6 @@ impl<QDM, PD> ListSearchResult<QDM, PD> {
gns: &GraphNeighborStore,
storage: &S,
) -> Self {
info!("ListSearchResult::new, start_nodes={:?}", start_nodes);

let neigbors = meta_page.get_num_neighbors() as usize;
let mut res = Self {
tie_break_item_pointer,
Expand Down Expand Up @@ -192,6 +190,7 @@ impl<'a> Graph<'a> {
}
}

/// Dumps the graph's neighbor store by delegating to
/// `self.neighbor_store.debug_dump()`.
// Debugging aid: it may have no active callers, so the dead-code lint is
// silenced to keep the helper available.
#[allow(dead_code)]
pub fn debug_dump(&self) {
    self.neighbor_store.debug_dump();
}
Expand Down Expand Up @@ -460,19 +459,17 @@ impl<'a> Graph<'a> {
storage: &S,
stats: &mut InsertStats,
) {
info!(
"graph::update_start_nodes, index_pointer={:?}",
index_pointer
);
let start_nodes = self.meta_page.get_start_nodes();
if let Some(start_nodes) = start_nodes {
if start_nodes.contains(vec.labels()) {
info!("graph::update_start_nodes, already contains");
// TODO: maybe replace overloaded start nodes
return;
}
}

let mut start_nodes = if start_nodes.is_none() {
let mut start_nodes = if let Some(start_nodes) = start_nodes {
start_nodes.clone()
} else {
//TODO probably better set off of centroids
let start_nodes = StartNodes::new(index_pointer);

Expand All @@ -487,18 +484,12 @@ impl<'a> Graph<'a> {
);

start_nodes
} else {
start_nodes.unwrap().clone()
};

start_nodes.add_node(vec.labels(), index_pointer);

MetaPage::set_start_nodes(index, start_nodes, stats);
*self.meta_page = MetaPage::fetch(index);
info!(
"graph::update_start_nodes, updated, meta_page={:?}",
self.meta_page
);
}

pub fn insert<S: Storage>(
Expand All @@ -509,14 +500,14 @@ impl<'a> Graph<'a> {
storage: &S,
stats: &mut InsertStats,
) {
info!("graph::insert, index_pointer={:?}", index_pointer);
self.update_start_nodes(index, index_pointer, &vec, storage, stats);

let meta_page = self.get_meta_page();

//TODO: make configurable?
let labels = label_vec_to_set(vec.labels());

#[allow(clippy::mutable_key_type)]
let v = self.greedy_search_for_build(
index_pointer,
vec,
Expand Down
3 changes: 2 additions & 1 deletion pgvectorscale/src/access_method/graph_neighbor_store.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
use std::collections::BTreeMap;

use pgrx::info;
use pgrx::pg_sys::TM_Result::TM_SelfModified;

use crate::util::{IndexPointer, ItemPointer};

Expand Down Expand Up @@ -30,6 +29,7 @@ impl BuilderNeighborCache {
}
}

#[allow(dead_code)]
pub fn debug_dump(&self) {
for (k, v) in self.neighbor_map.iter() {
info!("Node: {:?}", k);
Expand Down Expand Up @@ -106,6 +106,7 @@ impl GraphNeighborStore {
};
}

#[allow(dead_code)]
pub fn debug_dump(&self) {
match self {
GraphNeighborStore::Builder(b) => b.debug_dump(),
Expand Down
63 changes: 47 additions & 16 deletions pgvectorscale/src/access_method/labels.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,38 @@ pub type LabelVec = Vec<Label>;
pub const INVALID_LABEL: u8 = 0;
pub const MAX_LABELS_PER_NODE: usize = 8;

/// Returns the prefix of `labels` left after dropping every trailing
/// `INVALID_LABEL` entry.
///
/// Only the tail is stripped: an `INVALID_LABEL` that is followed by a valid
/// label is kept. An empty slice, or one made up solely of `INVALID_LABEL`,
/// yields an empty slice.
fn trim(labels: &[Label]) -> &[Label] {
    // Position just past the last valid label; 0 when there is none.
    let end = labels
        .iter()
        .rposition(|label| *label != INVALID_LABEL)
        .map_or(0, |last_valid| last_valid + 1);
    &labels[..end]
}

/// Returns true if the two label sets overlap. Assumes labels are sorted.
pub fn test_overlap(labels1: &[Label], labels2: &[Label]) -> bool {
debug_assert!(labels1.is_sorted());
debug_assert!(labels2.is_sorted());
pub fn do_labels_overlap(labels1: &[Label], labels2: &[Label]) -> bool {
debug_assert!(trim(labels1).is_sorted());
debug_assert!(trim(labels2).is_sorted());

// Special case: empty labels overlap
// TODO: confusing
if !labels1.is_empty()
&& !labels2.is_empty()
&& labels1[0] == INVALID_LABEL
&& labels2[0] == INVALID_LABEL
{
return true;
}

let mut i = 0;
let mut j = 0;
while i < labels1.len() && j < labels2.len() {
while i < labels1.len()
&& j < labels2.len()
&& labels1[i] != INVALID_LABEL
&& labels2[j] != INVALID_LABEL
{
#[allow(clippy::comparison_chain)]
if labels1[i] == labels2[j] {
return true;
} else if labels1[i] < labels2[j] {
Expand Down Expand Up @@ -101,6 +125,13 @@ impl LabeledVector {
/// Returns this vector's labels as a borrowed slice, or `None` when the
/// `labels` field holds no value.
pub fn labels(&self) -> Option<&[Label]> {
    self.labels.as_deref()
}

/// Reports whether this vector's labels overlap with `other`.
///
/// When the vector has no labels (`labels()` returns `None`) the result is
/// always `true`; otherwise the decision is delegated to the free function
/// `do_labels_overlap`.
pub fn do_labels_overlap(&self, other: &[Label]) -> bool {
    let Some(own_labels) = self.labels() else {
        // No labels on this vector: treated as overlapping with anything.
        return true;
    };
    do_labels_overlap(own_labels, other)
}
}

/// Test cases for do_labels_overlap
Expand All @@ -110,18 +141,18 @@ mod test {

#[test]
fn test_test_overlap() {
assert_eq!(test_overlap(&[], &[]), false);
assert_eq!(test_overlap(&[1], &[]), false);
assert_eq!(test_overlap(&[], &[1]), false);
assert_eq!(test_overlap(&[1], &[1]), true);
assert_eq!(test_overlap(&[1], &[2]), false);
assert_eq!(test_overlap(&[1, 2], &[2]), true);
assert_eq!(test_overlap(&[1, 2], &[3]), false);
assert_eq!(test_overlap(&[1, 2], &[2, 3]), true);
assert_eq!(test_overlap(&[1, 2], &[3, 4]), false);
assert_eq!(test_overlap(&[1, 2], &[2, 3]), true);
assert_eq!(test_overlap(&[1, 2], &[2, 3, 4]), true);
assert_eq!(test_overlap(&[1, 2], &[3, 4, 5]), false);
assert!(!do_labels_overlap(&[], &[]));
assert!(!do_labels_overlap(&[1], &[]));
assert!(!do_labels_overlap(&[], &[1]));
assert!(do_labels_overlap(&[1], &[1]));
assert!(!do_labels_overlap(&[1], &[2]));
assert!(do_labels_overlap(&[1, 2], &[2]));
assert!(!do_labels_overlap(&[1, 2], &[3]));
assert!(do_labels_overlap(&[1, 2], &[2, 3]));
assert!(!do_labels_overlap(&[1, 2], &[3, 4]));
assert!(do_labels_overlap(&[1, 2], &[2, 3]));
assert!(do_labels_overlap(&[1, 2], &[2, 3, 4]));
assert!(!do_labels_overlap(&[1, 2], &[3, 4, 5]));
}

/// Test label_vec_to_set
Expand Down
3 changes: 1 addition & 2 deletions pgvectorscale/src/access_method/meta_page.rs
Original file line number Diff line number Diff line change
Expand Up @@ -332,8 +332,7 @@ impl MetaPage {
Self::overwrite(index, &new_meta);
return new_meta;
}
let meta = Self::get_meta_from_page(page);
meta
Self::get_meta_from_page(page)
}
}

Expand Down
33 changes: 20 additions & 13 deletions pgvectorscale/src/access_method/plain_storage.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use super::{
distance::DistanceFn,
graph::{ListSearchNeighbor, ListSearchResult},
graph_neighbor_store::GraphNeighborStore,
labels::{test_overlap, Label, LabelSet, LabeledVector},
labels::{do_labels_overlap, Label, LabelSet, LabeledVector},
neighbor_with_distance::DistanceWithTieBreak,
pg_vector::PgVector,
plain_node::{ArchivedNode, Node, ReadableNode},
Expand Down Expand Up @@ -90,6 +90,12 @@ impl PlainDistanceMeasure {
stats.record_full_distance_comparison();
(distance_fn)(query, vector)
}

/// Reports whether the query held by this distance measure overlaps with
/// `labels`, by forwarding to the query's own `do_labels_overlap`.
pub fn do_labels_overlap(&self, labels: &[Label]) -> bool {
    // `Full` is the only variant handled here; the irrefutable pattern stops
    // compiling if another variant is added, just as the exhaustive match did.
    let PlainDistanceMeasure::Full(query) = self;
    query.do_labels_overlap(labels)
}
}

/* This is only applicable to plain, so keep here not in storage_common */
Expand Down Expand Up @@ -139,24 +145,14 @@ impl NodeDistanceMeasure for IndexFullDistanceMeasure<'_> {
(self.storage.get_distance_function())(vec1, vec2)
}

// unsafe fn get_labels<S: StatsNodeRead>(
// &self,
// index_pointer: IndexPointer,
// stats: &mut S,
// ) -> &LabelSet {
// let rn = Node::read(self.storage.index, index_pointer, stats);
// let node = rn.get_archived_node();
// node.get_labels()
// }

unsafe fn do_labels_overlap<S: StatsNodeRead>(
&self,
index_pointer: IndexPointer,
stats: &mut S,
) -> bool {
let rn = Node::read(self.storage.index, index_pointer, stats);
let node = rn.get_archived_node();
test_overlap(
do_labels_overlap(
node.get_labels(),
self.readable_node.get_archived_node().get_labels(),
)
Expand Down Expand Up @@ -286,10 +282,11 @@ impl Storage for PlainStorage<'_> {
let rn = unsafe { Node::read(self.index, neighbors_of, stats) };
// Copy neighbors and labels before giving ownership of `rn`` to the distance state
let neighbors: Vec<_> = rn.get_archived_node().iter_neighbors().collect();
#[allow(clippy::map_clone)]
let labels: Vec<LabelSet> = rn
.get_archived_node()
.iter_neighbor_labels()
.map(|x| x.clone())
.map(|x| *x)
.collect();
let dist_state = unsafe { IndexFullDistanceMeasure::with_readable_node(self, rn) };
for (n, label) in neighbors.into_iter().zip(labels) {
Expand Down Expand Up @@ -354,6 +351,16 @@ impl Storage for PlainStorage<'_> {
unsafe { Node::read(self.index, neighbor_index_pointer, &mut lsr.stats) };
let node_neighbor = rn_neighbor.get_archived_node();

// Skip neighbors that have no matching labels with the query
if !lsr
.sdm
.as_ref()
.unwrap()
.do_labels_overlap(node_neighbor.get_labels())
{
continue;
}

let distance = match lsr.sdm.as_ref().unwrap() {
PlainDistanceMeasure::Full(query) => PlainDistanceMeasure::calculate_distance(
self.distance_fn,
Expand Down
Loading

0 comments on commit 5308c15

Please sign in to comment.