Skip to content

Commit

Permalink
Implement full text search (#2280)
Browse files Browse the repository at this point in the history
* Implement graphql directive to add full text search to db

* Tidy up

* update create column query

Co-authored-by: Scott Twiname <[email protected]>

* Add plugin to sanitise fulltext search query

* Update changelogs

* Fix typos

* Fix  tests

---------

Co-authored-by: Ben <[email protected]>
  • Loading branch information
stwiname and bz888 authored Mar 4, 2024
1 parent 8e5165d commit 2cf36b9
Show file tree
Hide file tree
Showing 17 changed files with 306 additions and 25 deletions.
2 changes: 2 additions & 0 deletions packages/node-core/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [Unreleased]
### Added
- Support for Full Text Search (#2280)

## [7.3.1] - 2024-02-29
### Removed
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,14 @@ export class SchemaMigrationService {
for (const index of modelValue.addedIndexes) {
migrationAction.createIndex(modelValue.model, index);
}

if (modelValue.removedFullText) {
migrationAction.dropFullText(modelValue.model);
}

if (modelValue.addedFullText) {
migrationAction.createFullText(modelValue.model);
}
}

for (const relationModel of addedRelations) {
Expand Down
42 changes: 38 additions & 4 deletions packages/node-core/src/db/migration-service/migration-helpers.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import {
GraphQLEntityField,
GraphQLEntityIndex,
GraphQLEnumsType,
GraphQLFullTextType,
GraphQLModelsType,
GraphQLRelationsType,
} from '@subql/utils';
Expand All @@ -19,6 +20,9 @@ export type ModifiedModels = Record<

addedIndexes: GraphQLEntityIndex[];
removedIndexes: GraphQLEntityIndex[];

addedFullText?: GraphQLFullTextType;
removedFullText?: GraphQLFullTextType;
}
>;

Expand All @@ -36,14 +40,25 @@ export interface SchemaChangesType {
modifiedEnums: GraphQLEnumsType[];
}

export function indexesEqual(index1: GraphQLEntityIndex, index2: GraphQLEntityIndex): boolean {
/**
* Checks that 2 string arrays are the same independent of order
* */
function arrayUnorderedMatch(a?: string[], b?: string[]): boolean {
return a?.sort().join(',') === b?.sort().join(',');
}

function indexesEqual(index1: GraphQLEntityIndex, index2: GraphQLEntityIndex): boolean {
return (
index1.fields.join(',') === index2.fields.join(',') &&
arrayUnorderedMatch(index1.fields, index2.fields) &&
index1.unique === index2.unique &&
index1.using === index2.using
);
}

function fullTextEqual(a?: GraphQLFullTextType, b?: GraphQLFullTextType): boolean {
return arrayUnorderedMatch(a?.fields, b?.fields) && a?.language === b?.language;
}

export function hasChanged(changes: SchemaChangesType): boolean {
return Object.values(changes).some((change) =>
Array.isArray(change) ? change.length > 0 : Object.keys(change).length > 0
Expand Down Expand Up @@ -96,7 +111,7 @@ export function compareRelations(
});
}

export function fieldsAreEqual(field1: GraphQLEntityField, field2: GraphQLEntityField): boolean {
function fieldsAreEqual(field1: GraphQLEntityField, field2: GraphQLEntityField): boolean {
return (
field1.name === field2.name &&
field1.type === field2.type &&
Expand Down Expand Up @@ -134,13 +149,25 @@ export function compareModels(
const addedIndexes = model.indexes.filter((index) => !currentModel.indexes.some((i) => indexesEqual(i, index)));
const removedIndexes = currentModel.indexes.filter((index) => !model.indexes.some((i) => indexesEqual(i, index)));

if (addedFields.length || removedFields.length || addedIndexes.length || removedIndexes.length) {
const addedFullText = !fullTextEqual(model.fullText, currentModel.fullText) ? model.fullText : undefined;
const removedFullText = !fullTextEqual(model.fullText, currentModel.fullText) ? currentModel.fullText : undefined;

if (
addedFields.length ||
removedFields.length ||
addedIndexes.length ||
removedIndexes.length ||
addedFullText ||
removedFullText
) {
changes.modifiedModels[model.name] = {
model,
addedFields,
removedFields,
addedIndexes,
removedIndexes,
addedFullText,
removedFullText,
};
}
}
Expand Down Expand Up @@ -195,6 +222,13 @@ export function schemaChangesLoggerMessage(schemaChanges: SchemaChangesType): st
if (changes.removedIndexes.length) {
logMessage += `\tRemoved Indexes: ${formatIndexes(changes.removedIndexes)}\n`;
}

if (changes.addedFullText) {
logMessage += `\tAdded FullText: ${changes.addedFullText.fields.join(', ')}\n`;
}
if (changes.removedFullText) {
logMessage += `\tRemoved FullText\n`;
}
});
if (schemaChanges.addedEnums.length) {
logMessage += `Added Enums: ${formatEnums(schemaChanges.addedEnums)}\n`;
Expand Down
40 changes: 37 additions & 3 deletions packages/node-core/src/db/migration-service/migration.ts
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
// Copyright 2020-2024 SubQuery Pte Ltd authors & contributors
// SPDX-License-Identifier: GPL-3.0

import assert from 'node:assert';
import {SUPPORT_DB} from '@subql/common';
import {
GraphQLEntityField,
GraphQLEntityIndex,
GraphQLEnumsType,
GraphQLFullTextType,
GraphQLModelsType,
GraphQLRelationsType,
hashName,
Expand Down Expand Up @@ -190,6 +192,10 @@ export class Migration {
);
}

if (model.fullText) {
this.createFullText(model);
}

if (this.useSubscription) {
const triggerName = hashName(this.schemaName, 'notify_trigger', sequelizeModel.tableName);
const notifyTriggers = await syncHelper.getTriggers(this.sequelize, triggerName);
Expand Down Expand Up @@ -305,7 +311,7 @@ export class Migration {
singleForeignFieldName: relation.fieldName,
});
this.extraQueries.push(
syncHelper.commentConstraintQuery(`"${this.schemaName}"."${rel.target.tableName}"`, fkConstraint, tags),
syncHelper.commentConstraintQuery(this.schemaName, rel.target.tableName, fkConstraint, tags),
syncHelper.createUniqueIndexQuery(this.schemaName, relatedModel.tableName, relation.foreignKey)
);
break;
Expand All @@ -320,7 +326,7 @@ export class Migration {
foreignFieldName: relation.fieldName,
});
this.extraQueries.push(
syncHelper.commentConstraintQuery(`"${this.schemaName}"."${rel.target.tableName}"`, fkConstraint, tags)
syncHelper.commentConstraintQuery(this.schemaName, rel.target.tableName, fkConstraint, tags)
);
break;
}
Expand All @@ -344,7 +350,7 @@ export class Migration {
const comment = Array.from(keys.values())
.map((tags) => syncHelper.smartTags(tags, '|'))
.join('\n');
const query = syncHelper.commentTableQuery(`"${this.schemaName}"."${tableName}"`, comment);
const query = syncHelper.commentTableQuery(this.schemaName, tableName, comment);
this.extraQueries.push(query);
});
}
Expand Down Expand Up @@ -425,6 +431,34 @@ export class Migration {
});
}

createFullText(model: GraphQLModelsType): void {
assert(model.fullText, `Expected fullText to exist on model ${model.name}`);

const table = modelToTableName(model.name);

const queries = [
syncHelper.createTsVectorColumnQuery(this.schemaName, table, model.fullText.fields, model.fullText.language),
syncHelper.createTsVectorCommentQuery(this.schemaName, table),
syncHelper.createTsVectorIndexQuery(this.schemaName, table),
syncHelper.createSearchFunctionQuery(this.schemaName, table),
syncHelper.commentSearchFunctionQuery(this.schemaName, table),
];

this.mainQueries.push(...queries);
}

dropFullText(model: GraphQLModelsType): void {
const table = modelToTableName(model.name);

const queries = [
syncHelper.dropSearchFunctionQuery(this.schemaName, table),
syncHelper.dropTsVectorIndexQuery(this.schemaName, table),
syncHelper.dropTsVectorColumnQuery(this.schemaName, table),
];

this.mainQueries.push(...queries);
}

// Sequelize model will generate follow query to create hash indexes
// Example SQL: CREATE INDEX "accounts_person_id" ON "polkadot-starter"."accounts" USING hash ("person_id")
// This will be rejected from cockroach db due to syntax error
Expand Down
100 changes: 84 additions & 16 deletions packages/node-core/src/db/sync-helper.ts
Original file line number Diff line number Diff line change
Expand Up @@ -68,16 +68,28 @@ export function getUniqConstraint(tableName: string, field: string): string {
return [tableName, field, 'uindex'].map(underscored).join('_');
}

export function commentConstraintQuery(table: string, constraint: string, comment: string): string {
return `COMMENT ON CONSTRAINT ${constraint} ON ${table} IS E'${comment}'`;
function escapedName(...args: string[]): string {
return args.map((a) => `"${a}"`).join('.');
}

export function commentTableQuery(column: string, comment: string): string {
return `COMMENT ON TABLE ${column} IS E'${comment}'`;
function commentOn(type: 'CONSTRAINT' | 'TABLE' | 'COLUMN' | 'FUNCTION', entity: string, comment: string): string {
return `COMMENT ON ${type} ${entity} IS E'${comment}'`;
}

export function commentConstraintQuery(schema: string, table: string, constraint: string, comment: string): string {
return commentOn('CONSTRAINT', escapedName(schema, table), comment);
}

export function commentTableQuery(schema: string, table: string, comment: string): string {
return commentOn('TABLE', escapedName(schema, table), comment);
}

export function commentColumnQuery(schema: string, table: string, column: string, comment: string): string {
return `COMMENT ON COLUMN "${schema}".${table}.${column} IS '${comment}';`;
return commentOn('COLUMN', escapedName(schema, table, column), comment);
}

export function commentOnFunction(schema: string, functionName: string, comment: string): string {
return commentOn('FUNCTION', escapedName(schema, functionName), comment);
}

// This is used when historical is disabled so that we can perform bulk updates
Expand Down Expand Up @@ -195,14 +207,22 @@ END$$;
`;
}

export function dropNotifyTrigger(schema: string, table: string): string {
const triggerName = hashName(schema, 'notify_trigger', table);
function dropTrigger(schema: string, table: string, triggerName: string): string {
return `DROP TRIGGER IF EXISTS "${triggerName}"
ON "${schema}"."${table}";`;
}

export function dropNotifyTrigger(schema: string, table: string): string {
const triggerName = hashName(schema, 'notify_trigger', table);
return dropTrigger(schema, table, triggerName);
}

function dropFunction(schema: string, functionName: string): string {
return `DROP FUNCTION IF EXISTS "${schema}"."${functionName}";`;
}

export function dropNotifyFunction(schema: string): string {
return `DROP FUNCTION IF EXISTS "${schema}".send_notification()`;
return dropFunction(schema, 'send_notification');
}

// Hot schema reload, _metadata table
Expand All @@ -215,7 +235,7 @@ export function createSchemaTrigger(schema: string, metadataTableName: string):
ON "${schema}"."${metadataTableName}"
FOR EACH ROW
WHEN ( new.key = 'schemaMigrationCount')
EXECUTE FUNCTION "${schema}".schema_notification()`;
EXECUTE FUNCTION "${schema}".schema_notification();`;
}

export function createSchemaTriggerFunction(schema: string): string {
Expand Down Expand Up @@ -246,7 +266,7 @@ const DEFAULT_SQL_EXE_BATCH = 2000;
* Improve SQL which could potentially increase DB IO significantly,
* this executes it by batch size, and in ASC id order
**/
export const sqlIterator = (tableName: string, sql: string, batch: number = DEFAULT_SQL_EXE_BATCH) => {
export const sqlIterator = (tableName: string, sql: string, batch: number = DEFAULT_SQL_EXE_BATCH): string => {
return `
DO $$
DECLARE
Expand Down Expand Up @@ -367,6 +387,8 @@ export function generateCreateTableQuery(
Object.keys(attributes).forEach((key) => {
const attr = attributes[key];

assert(attr.field, 'Expected field to be set on attribute');

if (timestampKeys.find((k) => k === attr.field)) {
attr.type = 'timestamp with time zone';
}
Expand All @@ -375,7 +397,7 @@ export function generateCreateTableQuery(

columnDefinitions.push(columnDefinition);
if (attr.comment) {
comments.push(`COMMENT ON COLUMN "${schema}"."${tableName}"."${attr.field}" IS '${attr.comment}';`);
comments.push(commentColumnQuery(schema, tableName, attr.field, attr.comment));
}
if (attr.primaryKey) {
primaryKeyColumns.push(`"${attr.field}"`);
Expand Down Expand Up @@ -443,7 +465,7 @@ export function sortModels(relations: GraphQLRelationsType[], models: GraphQLMod
export function validateNotifyTriggers(triggerName: string, triggers: NotifyTriggerPayload[]): void {
if (triggers.length !== NotifyTriggerManipulationType.length) {
throw new Error(
`Found ${triggers.length} ${triggerName} triggers, expected ${NotifyTriggerManipulationType.length} triggers `
`Found ${triggers.length} ${triggerName} triggers, expected ${NotifyTriggerManipulationType.length} triggers`
);
}
triggers.map((t) => {
Expand Down Expand Up @@ -508,14 +530,15 @@ export async function getExistingEnums(schema: string, sequelize: Sequelize): Pr

// enums
export const dropEnumQuery = (enumTypeValue: string, schema: string): string =>
`DROP TYPE IF EXISTS "${schema}"."${enumTypeValue}"`;
`DROP TYPE IF EXISTS "${schema}"."${enumTypeValue}";`;
export const commentOnEnumQuery = (type: string, comment: string): string => `COMMENT ON TYPE ${type} IS E${comment};`;
export const createEnumQuery = (type: string, enumValues: string): string =>
`CREATE TYPE ${type} AS ENUM (${enumValues});`;

// relations
export const dropRelationQuery = (schemaName: string, tableName: string, fkConstraint: string): string =>
`ALTER TABLE "${schemaName}"."${tableName}" DROP CONSTRAINT IF EXISTS ${fkConstraint};`;

export function generateForeignKeyQuery(attribute: ModelAttributeColumnOptions, tableName: string): string | void {
const references = attribute?.references as ModelAttributeColumnReferencesOptions;
if (!references) {
Expand Down Expand Up @@ -561,13 +584,58 @@ export const createIndexQuery = (indexOptions: IndexesOptions, tableName: string
`CREATE ${indexOptions.unique ? 'UNIQUE ' : ''}INDEX IF NOT EXISTS "${indexOptions.name}" ` +
`ON "${schema}"."${tableName}" ` +
`${indexOptions.using ? `USING ${indexOptions.using} ` : ''}` +
`(${indexOptions.fields.join(', ')})`
`(${indexOptions.fields.join(', ')});`
);
};

// columns
export const dropColumnQuery = (schema: string, columnName: string, tableName: string): string =>
`ALTER TABLE "${schema}"."${modelToTableName(tableName)}" DROP COLUMN IF EXISTS ${columnName};`;

export const createColumnQuery = (schema: string, tableName: string, columnName: string, attributes: string): string =>
`ALTER TABLE IF EXISTS "${schema}"."${tableName}" ADD COLUMN IF NOT EXISTS "${columnName}" ${attributes};`;
export const createColumnQuery = (schema: string, table: string, columnName: string, attributes: string): string =>
`ALTER TABLE ${escapedName(schema, table)} ADD COLUMN IF NOT EXISTS "${columnName}" ${attributes};`;

// fullTextSearch
const TS_VECTOR_COL = '_tsv';

export const dropTsVectorColumnQuery = (schema: string, table: string): string =>
dropColumnQuery(schema, table, TS_VECTOR_COL);
export const createTsVectorColumnQuery = (
schema: string,
table: string,
fields: string[],
language = 'english'
): string => {
const generated = `GENERATED ALWAYS as (to_tsvector('pg_catalog.${language}', ${fields
.map((field) => `coalesce(${escapedName(field)}, '')`)
.join(` || ' ' || `)})) STORED`;
return createColumnQuery(schema, table, TS_VECTOR_COL, `tsvector ${generated}`);
};

export const createTsVectorCommentQuery = (schema: string, table: string): string =>
commentColumnQuery(schema, table, TS_VECTOR_COL, '@omit all');

const tsVectorIndexName = (schema: string, table: string) => hashName(schema, 'fulltext_idx', table);
export const dropTsVectorIndexQuery = (schema: string, table: string): string =>
dropIndexQuery(schema, tsVectorIndexName(schema, table));

export const createTsVectorIndexQuery = (schema: string, table: string): string =>
createIndexQuery({using: 'gist', fields: [TS_VECTOR_COL], name: tsVectorIndexName(schema, table)}, table, schema);

const searchFunctionName = (schema: string, table: string): string => hashName(schema, 'search', table);
export const dropSearchFunctionQuery = (schema: string, table: string): string =>
dropFunction(schema, searchFunctionName(schema, table));
export const createSearchFunctionQuery = (schema: string, table: string): string => {
const functionName = searchFunctionName(schema, table);
return `
create or replace function ${escapedName(schema, functionName)}(search text)
returns setof ${escapedName(schema, table)} as $$
select *
from ${escapedName(schema, table)} as "table"
where "table"."${TS_VECTOR_COL}" @@ to_tsquery(search)
$$ language sql stable;
`;
};

export const commentSearchFunctionQuery = (schema: string, table: string): string =>
commentOnFunction(schema, searchFunctionName(schema, table), `@name search_${table}`);
Loading

0 comments on commit 2cf36b9

Please sign in to comment.