From f1e538a68ac9f9941d4488a90567678940fb2d0c Mon Sep 17 00:00:00 2001 From: Benyamin Salimi Date: Fri, 31 Jan 2025 15:42:45 +0100 Subject: [PATCH] feat(duckdb-driver): Add support for installing and loading DuckDB Community Extensions --- .../configuration/data-sources/duckdb.mdx | 4 +- .../configuration/environment-variables.mdx | 8 +++ packages/cubejs-backend-shared/src/env.ts | 13 ++++ .../cubejs-duckdb-driver/src/DuckDBDriver.ts | 69 ++++++++++--------- 4 files changed, 61 insertions(+), 33 deletions(-) diff --git a/docs/pages/product/configuration/data-sources/duckdb.mdx b/docs/pages/product/configuration/data-sources/duckdb.mdx index 1478383721718..aff44f2a0698b 100644 --- a/docs/pages/product/configuration/data-sources/duckdb.mdx +++ b/docs/pages/product/configuration/data-sources/duckdb.mdx @@ -74,7 +74,7 @@ deployment][ref-demo-deployment] in Cube Cloud. | `CUBEJS_DB_DUCKDB_S3_URL_STYLE` | To choose the S3 URL style(vhost or path) | 'vhost' or 'path' | ❌ | ❌ | | `CUBEJS_DB_DUCKDB_S3_SESSION_TOKEN` | The token for the S3 session | A valid Session Token | ❌ | ✅ | | `CUBEJS_DB_DUCKDB_EXTENSIONS` | A comma-separated list of DuckDB extensions to install and load | A comma-separated list of DuckDB extensions | ❌ | ✅ | - +| `CUBEJS_DB_DUCKDB_COMMUNITY_EXTENSIONS` | A comma-separated list of DuckDB community extensions to install and load | A comma-separated list of DuckDB community extensions | ❌ | ✅ | ## Pre-Aggregation Feature Support ### count_distinct_approx @@ -131,4 +131,4 @@ connections are made over HTTPS. /product/caching/using-pre-aggregations#pre-aggregation-build-strategies [ref-schema-ref-types-formats-countdistinctapprox]: /reference/data-model/types-and-formats#count_distinct_approx [self-preaggs-batching]: #batching -[ref-demo-deployment]: /product/deployment/cloud/deployments#demo-deployments \ No newline at end of file +[ref-demo-deployment]: /product/deployment/cloud/deployments#demo-deployments diff --git a/docs/pages/reference/configuration/environment-variables.mdx b/docs/pages/reference/configuration/environment-variables.mdx index f227eb192cba5..b80dbe576bb07 100644 --- a/docs/pages/reference/configuration/environment-variables.mdx +++ b/docs/pages/reference/configuration/environment-variables.mdx @@ -332,6 +332,14 @@ A comma-separated list of DuckDB extensions to install and load. | ------------------------------------------- | ---------------------- | --------------------- | | A comma-separated list of DuckDB extensions | N/A | N/A | +## `CUBEJS_DB_DUCKDB_COMMUNITY_EXTENSIONS` + +A comma-separated list of DuckDB community extensions to install and load. + +| Possible Values | Default in Development | Default in Production | +| ----------------------------------------------------- | ---------------------- | --------------------- | +| A comma-separated list of DuckDB community extensions | N/A | N/A | + ## `CUBEJS_DB_ELASTIC_APIKEY_ID` The [ID of the API key from elastic.co][elastic-docs-api-keys]. Required when diff --git a/packages/cubejs-backend-shared/src/env.ts b/packages/cubejs-backend-shared/src/env.ts index 33d1854ff7c73..525c1cb11ab68 100644 --- a/packages/cubejs-backend-shared/src/env.ts +++ b/packages/cubejs-backend-shared/src/env.ts @@ -1713,6 +1713,19 @@ const variables: Record any> = { } return []; }, + duckdbCommunityExtensions: ({ + dataSource + }: { + dataSource: string, + }) => { + const extensions = process.env[ + keyByDataSource('CUBEJS_DB_DUCKDB_COMMUNITY_EXTENSIONS', dataSource) + ]; + if (extensions) { + return extensions.split(',').map(e => e.trim()); + } + return []; + }, /** *************************************************************** * Presto Driver * **************************************************************** */ diff --git a/packages/cubejs-duckdb-driver/src/DuckDBDriver.ts b/packages/cubejs-duckdb-driver/src/DuckDBDriver.ts index 8fd887f29c9e4..2314115123746 100644 --- a/packages/cubejs-duckdb-driver/src/DuckDBDriver.ts +++ b/packages/cubejs-duckdb-driver/src/DuckDBDriver.ts @@ -55,10 +55,39 @@ export class DuckDBDriver extends BaseDriver implements DriverInterface { return super.toGenericType(columnType.toLowerCase()); } + private async installExtensions(extensions: string[], execAsync: (sql: string, ...params: any[]) => Promise, repository: string = ''): Promise { + repository = repository ? ` FROM ${repository}` : ''; + for (const extension of extensions) { + try { + await execAsync(`INSTALL ${extension}${repository}`); + } catch (e) { + if (this.logger) { + console.error(`DuckDB - error on installing ${extension}`, { e }); + } + // DuckDB will lose connection_ref on connection on error, this will lead to broken connection object + throw e; + } + } + } + + private async loadExtensions(extensions: string[], execAsync: (sql: string, ...params: any[]) => Promise): Promise { + for (const extension of extensions) { + try { + await execAsync(`LOAD ${extension}`); + } catch (e) { + if (this.logger) { + console.error(`DuckDB - error on loading ${extension}`, { e }); + } + // DuckDB will lose connection_ref on connection on error, this will lead to broken connection object + throw e; + } + } + } + protected async init(): Promise { const token = getEnv('duckdbMotherDuckToken', this.config); const dbPath = getEnv('duckdbDatabasePath', this.config); - + // Determine the database URL based on the provided db_path or token let dbUrl: string; if (dbPath) { @@ -119,7 +148,7 @@ export class DuckDBDriver extends BaseDriver implements DriverInterface { value: getEnv('duckdbS3SessionToken', this.config), } ]; - + for (const { key, value } of configuration) { if (value) { try { @@ -135,34 +164,12 @@ export class DuckDBDriver extends BaseDriver implements DriverInterface { } // Install & load extensions if configured in env variable. - const extensions = getEnv('duckdbExtensions', this.config); - for (const extension of extensions) { - try { - await execAsync(`INSTALL ${extension}`); - } catch (e) { - if (this.logger) { - console.error(`DuckDB - error on installing ${extension}`, { - e - }); - } - - // DuckDB will lose connection_ref on connection on error, this will lead to broken connection object - throw e; - } - - try { - await execAsync(`LOAD ${extension}`); - } catch (e) { - if (this.logger) { - console.error(`DuckDB - error on loading ${extension}`, { - e - }); - } - - // DuckDB will lose connection_ref on connection on error, this will lead to broken connection object - throw e; - } - } + const officialExtensions = getEnv('duckdbExtensions', this.config); + await this.installExtensions(officialExtensions, execAsync); + await this.loadExtensions(officialExtensions, execAsync); + const communityExtensions = getEnv('duckdbCommunityExtensions', this.config); + await this.installExtensions(communityExtensions, execAsync, 'community'); + await this.loadExtensions(communityExtensions, execAsync); if (this.config.initSql) { try { @@ -175,7 +182,7 @@ export class DuckDBDriver extends BaseDriver implements DriverInterface { } } } - + return { defaultConnection, db