Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Initial support for grouped join pushdown #9032

Merged
merged 26 commits into from
Jan 14, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
bcbeee9
refactor(cubesql): Extract CubeScanWrappedSqlNode from CubeScanWrappe…
mcheshkov Sep 11, 2024
43b04a5
refactor(cubesql): Extract cube join condition check for rewrites to …
mcheshkov Oct 16, 2024
b0203e7
refactor(cubesql): Remove join_on.*.clone() calls in is_proper_cube_j…
mcheshkov Aug 28, 2024
7fef2d3
fix(cubesql): Allow only left.__cubeJoinField=right.__cubeJoinField a…
mcheshkov Oct 16, 2024
ea01d16
test(cubesql): Forgotten ..Default::default() in one test
mcheshkov Nov 22, 2024
101515a
refactor(cubesql): Introduce PushToCubeContext to wrapped SQL generation
mcheshkov Nov 29, 2024
0d603b7
refactor(cubesql): Use wrapper_pullup_replacer(wrapped_select_joins_e…
mcheshkov Oct 15, 2024
ab7be3f
refactor(cubesql): Introduce copy_value! macro
mcheshkov Nov 29, 2024
8780c3b
feat(cubesql): Add grouped_subqueries to wrapper replacer context
mcheshkov Nov 29, 2024
2b3f82f
refactor(cubesql): Make completely new load query for push to Cube
mcheshkov Nov 11, 2024
5835749
feat: Extend load request with subquery joins
mcheshkov Dec 2, 2024
90203b3
feat(cubesql): Format best plan and cost in debug log
mcheshkov Nov 11, 2024
6fffbfc
feat: Support custom subquery joins in BaseQuery
mcheshkov Nov 20, 2024
b42339a
chore(cubesql): Add join pushdown rules stub
mcheshkov Nov 29, 2024
2338b6b
feat(cubesql): Add single WrappedSelectJoin pullup rule
mcheshkov Nov 29, 2024
f24d10d
feat(cubesql): Add WrappedSelectJoins list pullup rules
mcheshkov Nov 29, 2024
c39d780
feat(cubesql): Allow wrapper pull up to go across joins
mcheshkov Oct 14, 2024
4be7296
feat(cubesql): Allow aggregation flattening to have joins in input
mcheshkov Nov 26, 2024
4b37dff
fix(cubesql): Convert joins schema in WrappedSelect from language to …
mcheshkov Oct 14, 2024
44a4b99
feat(cubesql): Penalize any join in cost more than wrappers
mcheshkov Dec 2, 2024
e0013ca
chore(cubesql): Add comment about async generate_sql_for_expr
mcheshkov Oct 14, 2024
ed4a9e4
feat(cubesql): Handle grouped subqueries in SQL generation for push t…
mcheshkov Dec 6, 2024
7afa70f
feat(cubesql): Rewrite ungrouped-grouped join under wrapper to a sing…
mcheshkov Nov 29, 2024
d954404
test(cubesql): Add grouped join tests
mcheshkov Dec 6, 2024
ef471a6
refactor(cubesql): Turn extend_values to generic, get params by value
mcheshkov Dec 9, 2024
e34feec
test: Add smoke tests for grouped joins
mcheshkov Dec 9, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions packages/cubejs-api-gateway/openspec.yml
Original file line number Diff line number Diff line change
Expand Up @@ -329,6 +329,24 @@ components:
- $ref: "#/components/schemas/V1LoadRequestQueryFilterBase"
- $ref: "#/components/schemas/V1LoadRequestQueryFilterLogicalOr"
- $ref: "#/components/schemas/V1LoadRequestQueryFilterLogicalAnd"
V1LoadRequestQueryJoinSubquery:
type: "object"
properties:
sql:
type: "string"
# TODO This is _always_ a member expression, maybe pass as parsed, without intermediate string?
"on":
type: "string"
# TODO why string? it's enum
joinType:
type: "string"
alias:
type: "string"
required:
- sql
- "on"
- joinType
- alias
V1LoadRequestQuery:
type: "object"
properties:
Expand Down Expand Up @@ -366,6 +384,12 @@ components:
$ref: "#/components/schemas/V1LoadRequestQueryFilterItem"
ungrouped:
type: "boolean"
# vector of (subquery sql: string, join condition: member expression, join type: enum)
# they will be added to end of joinQuery in BaseQuery, in same order as here
subqueryJoins:
type: "array"
items:
$ref: "#/components/schemas/V1LoadRequestQueryJoinSubquery"
V1LoadRequest:
type: "object"
properties:
Expand Down
15 changes: 14 additions & 1 deletion packages/cubejs-api-gateway/src/gateway.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1312,7 +1312,12 @@ class ApiGateway {
}

private hasExpressionsInQuery(query: Query): boolean {
const arraysToCheck = [query.measures, query.dimensions, query.segments];
const arraysToCheck = [
query.measures,
query.dimensions,
query.segments,
(query.subqueryJoins ?? []).map(join => join.on),
];

return arraysToCheck.some(array => array?.some(item => typeof item === 'string' && item.startsWith('{')));
}
Expand All @@ -1323,6 +1328,10 @@ class ApiGateway {
measures: (query.measures || []).map(m => (typeof m === 'string' ? this.parseMemberExpression(m) : m)),
dimensions: (query.dimensions || []).map(m => (typeof m === 'string' ? this.parseMemberExpression(m) : m)),
segments: (query.segments || []).map(m => (typeof m === 'string' ? this.parseMemberExpression(m) : m)),
subqueryJoins: (query.subqueryJoins ?? []).map(join => (typeof join.on === 'string' ? {
...join,
on: this.parseMemberExpression(join.on),
} : join)),
};
}

Expand Down Expand Up @@ -1361,6 +1370,10 @@ class ApiGateway {
measures: (query.measures || []).map(m => (typeof m !== 'string' ? this.evalMemberExpression(m as ParsedMemberExpression) : m)),
dimensions: (query.dimensions || []).map(m => (typeof m !== 'string' ? this.evalMemberExpression(m as ParsedMemberExpression) : m)),
segments: (query.segments || []).map(m => (typeof m !== 'string' ? this.evalMemberExpression(m as ParsedMemberExpression) : m)),
subqueryJoins: (query.subqueryJoins ?? []).map(join => (typeof join.on !== 'string' ? {
...join,
on: this.evalMemberExpression(join.on as ParsedMemberExpression)
} : join)),
};
}

Expand Down
10 changes: 10 additions & 0 deletions packages/cubejs-api-gateway/src/query.js
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,15 @@ const oneCondition = Joi.object().keys({
and: Joi.array().items(oneFilter, Joi.link('...').description('oneCondition schema')),
}).xor('or', 'and');

const subqueryJoin = Joi.object().keys({
sql: Joi.string(),
// TODO This is _always_ a member expression, maybe pass as parsed, without intermediate string?
// TODO there are three different types instead of alternatives for this actually
on: Joi.alternatives(Joi.string(), memberExpression, parsedMemberExpression),
joinType: Joi.string().valid('LEFT', 'INNER'),
alias: Joi.string(),
});

const querySchema = Joi.object().keys({
// TODO add member expression alternatives only for SQL API queries?
measures: Joi.array().items(Joi.alternatives(id, memberExpression, parsedMemberExpression)),
Expand All @@ -122,6 +131,7 @@ const querySchema = Joi.object().keys({
renewQuery: Joi.boolean(),
ungrouped: Joi.boolean(),
responseFormat: Joi.valid('default', 'compact'),
subqueryJoins: Joi.array().items(subqueryJoin),
});

const normalizeQueryOrder = order => {
Expand Down
12 changes: 12 additions & 0 deletions packages/cubejs-api-gateway/src/types/query.ts
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,15 @@ interface QueryTimeDimension {
granularity?: QueryTimeDimensionGranularity;
}

type SubqueryJoins = {
sql: string,
// TODO This is _always_ a member expression, maybe pass as parsed, without intermediate string?
// TODO there are three different types instead of alternatives for this actually
on: string | ParsedMemberExpression | MemberExpression,
joinType: 'LEFT' | 'INNER',
alias: string,
};

/**
* Incoming network query data type.
*/
Expand All @@ -85,6 +94,9 @@ interface Query {
renewQuery?: boolean;
ungrouped?: boolean;
responseFormat?: ResultType;

// TODO incoming query, query with parsed exprs and query with evaluated exprs are all different types
subqueryJoins?: Array<SubqueryJoins>,
}

/**
Expand Down
52 changes: 50 additions & 2 deletions packages/cubejs-schema-compiler/src/adapter/BaseQuery.js
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,23 @@ const SecondsDurations = {
* @property {*} headCommitId
*/

/**
* @typedef {Object} JoinRoot
* @property {string} sql
* @property {string} alias
*/

/**
* @typedef {Object} JoinItem
* @property {string} sql
* @property {string} alias
* @property {string} on
*/

/**
* @typedef {[JoinRoot, ...JoinItem]} JoinChain
*/

/**
* BaseQuery class. BaseQuery object encapsulates the logic of
* transforming an incoming to a specific cube request to the
Expand Down Expand Up @@ -224,6 +241,7 @@ export class BaseQuery {
multiStageQuery: this.options.multiStageQuery,
multiStageDimensions: this.options.multiStageDimensions,
multiStageTimeDimensions: this.options.multiStageTimeDimensions,
subqueryJoins: this.options.subqueryJoins,
});
this.from = this.options.from;
this.multiStageQuery = this.options.multiStageQuery;
Expand Down Expand Up @@ -269,6 +287,11 @@ export class BaseQuery {
this.preAggregationsSchemaOption = this.options.preAggregationsSchema ?? DEFAULT_PREAGGREGATIONS_SCHEMA;
this.externalQueryClass = this.options.externalQueryClass;

/**
* @type {Array<{sql: string, on: {expression: Function}, joinType: 'LEFT' | 'INNER', alias: string}>}
*/
this.customSubQueryJoins = this.options.subqueryJoins ?? [];

// Set the default order only when options.order is not provided at all
// if options.order is set (empty array [] or with data) - use it as is
this.order = this.options.order ?? this.defaultOrder();
Expand Down Expand Up @@ -1604,19 +1627,44 @@ export class BaseQuery {
return this.joinSql([
{ sql: cubeSql, alias: cubeAlias },
...(subQueryDimensionsByCube[join.root] || []).map(d => this.subQueryJoin(d)),
...joins
...joins,
...this.customSubQueryJoins.map((customJoin) => this.customSubQueryJoin(customJoin)),
]);
}

joinSql(toJoin) {
const [root, ...rest] = toJoin;
const joins = rest.map(
j => `LEFT JOIN ${j.sql} ${this.asSyntaxJoin} ${j.alias} ON ${j.on}`
j => {
const joinType = j.joinType ?? 'LEFT';
return `${joinType} JOIN ${j.sql} ${this.asSyntaxJoin} ${j.alias} ON ${j.on}`;
}
);

return [`${root.sql} ${this.asSyntaxJoin} ${root.alias}`, ...joins].join('\n');
}

/**
*
* @param {{sql: string, on: {cubeName: string, expression: Function}, joinType: 'LEFT' | 'INNER', alias: string}} customJoin
* @returns {JoinItem}
*/
customSubQueryJoin(customJoin) {
const on = this.evaluateSql(customJoin.on.cubeName, customJoin.on.expression);

return {
sql: `(${customJoin.sql})`,
alias: customJoin.alias,
on,
joinType: customJoin.joinType,
};
}

/**
*
* @param {string} dimension
* @returns {JoinItem}
*/
subQueryJoin(dimension) {
const { prefix, subQuery, cubeName } = this.subQueryDescription(dimension);
const primaryKeys = this.cubeEvaluator.primaryKeys[cubeName];
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,32 @@ Array [
]
`;

exports[`SQL API Postgres (Data) join with filtered grouped query: join grouped with filter 1`] = `
Array [
Object {
"count": "2",
"status": "processed",
},
Object {
"count": "2",
"status": "new",
},
]
`;

exports[`SQL API Postgres (Data) join with grouped query: join grouped 1`] = `
Array [
Object {
"count": "2",
"status": "processed",
},
Object {
"count": "1",
"status": "shipped",
},
]
`;

exports[`SQL API Postgres (Data) metabase max number: metabase max number 1`] = `
Array [
Object {
Expand Down
59 changes: 59 additions & 0 deletions packages/cubejs-testing/test/smoke-cubesql.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -476,6 +476,65 @@ filter_subq AS (
expect(res.rows).toMatchSnapshot('select __user and literal in wrapper');
});

test('join with grouped query', async () => {
const query = `
SELECT
"Orders".status AS status,
COUNT(*) AS count
FROM
"Orders"
INNER JOIN
(
SELECT
status,
SUM(totalAmount)
FROM
"Orders"
GROUP BY 1
ORDER BY 2 DESC
LIMIT 2
) top_orders
ON
"Orders".status = top_orders.status
GROUP BY 1
ORDER BY 1
`;

const res = await connection.query(query);
// Expect only top statuses 2 by total amount: processed and shipped
expect(res.rows).toMatchSnapshot('join grouped');
});

test('join with filtered grouped query', async () => {
const query = `
SELECT
"Orders".status AS status,
COUNT(*) AS count
FROM
"Orders"
INNER JOIN
(
SELECT
status,
SUM(totalAmount)
FROM
"Orders"
WHERE
status NOT IN ('shipped')
GROUP BY 1
ORDER BY 2 DESC
LIMIT 2
) top_orders
ON
"Orders".status = top_orders.status
GROUP BY 1
`;

const res = await connection.query(query);
// Expect only top statuses 2 by total amount, with shipped filtered out: processed and new
expect(res.rows).toMatchSnapshot('join grouped with filter');
});

test('where segment is false', async () => {
const query =
'SELECT value AS val, * FROM "SegmentTest" WHERE segment_eq_1 IS FALSE ORDER BY value;';
Expand Down
1 change: 1 addition & 0 deletions rust/cubesql/cubeclient/.openapi-generator/FILES
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ src/models/v1_load_request_query_filter_base.rs
src/models/v1_load_request_query_filter_item.rs
src/models/v1_load_request_query_filter_logical_and.rs
src/models/v1_load_request_query_filter_logical_or.rs
src/models/v1_load_request_query_join_subquery.rs
src/models/v1_load_request_query_time_dimension.rs
src/models/v1_load_response.rs
src/models/v1_load_result.rs
Expand Down
2 changes: 2 additions & 0 deletions rust/cubesql/cubeclient/src/models/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ pub mod v1_load_request_query_filter_logical_and;
pub use self::v1_load_request_query_filter_logical_and::V1LoadRequestQueryFilterLogicalAnd;
pub mod v1_load_request_query_filter_logical_or;
pub use self::v1_load_request_query_filter_logical_or::V1LoadRequestQueryFilterLogicalOr;
pub mod v1_load_request_query_join_subquery;
pub use self::v1_load_request_query_join_subquery::V1LoadRequestQueryJoinSubquery;
pub mod v1_load_request_query_time_dimension;
pub use self::v1_load_request_query_time_dimension::V1LoadRequestQueryTimeDimension;
pub mod v1_load_response;
Expand Down
3 changes: 3 additions & 0 deletions rust/cubesql/cubeclient/src/models/v1_load_request_query.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ pub struct V1LoadRequestQuery {
pub filters: Option<Vec<crate::models::V1LoadRequestQueryFilterItem>>,
#[serde(rename = "ungrouped", skip_serializing_if = "Option::is_none")]
pub ungrouped: Option<bool>,
#[serde(rename = "subqueryJoins", skip_serializing_if = "Option::is_none")]
pub subquery_joins: Option<Vec<crate::models::V1LoadRequestQueryJoinSubquery>>,
}

impl V1LoadRequestQuery {
Expand All @@ -42,6 +44,7 @@ impl V1LoadRequestQuery {
offset: None,
filters: None,
ungrouped: None,
subquery_joins: None,
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
/*
* Cube.js
*
* Cube.js Swagger Schema
*
* The version of the OpenAPI document: 1.0.0
*
* Generated by: https://openapi-generator.tech
*/

#[derive(Clone, Debug, PartialEq, Default, Serialize, Deserialize)]

Check warning on line 11 in rust/cubesql/cubeclient/src/models/v1_load_request_query_join_subquery.rs

View check run for this annotation

Codecov / codecov/patch

rust/cubesql/cubeclient/src/models/v1_load_request_query_join_subquery.rs#L11

Added line #L11 was not covered by tests
pub struct V1LoadRequestQueryJoinSubquery {
#[serde(rename = "sql")]
pub sql: String,
#[serde(rename = "on")]
pub on: String,
#[serde(rename = "joinType")]
pub join_type: String,
#[serde(rename = "alias")]
pub alias: String,
}

impl V1LoadRequestQueryJoinSubquery {
pub fn new(
sql: String,
on: String,
join_type: String,
alias: String,
) -> V1LoadRequestQueryJoinSubquery {
V1LoadRequestQueryJoinSubquery {
sql,
on,
join_type,
alias,
}
}

Check warning on line 36 in rust/cubesql/cubeclient/src/models/v1_load_request_query_join_subquery.rs

View check run for this annotation

Codecov / codecov/patch

rust/cubesql/cubeclient/src/models/v1_load_request_query_join_subquery.rs#L24-L36

Added lines #L24 - L36 were not covered by tests
}
1 change: 1 addition & 0 deletions rust/cubesql/cubesql/src/compile/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,7 @@
None
},
ungrouped: None,
subquery_joins: None,

Check warning on line 154 in rust/cubesql/cubesql/src/compile/builder.rs

View check run for this annotation

Codecov / codecov/patch

rust/cubesql/cubesql/src/compile/builder.rs#L154

Added line #L154 was not covered by tests
},
meta: self.meta,
}
Expand Down
Loading
Loading