Skip to content

Commit

Permalink
more tests
Browse files Browse the repository at this point in the history
  • Loading branch information
adriangb committed Jan 31, 2025
1 parent 99a37c6 commit 450ed48
Showing 1 changed file with 100 additions and 8 deletions.
108 changes: 100 additions & 8 deletions datafusion/core/tests/sql/select.rs
Original file line number Diff line number Diff line change
Expand Up @@ -377,6 +377,7 @@ async fn test_select_system_column() {
let select0 = "SELECT * FROM test order by id";
let df = ctx.sql(select0).await.unwrap();
let batchs = df.collect().await.unwrap();
#[rustfmt::skip]
let expected = [
"+----+--------------+",
"| id | bank_account |",
Expand All @@ -391,6 +392,7 @@ async fn test_select_system_column() {
let select1 = "SELECT _rowid FROM test order by _rowid";
let df = ctx.sql(select1).await.unwrap();
let batchs = df.collect().await.unwrap();
#[rustfmt::skip]
let expected = [
"+--------+",
"| _rowid |",
Expand All @@ -405,6 +407,7 @@ async fn test_select_system_column() {
let select2 = "SELECT _rowid, id FROM test order by _rowid";
let df = ctx.sql(select2).await.unwrap();
let batchs = df.collect().await.unwrap();
#[rustfmt::skip]
let expected = [
"+--------+----+",
"| _rowid | id |",
Expand All @@ -419,6 +422,7 @@ async fn test_select_system_column() {
let select3 = "SELECT _rowid, id FROM test WHERE _rowid = 0";
let df = ctx.sql(select3).await.unwrap();
let batchs = df.collect().await.unwrap();
#[rustfmt::skip]
let expected = [
"+--------+----+",
"| _rowid | id |",
Expand All @@ -431,6 +435,7 @@ async fn test_select_system_column() {
let select4 = "SELECT _rowid FROM test LIMIT 1";
let df = ctx.sql(select4).await.unwrap();
let batchs = df.collect().await.unwrap();
#[rustfmt::skip]
let expected = [
"+--------+",
"| _rowid |",
Expand All @@ -443,6 +448,7 @@ async fn test_select_system_column() {
let select5 = "SELECT _rowid, id FROM test WHERE _rowid % 2 = 1";
let df = ctx.sql(select5).await.unwrap();
let batchs = df.collect().await.unwrap();
#[rustfmt::skip]
let expected = [
"+--------+----+",
"| _rowid | id |",
Expand All @@ -455,6 +461,7 @@ async fn test_select_system_column() {
let select6 = "SELECT _rowid, _file FROM test order by _rowid";
let df = ctx.sql(select6).await.unwrap();
let batchs = df.collect().await.unwrap();
#[rustfmt::skip]
let expected = [
"+--------+--------+",
"| _rowid | _file |",
Expand All @@ -466,11 +473,18 @@ async fn test_select_system_column() {
];
assert_batches_sorted_eq!(expected, &batchs);

let select6 = "SELECT id FROM test order by _rowid desc";
let select6 = "SELECT id FROM test order by _rowid asc";
let df = ctx.sql(select6).await.unwrap();
let batchs = df.collect().await.unwrap();
#[rustfmt::skip]
let expected = [
"+----+", "| id |", "+----+", "| 3 |", "| 2 |", "| 1 |", "+----+",
"+----+",
"| id |",
"+----+",
"| 1 |",
"| 2 |",
"| 3 |",
"+----+",
];
assert_batches_sorted_eq!(expected, &batchs);

Expand All @@ -495,21 +509,22 @@ async fn test_select_system_column() {
let batch = record_batch!(
("other_id", UInt8, [1, 2, 3]),
("bank_account", UInt64, [9, 10, 11]),
("_row_id", UInt32, [10, 11, 12]) // not a system column!
("_rowid", UInt32, [10, 11, 12]) // not a system column!
)
.unwrap();
let _ = ctx.register_batch("test2", batch);

// Normally _row_id would be a name conflict
// But when it's a conflict between a metadata column and a non-metadata column, the non metadata column should be used
// Normally _rowid would be a name conflict and throw an error during planning.
// But when it's a conflict between a system column and a non system column,
// the non system column should be used.
let select7 =
"SELECT id, other_id, _row_id FROM test INNER JOIN test2 ON id = other_id";
"SELECT id, other_id, _rowid FROM test INNER JOIN test2 ON id = other_id";
let df = ctx.sql(select7).await.unwrap();
let batchs = df.collect().await.unwrap();
#[rustfmt::skip]
let expected = [
"+----+----------+---------+",
"| id | other_id | _row_id |",
"| id | other_id | _rowid |",
"+----+----------+---------+",
"| 1 | 1 | 10 |",
"| 2 | 2 | 11 |",
Expand All @@ -518,8 +533,85 @@ async fn test_select_system_column() {
];
assert_batches_sorted_eq!(expected, &batchs);

// Demonstrate that for other columns we get a conflict
// Sanity check: for other columns we do get a conflict
let select7 =
"SELECT id, other_id, bank_account FROM test INNER JOIN test2 ON id = other_id";
assert!(ctx.sql(select7).await.is_err());

// Demonstrate that we can join on _rowid
let batch = record_batch!(
("other_id", UInt8, [2, 3, 4]),
("_rowid", UInt32, [2, 3, 4])
)
.unwrap();
let batch = batch
.with_schema(Arc::new(Schema::new(vec![
Field::new("other_id", DataType::UInt8, true),
Field::new("_rowid", DataType::UInt32, true).to_system_column(),
])))
.unwrap();
let _ = ctx.register_batch("test2", batch);

let select8 = "SELECT id, other_id, _rowid FROM test JOIN test2 ON _rowid = _rowid";
let df = ctx.sql(select8).await.unwrap();
let batches = df.collect().await.unwrap();
#[rustfmt::skip]
let expected = [
"+----+----------+---------+",
"| id | other_id | _rowid |",
"+----+----------+---------+",
"| 2 | 2 | 2 |",
"+----+----------+---------+",
];
assert_batches_sorted_eq!(expected, &batches);

// Once passed through a projection, system columns are no longer available
let select9 = r"
WITH cte AS (SELECT * FROM test)
SELECT * FROM cte
";
let df = ctx.sql(select9).await.unwrap();
let batches = df.collect().await.unwrap();
#[rustfmt::skip]
let expected = [
"+----+----------+---------+",
"| id | other_id | _rowid |",
"+----+----------+---------+",
"| 2 | 2 | 2 |",
"+----+----------+---------+",
];
assert_batches_sorted_eq!(expected, &batches);
let select10 = r"
WITH cte AS (SELECT * FROM test)
SELECT _rowid FROM cte
";
let df = ctx.sql(select10).await.unwrap();
let batches = df.collect().await.unwrap();
#[rustfmt::skip]
let expected = [
"+----+----------+---------+",
"| id | other_id | _rowid |",
"+----+----------+---------+",
"| 2 | 2 | 2 |",
"+----+----------+---------+",
];
assert_batches_sorted_eq!(expected, &batches);

// And if passed explicitly selected and passed through a projection
// they are no longer system columns.
let select11 = r"
WITH cte AS (SELECT id, _rowid FROM test)
SELECT * FROM cte
";
let df = ctx.sql(select11).await.unwrap();
let batches = df.collect().await.unwrap();
#[rustfmt::skip]
let expected = [
"+----+---------+",
"| id | _rowid |",
"+----+---------+",
"| 2 | 2 |",
"+----+---------+",
];
assert_batches_sorted_eq!(expected, &batches);
}

0 comments on commit 450ed48

Please sign in to comment.