Skip to content

Commit

Permalink
feat: Adds .describe() for SELECT statements
Browse files Browse the repository at this point in the history
  • Loading branch information
rchowell committed Jan 23, 2025
1 parent b1fc25f commit 409f557
Show file tree
Hide file tree
Showing 6 changed files with 60 additions and 38 deletions.
4 changes: 2 additions & 2 deletions src/daft-schema/src/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -175,8 +175,8 @@ impl Schema {
res.push_str("<thead><tr>");

// Add header for column name and type
res.push_str("<th style=\"text-wrap: nowrap; max-width:192px; overflow:auto; text-align:left\">Column Name</th>");
res.push_str("<th style=\"text-wrap: nowrap; max-width:192px; overflow:auto; text-align:left\">Type</th>");
res.push_str("<th style=\"text-wrap: nowrap; max-width:192px; overflow:auto; text-align:left\">column</th>");
res.push_str("<th style=\"text-wrap: nowrap; max-width:192px; overflow:auto; text-align:left\">type</th>");

// End the header.
res.push_str("</tr></thead>\n");
Expand Down
19 changes: 12 additions & 7 deletions src/daft-sql/src/planner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -206,16 +206,21 @@ impl<'a> SQLPlanner<'a> {
})
.with_tokens(tokens);

// currently only allow one statement
let statements = parser.parse_statements()?;
// Require exactly one statement. An empty input parses to zero statements,
// and indexing `statements[0]` below would panic rather than return an error,
// so reject every length other than 1 (matching the error message).
if statements.len() != 1 {
    unsupported_sql_err!(
        "Only exactly one SQL statement allowed, found {}",
        statements.len()
    )
}

let plan = match statements.len() {
1 => Ok(self.plan_statement(&statements[0])?),
other => {
unsupported_sql_err!("Only exactly one SQL statement allowed, found {}", other)
}
};
// plan single statement
let stmt = &statements[0];
let plan = self.plan_statement(stmt)?.build();
self.clear_context();
plan

Ok(plan)
}

pub(crate) fn plan_query(&mut self, query: &Query) -> SQLPlannerResult<LogicalPlanBuilder> {
Expand Down
27 changes: 13 additions & 14 deletions src/daft-sql/src/statement.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use daft_logical_plan::LogicalPlanRef;
use daft_logical_plan::LogicalPlanBuilder;
use sqlparser::ast;

use crate::{error::SQLPlannerResult, unsupported_sql_err, SQLPlanner};
Expand All @@ -9,7 +9,7 @@ impl<'a> SQLPlanner<'a> {
pub(crate) fn plan_statement(
&mut self,
statement: &ast::Statement,
) -> SQLPlannerResult<LogicalPlanRef> {
) -> SQLPlannerResult<LogicalPlanBuilder> {
match statement {
ast::Statement::Query(query) => self.plan_select(query),
ast::Statement::Explain {
Expand All @@ -30,24 +30,24 @@ impl<'a> SQLPlanner<'a> {
*has_table_keyword,
table_name,
),
other => unsupported_sql_err!("{}", other),
other => unsupported_sql_err!("unsupported statement, {}", other),
}
}

/// SELECT ...
fn plan_select(&mut self, query: &ast::Query) -> SQLPlannerResult<LogicalPlanRef> {
Ok(self.plan_query(query)?.build())
fn plan_select(&mut self, query: &ast::Query) -> SQLPlannerResult<LogicalPlanBuilder> {
self.plan_query(query)
}

/// DESCRIBE <statement>
fn plan_describe(
&self,
&mut self,
describe_alias: &ast::DescribeAlias,
analyze: bool,
verbose: bool,
_statement: &ast::Statement,
statement: &ast::Statement,
format: &Option<ast::AnalyzeFormat>,
) -> SQLPlannerResult<LogicalPlanRef> {
) -> SQLPlannerResult<LogicalPlanBuilder> {
// err on `DESC | EXPLAIN`
if *describe_alias != ast::DescribeAlias::Describe {
unsupported_sql_err!(
Expand All @@ -59,17 +59,18 @@ impl<'a> SQLPlanner<'a> {
if analyze || verbose || format.is_some() {
unsupported_sql_err!("DESCRIBE ( options.. ) is not supported")
}
unsupported_sql_err!("DESCRIBE <statement> is not supported")
// plan statement and .describe()
Ok(self.plan_statement(statement)?.describe()?)
}

/// DESCRIBE TABLE <table>
/// DESCRIBE <table>
fn plan_describe_table(
&self,
describe_alias: &ast::DescribeAlias,
hive_format: &Option<ast::HiveDescribeFormat>,
has_table_keyword: bool,
table_name: &ast::ObjectName,
) -> SQLPlannerResult<LogicalPlanRef> {
) -> SQLPlannerResult<LogicalPlanBuilder> {
// err on `DESC | EXPLAIN`
if *describe_alias != ast::DescribeAlias::Describe {
unsupported_sql_err!(
Expand All @@ -86,8 +87,6 @@ impl<'a> SQLPlanner<'a> {
unsupported_sql_err!("DESCRIBE TABLE is not supported, did you mean DESCRIBE?")
}
// resolve table and .describe()
let rel = self.plan_relation_table(table_name)?;
let res = rel.inner.describe()?.build();
Ok(res)
Ok(self.plan_relation_table(table_name)?.inner.describe()?)
}
}
2 changes: 1 addition & 1 deletion tests/dataframe/test_repr.py
Original file line number Diff line number Diff line change
Expand Up @@ -308,7 +308,7 @@ def test_repr_empty_struct():
assert df.schema()._truncated_table_string() == expected_schema_truncated_repr

expected_schema_repr = """╭──────────────────────┬──────────────────────────────────╮
│ Column Name ┆ Type │
│ column ┆ type │
╞══════════════════════╪══════════════════════════════════╡
│ empty_structs ┆ Struct[] │
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
Expand Down
22 changes: 20 additions & 2 deletions tests/sql/test_describe.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,14 +27,32 @@ def assert_eq(df1, df2):
assert df1.collect().to_pydict() == df2.collect().to_pydict()


def test_describe():
def test_describe_table():
actual_df = daft.sql("DESCRIBE df")
expect_df = df.describe()
assert_eq(actual_df, expect_df)


@pytest.mark.skip("DESCRIBE TABLE syntax not supported")
def test_describe_table():
def test_describe_table_with_keyword():
actual_df = daft.sql("DESCRIBE TABLE df")
expect_df = df.describe()
assert_eq(actual_df, expect_df)


def test_describe_select_all():
    """`DESCRIBE SELECT * FROM df` must produce the same result as `df.describe()`."""
    assert_eq(daft.sql("DESCRIBE SELECT * FROM df"), df.describe())


def test_describe_select_one():
    """DESCRIBE over a single-column SELECT must match describing that projection."""
    assert_eq(
        daft.sql("DESCRIBE SELECT integers FROM df"),
        df.select("integers").describe(),
    )


def test_describe_select_some():
    """DESCRIBE over a multi-column SELECT must match describing that projection."""
    assert_eq(
        daft.sql("DESCRIBE SELECT integers, floats, bools FROM df"),
        df.select("integers", "floats", "bools").describe(),
    )
24 changes: 12 additions & 12 deletions tests/test_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,17 +96,17 @@ def test_repr():
without_escape = ANSI_ESCAPE.sub("", out_repr)
assert (
without_escape.replace("\r", "")
== """╭─────────────┬─────────╮
│ Column Name ┆ Type │
╞═════════════╪═════════╡
│ int ┆ Int64 │
├╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
│ float ┆ Float64 │
├╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
│ string ┆ Utf8 │
├╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
│ bool ┆ Boolean │
╰─────────────┴─────────╯
== """╭────────┬─────────╮
│ column ┆ type │
╞════════╪═════════╡
│ int ┆ Int64 │
├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
│ float ┆ Float64 │
├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
│ string ┆ Utf8 │
├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
│ bool ┆ Boolean │
╰────────┴─────────╯
"""
)

Expand All @@ -117,7 +117,7 @@ def test_repr_html():
assert (
out_repr
== f"""<table class="dataframe">
<thead><tr><th {TH_STYLE}>Column Name</th><th {TH_STYLE}>Type</th></tr></thead>
<thead><tr><th {TH_STYLE}>column</th><th {TH_STYLE}>type</th></tr></thead>
<tbody>
<tr><td {TD_STYLE}>int</td><td {TD_STYLE}>Int64</td></tr>
<tr><td {TD_STYLE}>float</td><td {TD_STYLE}>Float64</td></tr>
Expand Down

0 comments on commit 409f557

Please sign in to comment.