diff --git a/optd-persistent/src/bin/init.rs b/optd-persistent/src/bin/init.rs index c2291d7..e39bd56 100644 --- a/optd-persistent/src/bin/init.rs +++ b/optd-persistent/src/bin/init.rs @@ -355,7 +355,8 @@ async fn init_all_tables() -> Result<(), sea_orm::error::DbErr> { id: Set(1), physical_expression_id: Set(1), epoch_id: Set(1), - cost: Set(10), + cost: Set(json!({"compute_cost":10, "io_cost":10})), + estimated_statistic: Set(10), is_valid: Set(true), }; plan_cost::Entity::insert(plan_cost) diff --git a/optd-persistent/src/cost_model/interface.rs b/optd-persistent/src/cost_model/interface.rs index d896ec1..92d7e44 100644 --- a/optd-persistent/src/cost_model/interface.rs +++ b/optd-persistent/src/cost_model/interface.rs @@ -67,6 +67,15 @@ pub struct Stat { pub name: String, } +/// TODO: documentation +#[derive(Clone, Debug, PartialEq)] +pub struct Cost { + pub compute_cost: i32, + pub io_cost: i32, + // Raw estimated output row count of targeted expression. + pub estimated_statistic: i32, +} + /// TODO: documentation #[trait_variant::make(Send)] pub trait CostModelStorageLayer { @@ -91,7 +100,7 @@ pub trait CostModelStorageLayer { async fn store_cost( &self, expr_id: Self::ExprId, - cost: i32, + cost: Cost, epoch_id: Self::EpochId, ) -> StorageResult<()>; @@ -126,7 +135,7 @@ pub trait CostModelStorageLayer { &self, expr_id: Self::ExprId, epoch_id: Self::EpochId, - ) -> StorageResult>; + ) -> StorageResult>; - async fn get_cost(&self, expr_id: Self::ExprId) -> StorageResult>; + async fn get_cost(&self, expr_id: Self::ExprId) -> StorageResult>; } diff --git a/optd-persistent/src/cost_model/orm.rs b/optd-persistent/src/cost_model/orm.rs index dc35d2e..b6d1cdc 100644 --- a/optd-persistent/src/cost_model/orm.rs +++ b/optd-persistent/src/cost_model/orm.rs @@ -2,6 +2,7 @@ use std::ptr::null; +use crate::cost_model::interface::Cost; use crate::entities::{prelude::*, *}; use crate::{BackendError, BackendManager, CostModelError, CostModelStorageLayer, StorageResult}; use sea_orm::prelude::{Expr, Json}; @@ -11,6 +12,7 @@ use sea_orm::{ ActiveModelTrait, ColumnTrait, DbBackend, DbErr, DeleteResult, EntityOrSelect, ModelTrait, QueryFilter, QueryOrder, QuerySelect, QueryTrait, RuntimeErr, TransactionTrait, }; +use serde_json::json; use super::catalog::mock_catalog::{self, MockCatalog}; use super::interface::{CatalogSource, EpochOption, Stat}; @@ -443,7 +445,7 @@ impl CostModelStorageLayer for BackendManager { &self, expr_id: Self::ExprId, epoch_id: Self::EpochId, - ) -> StorageResult> { + ) -> StorageResult> { let cost = PlanCost::find() .filter(plan_cost::Column::PhysicalExpressionId.eq(expr_id)) .filter(plan_cost::Column::EpochId.eq(epoch_id)) @@ -451,10 +453,14 @@ impl CostModelStorageLayer for BackendManager { .await?; assert!(cost.is_some(), "Cost not found in Cost table"); assert!(cost.clone().unwrap().is_valid, "Cost is not valid"); - Ok(cost.map(|c| c.cost)) + Ok(cost.map(|c| Cost { + compute_cost: c.cost.get("compute_cost").unwrap().as_i64().unwrap() as i32, + io_cost: c.cost.get("io_cost").unwrap().as_i64().unwrap() as i32, + estimated_statistic: c.estimated_statistic, + })) } - async fn get_cost(&self, expr_id: Self::ExprId) -> StorageResult> { + async fn get_cost(&self, expr_id: Self::ExprId) -> StorageResult> { let cost = PlanCost::find() .filter(plan_cost::Column::PhysicalExpressionId.eq(expr_id)) .order_by_desc(plan_cost::Column::EpochId) @@ -462,14 +468,18 @@ impl CostModelStorageLayer for BackendManager { .await?; assert!(cost.is_some(), "Cost not found in Cost table"); assert!(cost.clone().unwrap().is_valid, "Cost is not valid"); - Ok(cost.map(|c| c.cost)) + Ok(cost.map(|c| Cost { + compute_cost: c.cost.get("compute_cost").unwrap().as_i64().unwrap() as i32, + io_cost: c.cost.get("io_cost").unwrap().as_i64().unwrap() as i32, + estimated_statistic: c.estimated_statistic, + })) } /// TODO: documentation async fn store_cost( &self, physical_expression_id: Self::ExprId, - cost: i32, + cost: Cost, epoch_id: Self::EpochId, ) -> StorageResult<()> { let expr_exists = PhysicalExpression::find_by_id(physical_expression_id) @@ -496,7 +506,10 @@ impl CostModelStorageLayer for BackendManager { let new_cost = plan_cost::ActiveModel { physical_expression_id: sea_orm::ActiveValue::Set(physical_expression_id), epoch_id: sea_orm::ActiveValue::Set(epoch_id), - cost: sea_orm::ActiveValue::Set(cost), + cost: sea_orm::ActiveValue::Set( + json!({"compute_cost": cost.compute_cost, "io_cost": cost.io_cost}), + ), + estimated_statistic: sea_orm::ActiveValue::Set(cost.estimated_statistic), is_valid: sea_orm::ActiveValue::Set(true), ..Default::default() }; @@ -507,7 +520,7 @@ impl CostModelStorageLayer for BackendManager { #[cfg(test)] mod tests { - use crate::cost_model::interface::{EpochOption, StatType}; + use crate::cost_model::interface::{Cost, EpochOption, StatType}; use crate::{cost_model::interface::Stat, migrate, CostModelStorageLayer}; use crate::{get_sqlite_url, TEST_DATABASE_FILE}; use sea_orm::sqlx::database; @@ -681,7 +694,17 @@ mod tests { .await .unwrap(); backend_manager - .store_cost(expr_id, 42, versioned_stat_res[0].epoch_id) + .store_cost( + expr_id, + { + Cost { + compute_cost: 42, + io_cost: 42, + estimated_statistic: 42, + } + }, + versioned_stat_res[0].epoch_id, + ) .await .unwrap(); let cost_res = PlanCost::find() @@ -744,7 +767,7 @@ mod tests { .await .unwrap(); assert_eq!(cost_res.len(), 1); - assert_eq!(cost_res[0].cost, 42); + assert_eq!(cost_res[0].cost, json!({"compute_cost": 42, "io_cost": 42})); assert_eq!(cost_res[0].epoch_id, epoch_id1); assert!(!cost_res[0].is_valid); @@ -875,9 +898,13 @@ mod tests { .await .unwrap(); let physical_expression_id = 1; - let cost = 42; + let cost = Cost { + compute_cost: 42, + io_cost: 42, + estimated_statistic: 42, + }; backend_manager - .store_cost(physical_expression_id, cost, epoch_id) + .store_cost(physical_expression_id, cost.clone(), epoch_id) .await .unwrap(); let costs = super::PlanCost::find() @@ -887,7 +914,14 @@ mod tests { assert_eq!(costs.len(), 2); // The first row one is the initialized data assert_eq!(costs[1].epoch_id, epoch_id); assert_eq!(costs[1].physical_expression_id, physical_expression_id); - assert_eq!(costs[1].cost, cost); + assert_eq!( + costs[1].cost, + json!({"compute_cost": cost.compute_cost, "io_cost": cost.io_cost}) + ); + assert_eq!( + costs[1].estimated_statistic as i32, + cost.estimated_statistic + ); remove_db_file(DATABASE_FILE); } @@ -903,9 +937,13 @@ mod tests { .await .unwrap(); let physical_expression_id = 1; - let cost = 42; + let cost = Cost { + compute_cost: 42, + io_cost: 42, + estimated_statistic: 42, + }; let _ = backend_manager - .store_cost(physical_expression_id, cost, epoch_id) + .store_cost(physical_expression_id, cost.clone(), epoch_id) .await; let costs = super::PlanCost::find() .all(&backend_manager.db) @@ -914,7 +952,14 @@ mod tests { assert_eq!(costs.len(), 2); // The first row one is the initialized data assert_eq!(costs[1].epoch_id, epoch_id); assert_eq!(costs[1].physical_expression_id, physical_expression_id); - assert_eq!(costs[1].cost, cost); + assert_eq!( + costs[1].cost, + json!({"compute_cost": cost.compute_cost, "io_cost": cost.io_cost}) + ); + assert_eq!( + costs[1].estimated_statistic as i32, + cost.estimated_statistic + ); let res = backend_manager .get_cost(physical_expression_id) @@ -936,9 +981,13 @@ mod tests { .await .unwrap(); let physical_expression_id = 1; - let cost = 42; + let cost = Cost { + compute_cost: 1420, + io_cost: 42, + estimated_statistic: 42, + }; let _ = backend_manager - .store_cost(physical_expression_id, cost, epoch_id) + .store_cost(physical_expression_id, cost.clone(), epoch_id) .await; let costs = super::PlanCost::find() .all(&backend_manager.db) @@ -947,7 +996,14 @@ mod tests { assert_eq!(costs.len(), 2); // The first row one is the initialized data assert_eq!(costs[1].epoch_id, epoch_id); assert_eq!(costs[1].physical_expression_id, physical_expression_id); - assert_eq!(costs[1].cost, cost); + assert_eq!( + costs[1].cost, + json!({"compute_cost": cost.compute_cost, "io_cost": cost.io_cost}) + ); + assert_eq!( + costs[1].estimated_statistic as i32, + cost.estimated_statistic + ); println!("{:?}", costs); // Retrieve physical_expression_id 1 and epoch_id 1 @@ -957,7 +1013,14 @@ mod tests { .unwrap(); // The cost in the dummy data is 10 - assert_eq!(res.unwrap(), 10); + assert_eq!( + res.unwrap(), + Cost { + compute_cost: 10, + io_cost: 10, + estimated_statistic: 10, + } + ); remove_db_file(DATABASE_FILE); } diff --git a/optd-persistent/src/db/init.db b/optd-persistent/src/db/init.db index 3d860ba..6395ac9 100644 Binary files a/optd-persistent/src/db/init.db and b/optd-persistent/src/db/init.db differ diff --git a/optd-persistent/src/entities/constraint.rs b/optd-persistent/src/entities/constraint.rs deleted file mode 100644 index 0dd2334..0000000 --- a/optd-persistent/src/entities/constraint.rs +++ /dev/null @@ -1,68 +0,0 @@ -//! `SeaORM` Entity, @generated by sea-orm-codegen 1.1.1 - -use sea_orm::entity::prelude::*; - -#[derive(Clone, Debug, PartialEq, DeriveEntityModel, Eq)] -#[sea_orm(table_name = "constraint")] -pub struct Model { - #[sea_orm(primary_key)] - pub id: i32, - pub name: String, - pub variant_tag: i32, - pub table_id: Option, - pub index_id: Option, - pub foreign_ref_id: Option, - pub check_src: String, -} - -#[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)] -pub enum Relation { - #[sea_orm(has_many = "super::attribute_constraint_junction::Entity")] - AttributeConstraintJunction, - #[sea_orm(has_many = "super::attribute_foreign_constraint_junction::Entity")] - AttributeForeignConstraintJunction, - #[sea_orm( - belongs_to = "super::index::Entity", - from = "Column::IndexId", - to = "super::index::Column::Id", - on_update = "Cascade", - on_delete = "Cascade" - )] - Index, - #[sea_orm( - belongs_to = "super::table_metadata::Entity", - from = "Column::ForeignRefId", - to = "super::table_metadata::Column::Id", - on_update = "Cascade", - on_delete = "Cascade" - )] - TableMetadata2, - #[sea_orm( - belongs_to = "super::table_metadata::Entity", - from = "Column::TableId", - to = "super::table_metadata::Column::Id", - on_update = "Cascade", - on_delete = "Cascade" - )] - TableMetadata1, -} - -impl Related for Entity { - fn to() -> RelationDef { - Relation::AttributeConstraintJunction.def() - } -} - -impl Related for Entity { - fn to() -> RelationDef { - Relation::AttributeForeignConstraintJunction.def() - } -} - -impl Related for Entity { - fn to() -> RelationDef { - Relation::Index.def() - } -} - -impl ActiveModelBehavior for ActiveModel {} diff --git a/optd-persistent/src/entities/index.rs b/optd-persistent/src/entities/index.rs deleted file mode 100644 index 824c7b4..0000000 --- a/optd-persistent/src/entities/index.rs +++ /dev/null @@ -1,48 +0,0 @@ -//! `SeaORM` Entity, @generated by sea-orm-codegen 1.1.1 - -use sea_orm::entity::prelude::*; - -#[derive(Clone, Debug, PartialEq, DeriveEntityModel, Eq)] -#[sea_orm(table_name = "index")] -pub struct Model { - #[sea_orm(primary_key)] - pub id: i32, - pub table_id: i32, - pub name: String, - pub number_of_attributes: i32, - pub variant_tag: i32, - pub is_unique: bool, - pub nulls_not_distinct: bool, - pub is_primary: bool, - pub is_clustered: bool, - pub is_exclusion: bool, - pub description: String, -} - -#[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)] -pub enum Relation { - #[sea_orm(has_many = "super::constraint::Entity")] - Constraint, - #[sea_orm( - belongs_to = "super::table_metadata::Entity", - from = "Column::TableId", - to = "super::table_metadata::Column::Id", - on_update = "Cascade", - on_delete = "Cascade" - )] - TableMetadata, -} - -impl Related for Entity { - fn to() -> RelationDef { - Relation::Constraint.def() - } -} - -impl Related for Entity { - fn to() -> RelationDef { - Relation::TableMetadata.def() - } -} - -impl ActiveModelBehavior for ActiveModel {} diff --git a/optd-persistent/src/entities/plan_cost.rs b/optd-persistent/src/entities/plan_cost.rs index d284762..1acf101 100644 --- a/optd-persistent/src/entities/plan_cost.rs +++ b/optd-persistent/src/entities/plan_cost.rs @@ -9,7 +9,8 @@ pub struct Model { pub id: i32, pub physical_expression_id: i32, pub epoch_id: i32, - pub cost: i32, + pub cost: Json, + pub estimated_statistic: i32, pub is_valid: bool, } diff --git a/optd-persistent/src/migrator/cost_model/m20241029_000001_plan_cost.rs b/optd-persistent/src/migrator/cost_model/m20241029_000001_plan_cost.rs index 3ec19fc..d8f9bf9 100644 --- a/optd-persistent/src/migrator/cost_model/m20241029_000001_plan_cost.rs +++ b/optd-persistent/src/migrator/cost_model/m20241029_000001_plan_cost.rs @@ -12,7 +12,10 @@ pub enum PlanCost { Id, PhysicalExpressionId, EpochId, + // It is json type, including computation cost, I/O cost, etc. Cost, + // Raw estimated output row count of this expression + EstimatedStatistic, // Whether the cost is valid or not. If the latest cost for an expr is invalid, then we need to recompute the cost. // We need to invalidate the cost when the related stats are updated. IsValid, @@ -46,7 +49,8 @@ impl MigrationTrait for Migration { .on_delete(ForeignKeyAction::Cascade) .on_update(ForeignKeyAction::Cascade), ) - .col(integer(PlanCost::Cost)) + .col(json(PlanCost::Cost)) + .col(integer(PlanCost::EstimatedStatistic)) .col(boolean(PlanCost::IsValid)) .to_owned(), ) diff --git a/schema/all_tables.dbml b/schema/all_tables.dbml index 91185b2..091115d 100644 --- a/schema/all_tables.dbml +++ b/schema/all_tables.dbml @@ -59,7 +59,10 @@ Table plan_cost { id integer PK physical_expression_id integer [ref: > physical_expression.id] epoch_id integer [ref: > event.epoch_id] - cost integer + // It is json type, including computation cost, I/O cost, etc. + cost json + // Raw estimated output row count of this expression + estimated_statistic integer // Whether the cost is valid or not. If the latest cost for an expr is invalid, then we need to recompute the cost. // We need to invalidate the cost when the related stats are updated. is_valid boolean