Skip to content

Commit

Permalink
Merge branch 'apache:main' into chore/add-pre-commit-hooks
Browse files Browse the repository at this point in the history
  • Loading branch information
muyihao authored Aug 25, 2024
2 parents 075a26e + 5bf117a commit 576f04e
Show file tree
Hide file tree
Showing 15 changed files with 258 additions and 81 deletions.
59 changes: 59 additions & 0 deletions .github/workflows/code.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Workflow that checks license headers, Rust code style, and Python code style.
name: Code

# Runs on pushes to `main` and `release/**`, and on pull requests targeting `main`.
on:
push:
branches:
- main
- release/**
pull_request:
branches:
- main

jobs:
check-code:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4

# NOTE(review): the action ref below looks like it was mangled by e-mail
# obfuscation in this rendering ("[email protected]"); confirm the real
# `<path>@<version>` ref against the repository before relying on it.
- name: Check license header
uses: apache/skywalking-eyes/[email protected]

# Runs the `check-rust` make target from the python/ directory.
- name: Check rust code style
run: cd python && make check-rust

- name: Setup Python
uses: actions/setup-python@v5
with:
python-version: '3.8'
cache: pip
# NOTE(review): later steps run with `working-directory: ./python`; confirm
# this path points at the intended pyproject.toml.
cache-dependency-path: pyproject.toml

# Creates the virtualenv via `make setup-venv`, then installs pinned linter versions into it.
- name: Install python linter dependencies
working-directory: ./python
run: |
make setup-venv
source venv/bin/activate
pip install ruff==0.5.2 mypy==1.10.1
# Each `run` block is a separate script, so the venv is activated again before invoking make.
- name: Check python code style
working-directory: ./python
run: |
source venv/bin/activate
make check-python
23 changes: 11 additions & 12 deletions .github/workflows/compliance.yml → .github/workflows/pr.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,10 @@
# specific language governing permissions and limitations
# under the License.

name: Compliance
name: PR

on:
pull_request_target:
pull_request:
types: [ opened, edited, reopened, synchronize ]
branches:
- main
Expand All @@ -32,20 +32,19 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4

- name: Setup Node
uses: actions/setup-node@v4
with:
node-version: '20.x'
- name: Linting
node-version: 20

# Validate the pull request title against the conventional-commits rules.
- name: Linting commit
  # The PR title is attacker-controlled text. Expanding `${{ ... }}` directly
  # inside the script would splice it into the shell source before execution,
  # so it is handed over through an environment variable and quoted instead.
  env:
    PR_TITLE: ${{ github.event.pull_request.title }}
  run: |
    npm i -g conventional-changelog-conventionalcommits
    npm i -g commitlint@latest
    printf '%s\n' "$PR_TITLE" | npx commitlint
- name: Labeling
uses: actions/labeler@v5
check-changes:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Check license header
uses: apache/skywalking-eyes/[email protected]
- name: Check code style
run: cd python && make check-rust
# disable until figuring out a way to do this without pull_request_target
if: false
34 changes: 17 additions & 17 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -35,25 +35,25 @@ repository = "https://github.com/apache/hudi-rs"

[workspace.dependencies]
# arrow
arrow = { version = "52.0.0", features = ["pyarrow"] }
arrow-arith = { version = "52.0.0" }
arrow-array = { version = "52.0.0" }
arrow-buffer = { version = "52.0.0" }
arrow-cast = { version = "52.0.0" }
arrow-ipc = { version = "52.0.0" }
arrow-json = { version = "52.0.0" }
arrow-ord = { version = "52.0.0" }
arrow-row = { version = "52.0.0" }
arrow-schema = { version = "52.0.0", features = ["serde"] }
arrow-select = { version = "52.0.0" }
object_store = { version = "0.10.1", features = ["aws", "azure", "gcp"] }
parquet = { version = "52.0.0", features = ["async", "object_store"] }
arrow = { version = "= 52.0.0", features = ["pyarrow"] }
arrow-arith = { version = "= 52.0.0" }
arrow-array = { version = "= 52.0.0" }
arrow-buffer = { version = "= 52.0.0" }
arrow-cast = { version = "= 52.0.0" }
arrow-ipc = { version = "= 52.0.0" }
arrow-json = { version = "= 52.0.0" }
arrow-ord = { version = "= 52.0.0" }
arrow-row = { version = "= 52.0.0" }
arrow-schema = { version = "= 52.0.0", features = ["serde"] }
arrow-select = { version = "= 52.0.0" }
object_store = { version = "= 0.10.1", features = ["aws", "azure", "gcp"] }
parquet = { version = "= 52.0.0", features = ["async", "object_store"] }

# datafusion
datafusion = { version = "39.0.0" }
datafusion-expr = { version = "39.0.0" }
datafusion-common = { version = "39.0.0" }
datafusion-physical-expr = { version = "39.0.0" }
datafusion = { version = "= 39.0.0" }
datafusion-expr = { version = "= 39.0.0" }
datafusion-common = { version = "= 39.0.0" }
datafusion-physical-expr = { version = "= 39.0.0" }

# serde
serde = { version = "1.0.203", features = ["derive"] }
Expand Down
14 changes: 14 additions & 0 deletions crates/core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -60,5 +60,19 @@ dashmap = { workspace = true }
futures = { workspace = true }
tokio = { workspace = true }

# datafusion
datafusion = { workspace = true, optional = true }
datafusion-expr = { workspace = true, optional = true }
datafusion-common = { workspace = true, optional = true }
datafusion-physical-expr = { workspace = true, optional = true }

[dev-dependencies]
hudi-tests = { path = "../tests" }

[features]
datafusion = [
"dep:datafusion",
"datafusion-expr",
"datafusion-common",
"datafusion-physical-expr",
]
8 changes: 8 additions & 0 deletions crates/core/src/storage/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,14 @@ impl Storage {
}
}

/// Registers this storage's object store with the given DataFusion runtime
/// environment, passing the storage's base URL alongside a clone of the
/// object store handle.
///
/// Only compiled when the `datafusion` feature is enabled.
#[cfg(feature = "datafusion")]
pub fn register_object_store(
    &self,
    runtime_env: Arc<datafusion::execution::runtime_env::RuntimeEnv>,
) {
    let url = self.base_url.as_ref();
    let store = self.object_store.clone();
    // Any value returned by the registration call is intentionally discarded.
    runtime_env.register_object_store(url, store);
}

#[cfg(test)]
async fn get_file_info(&self, relative_path: &str) -> Result<FileInfo> {
let obj_url = join_url_segments(&self.base_url, &[relative_path])?;
Expand Down
2 changes: 1 addition & 1 deletion crates/core/src/table/fs_view.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ use crate::storage::{get_leaf_dirs, Storage};
#[allow(dead_code)]
pub struct FileSystemView {
configs: Arc<HudiConfigs>,
storage: Arc<Storage>,
pub(crate) storage: Arc<Storage>,
partition_to_file_groups: Arc<DashMap<String, Vec<FileGroup>>>,
}

Expand Down
13 changes: 13 additions & 0 deletions crates/core/src/table/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,19 @@ impl Table {
})
}

/// Registers the storages used by this table with a DataFusion runtime
/// environment.
///
/// The storage held by the timeline and the storage held by the file system
/// view are each registered via `register_object_store`.
///
/// Only compiled when the `datafusion` feature is enabled.
#[cfg(feature = "datafusion")]
pub fn register_storage(
    &self,
    runtime_env: Arc<datafusion::execution::runtime_env::RuntimeEnv>,
) {
    self.timeline
        .storage
        .register_object_store(Arc::clone(&runtime_env));
    // Last use of the handle: move it instead of cloning the `Arc` again.
    self.file_system_view
        .storage
        .register_object_store(runtime_env);
}

async fn load_configs<I, K, V>(
base_url: Arc<Url>,
all_options: I,
Expand Down
2 changes: 1 addition & 1 deletion crates/core/src/table/timeline.rs
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ impl Instant {
#[allow(dead_code)]
pub struct Timeline {
configs: Arc<HudiConfigs>,
storage: Arc<Storage>,
pub(crate) storage: Arc<Storage>,
pub instants: Vec<Instant>,
}

Expand Down
2 changes: 1 addition & 1 deletion crates/datafusion/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ homepage.workspace = true
repository.workspace = true

[dependencies]
hudi-core = { version = "0.2.0", path = "../core" }
hudi-core = { version = "0.2.0", path = "../core", features = ["datafusion"] }
# arrow
arrow-schema = { workspace = true }

Expand Down
2 changes: 2 additions & 0 deletions crates/datafusion/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,8 @@ impl TableProvider for HudiDataSource {
filters: &[Expr],
limit: Option<usize>,
) -> Result<Arc<dyn ExecutionPlan>> {
self.table.register_storage(state.runtime_env().clone());

let file_slices = self
.table
.split_file_slices(self.get_input_partitions())
Expand Down
9 changes: 9 additions & 0 deletions python/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,15 @@ check-rust: ## Run check on Rust
$(info --- Check Rust format ---)
cargo fmt --all -- --check

# check-python: runs three checks over the current directory in sequence —
# `ruff format --check --diff`, `ruff check`, and `mypy`. Each stage is
# announced with an $(info ...) banner.
.PHONY: check-python
check-python: ## Run check on Python
$(info --- Check Python format ---)
ruff format --check --diff .
$(info --- Check Python linting ---)
ruff check .
$(info --- Check Python typing ---)
mypy .

.PHONY: test-rust
test-rust: ## Run tests on Rust
$(info --- Run Rust tests ---)
Expand Down
2 changes: 1 addition & 1 deletion python/hudi/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,6 @@
# specific language governing permissions and limitations
# under the License.

from ._internal import __version__ as __version__
from ._internal import HudiFileSlice as HudiFileSlice
from ._internal import HudiTable as HudiTable
from ._internal import __version__ as __version__
21 changes: 6 additions & 15 deletions python/hudi/_internal.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,12 @@
# specific language governing permissions and limitations
# under the License.
from dataclasses import dataclass
from typing import Optional, Dict, List
from typing import Dict, List, Optional

import pyarrow
import pyarrow # type: ignore

__version__: str


@dataclass(init=False)
class HudiFileSlice:
file_group_id: str
Expand All @@ -33,24 +32,16 @@ class HudiFileSlice:

def base_file_relative_path(self) -> str: ...


@dataclass(init=False)
class HudiTable:

def __init__(
self,
table_uri: str,
options: Optional[Dict[str, str]] = None,
self,
table_uri: str,
options: Optional[Dict[str, str]] = None,
): ...

def get_schema(self) -> "pyarrow.Schema": ...

def split_file_slices(self, n: int) -> List[List[HudiFileSlice]]: ...

def get_file_slices(self) -> List[HudiFileSlice]: ...

def read_file_slice(self, base_file_relative_path) -> pyarrow.RecordBatch: ...

def read_file_slice(self, base_file_relative_path: str) -> pyarrow.RecordBatch: ...
def read_snapshot(self) -> List["pyarrow.RecordBatch"]: ...

def read_snapshot_as_of(self, timestamp: str) -> List["pyarrow.RecordBatch"]: ...
18 changes: 18 additions & 0 deletions python/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -42,16 +42,34 @@ dependencies = [
optional-dependencies = { devel = [
"pytest",
"coverage",
"ruff==0.5.2",
"mypy==1.10.1",
] }

dynamic = ["version"]

[tool.maturin]
module-name = "hudi._internal"

[tool.ruff]
target-version = 'py38'
# Enable Pyflakes (`F`) and a subset of the pycodestyle (`E`) codes by default.
lint.select = [
"E4",
"E7",
"E9",
"F",
# isort
"I",
]
# don't ignore any rule unless it becomes imperative
lint.ignore = []
lint.isort.known-first-party = ["hudi"]

[tool.mypy]
files = "hudi/*.py"
exclude = "^tests"
strict = true

[tool.pytest.ini_options]
testpaths = [
Expand Down
Loading

0 comments on commit 576f04e

Please sign in to comment.