From e0a2f69dfdc8bf45d5e13bacd97a400062a1f8b9 Mon Sep 17 00:00:00 2001 From: Sammy Sidhu Date: Wed, 8 Jan 2025 16:11:38 -0800 Subject: [PATCH 1/3] build: Publish A Long Term Support CPU Release of Daft (#3650) closes: https://github.com/Eventual-Inc/Daft/issues/3564 * Create new daft release `daft-lts` that supports older cpus * upgrade regular daft to use more modern cpu features * move off using API token for PYPI to using identity based upload --- .github/workflows/python-publish.yml | 64 +++++++++++++++++----------- 1 file changed, 39 insertions(+), 25 deletions(-) diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml index e21010a939..896c79b603 100644 --- a/.github/workflows/python-publish.yml +++ b/.github/workflows/python-publish.yml @@ -15,7 +15,6 @@ on: - v* workflow_dispatch: env: - PACKAGE_NAME: getdaft PYTHON_VERSION: 3.11 DAFT_ANALYTICS_ENABLED: '0' UV_SYSTEM_PYTHON: 1 @@ -24,19 +23,28 @@ env: IS_SCHEDULE_DISPATCH: ${{ github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' }} RUST_DAFT_PKG_BUILD_TYPE: ${{ (github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v') && ( ! 
endsWith(github.ref, 'dev0'))) && 'release' || 'nightly' }} +defaults: + run: + shell: bash + jobs: build-and-test: - name: platform wheels for ${{ matrix.os }}-${{ matrix.compile_arch }} + name: platform wheels for ${{ matrix.os }}-${{ matrix.compile_arch }}-lts=${{ matrix.lts }} runs-on: ${{ matrix.os }}-latest strategy: fail-fast: false matrix: os: [ubuntu, macos, windows] compile_arch: [x86_64, aarch64] + lts: [0, 1] # LongTerm Support CPUs + exclude: - os: windows compile_arch: aarch64 + - lts: 1 + compile_arch: aarch64 + steps: - uses: actions/checkout@v4 with: @@ -47,16 +55,29 @@ jobs: python-version: ${{ env.PYTHON_VERSION }} architecture: x64 - run: pip install uv - - run: uv pip install twine toml + - run: uv pip install twine toml yq - run: python tools/patch_package_version.py + - name: Patch name to daft-lts if LTS + if: ${{ matrix.lts }} + run: tomlq -i -t ".project.name = \"daft-lts\"" pyproject.toml + + - name: Configure RUSTFLAGS for x86 + if: ${{ (matrix.compile_arch == 'x86_64') }} + run: | + if [[ "${{ matrix.lts }}" == "1" ]]; then # bash: [[ 0 ]] is true (non-empty string), so compare explicitly + echo "RUSTFLAGS=-C target-feature=+sse3,+ssse3,+sse4.1,+sse4.2,+popcnt,+cmpxchg16b" >> $GITHUB_ENV && \ + echo "CFLAGS=-msse3 -mssse3 -msse4.1 -msse4.2 -mpopcnt -mcx16" >> $GITHUB_ENV + else + echo "RUSTFLAGS=-C target-feature=+sse3,+ssse3,+sse4.1,+sse4.2,+popcnt,+cmpxchg16b,+avx,+avx2,+fma,+bmi1,+bmi2,+lzcnt,+pclmulqdq,+movbe -Z tune-cpu=skylake" >> $GITHUB_ENV && \ + echo "CFLAGS=-msse3 -mssse3 -msse4.1 -msse4.2 -mpopcnt -mcx16 -mavx -mavx2 -mfma -mbmi -mbmi2 -mlzcnt -mpclmul -mmovbe -mtune=skylake" >> $GITHUB_ENV + fi + - name: Build wheels - Mac and Windows x86 if: ${{ ((matrix.os == 'macos') || (matrix.os == 'windows')) && (matrix.compile_arch == 'x86_64') }} uses: messense/maturin-action@v1 with: target: x86_64 args: --profile release-lto --out dist - env: - RUSTFLAGS: -C target-feature=+fxsr,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2 - name: Build wheels - Linux x86 if: ${{ (matrix.os == 'ubuntu') && 
(matrix.compile_arch == 'x86_64') }} uses: messense/maturin-action@v1 @@ -66,8 +87,7 @@ jobs: # only produce sdist for linux x86 to avoid multiple copies args: --profile release-lto --out dist --sdist before-script-linux: yum -y install perl-IPC-Cmd - env: - RUSTFLAGS: -C target-feature=+fxsr,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+popcnt,+avx,+fma + - name: Build wheels - Linux aarch64 if: ${{ (matrix.os == 'ubuntu') && (matrix.compile_arch == 'aarch64') }} uses: messense/maturin-action@v1 @@ -88,28 +108,21 @@ jobs: args: --profile release-lto --out dist env: RUSTFLAGS: -Ctarget-cpu=apple-m1 + CFLAGS: -mtune=apple-m1 - name: Install and test built wheel - Linux and Mac x86_64 if: ${{ (matrix.os == 'ubuntu') && (matrix.compile_arch == 'x86_64') }} run: | - uv pip install -r requirements-dev.txt dist/${{ env.PACKAGE_NAME }}-*x86_64*.whl --force-reinstall + uv pip install -r requirements-dev.txt dist/*-*x86_64*.whl --force-reinstall rm -rf daft pytest -v env: - DAFT_RUNNER: py - # Disable until we figure out why are we getting FileNotFoundError: [WinError 3] Failed to open local file - # - name: Install and test built wheel - Windows x86_64 - # if: ${{ (matrix.os == 'windows') && (matrix.compile_arch == 'x86_64') }} - # run: | - # $FILES = Get-ChildItem -Path .\dist\${{ env.PACKAGE_NAME }}-*-win_amd64.whl -Force -Recurse - # pip install -r requirements-dev.txt $FILES[0].FullName --force-reinstall - # rd -r daft - # pytest -v + DAFT_RUNNER: native - name: Upload wheels uses: actions/upload-artifact@v4 with: - name: wheels-${{ matrix.os }}-${{ matrix.compile_arch }} + name: wheels-${{ matrix.os }}-${{ matrix.compile_arch }}-lts-${{ matrix.lts }} path: dist - name: Send Slack notification on failure @@ -132,10 +145,13 @@ jobs: SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }} SLACK_WEBHOOK_TYPE: INCOMING_WEBHOOK - publish: name: Publish wheels to PYPI and Anaconda runs-on: ubuntu-latest + environment: + name: pypi + permissions: + id-token: write # IMPORTANT: this 
permission is mandatory for trusted publishing needs: - build-and-test steps: @@ -144,19 +160,17 @@ jobs: python-version: ${{ env.PYTHON_VERSION }} architecture: x64 - run: pip install -U twine - - uses: actions/checkout@v4 - uses: actions/download-artifact@v4 with: pattern: wheels-* merge-multiple: true path: dist - run: ls -R ./dist - - name: Publish bdist package to PYPI + - name: Publish package distributions to PyPI if: ${{ success() && (env.IS_PUSH == 'true') }} - run: python -m twine upload --skip-existing --disable-progress-bar ./dist/* - env: - TWINE_USERNAME: __token__ - TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }} + uses: pypa/gh-action-pypi-publish@release/v1 + with: + skip-existing: true - uses: conda-incubator/setup-miniconda@v3 with: From 3479b1c04a4bb2aee707ec2a484847c65ae12044 Mon Sep 17 00:00:00 2001 From: Kev Wang Date: Wed, 8 Jan 2025 16:49:14 -0800 Subject: [PATCH 2/3] chore: update PyO3 version to 0.23 (#3647) Resolves #3641 The bulk of this PR are adding function signatures for python functions defined in Rust. This is required now in 0.23 if the function has optional parameters in order to distinguish it from nullable ones. The other major change is that Python objects no longer implement `Clone`, coming from PyO3 v0.22. This is because the reference counting done to allow this actually can't happen properly without holding onto the GIL. So instead, I wrapped pyobjects that need to be cloned with an Arc. 
They can still be cloned with the GIL held using `pyobj.clone_ref(py)` --- .github/workflows/python-package.yml | 2 +- Cargo.lock | 1416 +++++++++++------ Cargo.toml | 4 +- daft/utils.py | 39 - src/common/arrow-ffi/src/lib.rs | 2 +- src/common/daft-config/src/python.rs | 27 + src/common/file-formats/src/file_format.rs | 2 +- .../file-formats/src/file_format_config.rs | 19 +- src/common/file-formats/src/python.rs | 24 +- src/common/io-config/src/python.rs | 105 +- src/common/partitioning/src/lib.rs | 2 +- src/common/py-serde/src/lib.rs | 2 +- src/common/py-serde/src/python.rs | 35 +- src/common/resource-request/src/lib.rs | 4 + src/common/scan-info/src/python.rs | 1 + src/daft-catalog/python-catalog/src/python.rs | 5 +- src/daft-catalog/src/python.rs | 7 +- src/daft-connect/src/lib.rs | 2 +- .../src/translation/logical_plan/range.rs | 6 +- src/daft-core/Cargo.toml | 4 +- src/daft-core/src/array/from.rs | 15 +- .../src/array/growable/python_growable.rs | 12 +- src/daft-core/src/array/ops/as_arrow.rs | 4 +- src/daft-core/src/array/ops/cast.rs | 75 +- src/daft-core/src/array/ops/concat.rs | 4 +- src/daft-core/src/array/ops/concat_agg.rs | 20 +- src/daft-core/src/array/ops/filter.rs | 4 +- src/daft-core/src/array/ops/full.rs | 2 +- src/daft-core/src/array/ops/get.rs | 6 +- src/daft-core/src/array/ops/len.rs | 69 +- src/daft-core/src/array/ops/list_agg.rs | 23 +- src/daft-core/src/array/ops/repr.rs | 2 +- src/daft-core/src/array/ops/take.rs | 6 +- .../src/array/pseudo_arrow/python.rs | 24 +- src/daft-core/src/count_mode.rs | 2 +- src/daft-core/src/join.rs | 6 +- src/daft-core/src/python/series.rs | 36 +- src/daft-core/src/series/utils/python_fn.rs | 34 +- src/daft-csv/src/lib.rs | 7 +- src/daft-csv/src/python.rs | 17 +- src/daft-dsl/src/functions/python/mod.rs | 22 +- .../src/functions/python/runtime_py_object.rs | 12 +- src/daft-dsl/src/functions/python/udf.rs | 22 +- src/daft-dsl/src/lib.rs | 28 +- src/daft-dsl/src/lit.rs | 2 +- src/daft-dsl/src/pyobj_serde.rs | 3 +- 
src/daft-dsl/src/python.rs | 25 +- src/daft-functions-json/src/lib.rs | 2 +- src/daft-functions/src/python/image.rs | 2 +- src/daft-functions/src/python/misc.rs | 2 +- src/daft-functions/src/python/mod.rs | 4 +- src/daft-functions/src/python/tokenize.rs | 17 +- src/daft-functions/src/python/uri.rs | 10 +- src/daft-functions/src/python/utf8.rs | 2 +- src/daft-image/src/python.rs | 4 +- src/daft-io/src/python.rs | 15 +- src/daft-json/src/lib.rs | 7 +- src/daft-json/src/python.rs | 18 +- .../intermediate_ops/actor_pool_project.rs | 2 +- src/daft-local-execution/src/progress_bar.rs | 8 +- src/daft-local-execution/src/run.rs | 22 +- src/daft-logical-plan/src/builder.rs | 60 +- .../src/optimization/rules/push_down_limit.rs | 2 +- src/daft-logical-plan/src/sink_info.rs | 8 +- .../src/source_info/file_info.rs | 1 + src/daft-micropartition/src/python.rs | 115 +- src/daft-parquet/src/python.rs | 81 +- src/daft-scan/src/builder.rs | 6 +- src/daft-scan/src/python.rs | 85 +- src/daft-scan/src/storage_config.rs | 1 + src/daft-scheduler/src/adaptive.rs | 2 +- src/daft-scheduler/src/scheduler.rs | 133 +- src/daft-schema/src/image_format.rs | 2 +- src/daft-schema/src/image_mode.rs | 2 +- src/daft-schema/src/python/datatype.rs | 11 +- src/daft-schema/src/python/schema.rs | 2 +- src/daft-sql/src/lib.rs | 6 +- src/daft-table/src/python.rs | 5 +- src/daft-writers/src/lance.rs | 6 +- src/daft-writers/src/pyarrow.rs | 18 +- src/lib.rs | 4 +- 81 files changed, 1857 insertions(+), 998 deletions(-) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 17a7dd793c..2bc27649ec 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -695,7 +695,7 @@ jobs: rust-tests-platform: runs-on: ${{ matrix.os }}-latest - timeout-minutes: 30 + timeout-minutes: 45 strategy: fail-fast: false matrix: diff --git a/Cargo.lock b/Cargo.lock index bad49035de..1b4d338cb3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -10,18 +10,18 @@ 
checksum = "8b5ace29ee3216de37c0546865ad08edef58b0f9e76838ed8959a84a990e58c5" [[package]] name = "addr2line" -version = "0.21.0" +version = "0.24.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a30b2e23b9e17a9f90641c7ab1549cd9b44f296d3ccbf309d2863cfe398a0cb" +checksum = "dfbe277e56a376000877090da837660b4427aad530e3028d44e0bffe4f89a1c1" dependencies = [ "gimli", ] [[package]] -name = "adler" -version = "1.0.2" +name = "adler2" +version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" +checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627" [[package]] name = "adler32" @@ -78,9 +78,9 @@ dependencies = [ [[package]] name = "allocator-api2" -version = "0.2.18" +version = "0.2.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c6cb57a04249c6480766f7f7cef5467412af1490f8d1e243141daddada3264f" +checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" [[package]] name = "android-tzdata" @@ -105,9 +105,9 @@ checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" [[package]] name = "anstream" -version = "0.6.15" +version = "0.6.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "64e15c1ab1f89faffbf04a634d5e1962e9074f2741eef6d97f3c4e322426d526" +checksum = "8acc5369981196006228e28809f761875c0327210a891e941f4c683b3a99529b" dependencies = [ "anstyle", "anstyle-parse", @@ -120,43 +120,43 @@ dependencies = [ [[package]] name = "anstyle" -version = "1.0.8" +version = "1.0.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1bec1de6f59aedf83baf9ff929c98f2ad654b97c9510f4e70cf6f661d49fd5b1" +checksum = "55cc3b69f167a1ef2e161439aa98aed94e6028e5f9a59be9a6ffb47aef1651f9" [[package]] name = "anstyle-parse" -version = "0.2.5" +version = "0.2.6" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb47de1e80c2b463c735db5b217a0ddc39d612e7ac9e2e96a5aed1f57616c1cb" +checksum = "3b2d16507662817a6a20a9ea92df6652ee4f94f914589377d69f3b21bc5798a9" dependencies = [ "utf8parse", ] [[package]] name = "anstyle-query" -version = "1.1.1" +version = "1.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d36fc52c7f6c869915e99412912f22093507da8d9e942ceaf66fe4b7c14422a" +checksum = "79947af37f4177cfead1110013d678905c37501914fba0efea834c3fe9a8d60c" dependencies = [ - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] name = "anstyle-wincon" -version = "3.0.4" +version = "3.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5bf74e1b6e971609db8ca7a9ce79fd5768ab6ae46441c572e46cf596f59e57f8" +checksum = "2109dbce0e72be3ec00bed26e6a7479ca384ad226efdd66db8fa2e3a38c83125" dependencies = [ "anstyle", - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] name = "anyhow" -version = "1.0.86" +version = "1.0.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b3d1d046238990b9cf5bcde22a3fb3584ee5cf65fb2765f454ed428c7a0063da" +checksum = "34ac096ce696dc2fcabef30516bb13c0a68a11d30131d3df6f04711467681b04" [[package]] name = "approx" @@ -181,9 +181,9 @@ checksum = "bf7d0a018de4f6aa429b9d33d69edf69072b1c5b1cb8d3e4a5f7ef898fc3eb76" [[package]] name = "arrayvec" -version = "0.7.4" +version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711" +checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" [[package]] name = "arrow-array" @@ -291,7 +291,7 @@ dependencies = [ "indexmap 1.9.3", "itertools 0.10.5", "json-deserializer", - "lexical-core", + "lexical-core 0.8.5", "lz4", "memchr", "multiversion", @@ -301,7 +301,7 @@ dependencies = [ "proptest", "rand 0.8.5", "regex", - "regex-syntax 0.8.4", + 
"regex-syntax 0.8.5", "rustc_version", "sample-arrow2", "sample-std 0.1.1", @@ -344,11 +344,11 @@ dependencies = [ [[package]] name = "async-compression" -version = "0.4.12" +version = "0.4.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fec134f64e2bc57411226dfc4e52dec859ddfc7e711fc5e07b612584f000e4aa" +checksum = "df895a515f70646414f4b45c0b79082783b80552b373a68283012928df56f522" dependencies = [ - "brotli 6.0.0", + "brotli 7.0.0", "bzip2", "deflate64", "flate2", @@ -380,7 +380,7 @@ checksum = "3b43422f69d8ff38f95f1b2bb76517c91589a924d1559a0e935d7c8ce0274c11" dependencies = [ "proc-macro2", "quote", - "syn 2.0.87", + "syn 2.0.95", ] [[package]] @@ -402,18 +402,18 @@ checksum = "c7c24de15d275a1ecfd47a380fb4d5ec9bfe0933f309ed5e705b775596a3574d" dependencies = [ "proc-macro2", "quote", - "syn 2.0.87", + "syn 2.0.95", ] [[package]] name = "async-trait" -version = "0.1.81" +version = "0.1.85" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e0c28dcc82d7c8ead5cb13beb15405b57b8546e93215673ff8ca0349a028107" +checksum = "3f934833b4b7233644e5848f235df3f57ed8c80f1528a26c3dfa13d2147fa056" dependencies = [ "proc-macro2", "quote", - "syn 2.0.87", + "syn 2.0.95", ] [[package]] @@ -434,16 +434,16 @@ version = "0.2.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" dependencies = [ - "hermit-abi 0.1.19", + "hermit-abi", "libc", "winapi", ] [[package]] name = "autocfg" -version = "1.3.0" +version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0" +checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26" [[package]] name = "avro-rs" @@ -506,11 +506,11 @@ dependencies = [ "fastrand 1.9.0", "hex", "http 0.2.12", - "hyper 0.14.30", + "hyper 0.14.32", "ring 0.16.20", "time", "tokio", - "tower", + "tower 
0.4.13", "tracing", "zeroize", ] @@ -590,7 +590,7 @@ dependencies = [ "percent-encoding", "regex", "tokio-stream", - "tower", + "tower 0.4.13", "tracing", "url", ] @@ -616,7 +616,7 @@ dependencies = [ "http 0.2.12", "regex", "tokio-stream", - "tower", + "tower 0.4.13", "tracing", ] @@ -642,7 +642,7 @@ dependencies = [ "bytes", "http 0.2.12", "regex", - "tower", + "tower 0.4.13", "tracing", ] @@ -729,11 +729,11 @@ dependencies = [ "fastrand 1.9.0", "http 0.2.12", "http-body 0.4.6", - "hyper 0.14.30", + "hyper 0.14.32", "hyper-tls 0.5.0", "pin-project-lite", "tokio", - "tower", + "tower 0.4.13", "tracing", ] @@ -761,7 +761,7 @@ dependencies = [ "futures-core", "http 0.2.12", "http-body 0.4.6", - "hyper 0.14.30", + "hyper 0.14.32", "once_cell", "percent-encoding", "pin-project-lite", @@ -783,7 +783,7 @@ dependencies = [ "http 0.2.12", "http-body 0.4.6", "pin-project-lite", - "tower", + "tower 0.4.13", "tracing", ] @@ -846,15 +846,15 @@ dependencies = [ [[package]] name = "axum" -version = "0.7.5" +version = "0.7.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a6c9af12842a67734c9a2e355436e5d03b22383ed60cf13cd0c18fbfe3dcbcf" +checksum = "edca88bc138befd0323b20752846e6587272d3b03b0343c8ea28a6f819e6e71f" dependencies = [ "async-trait", "axum-core", "bytes", "futures-util", - "http 1.1.0", + "http 1.2.0", "http-body 1.0.1", "http-body-util", "itoa", @@ -865,8 +865,8 @@ dependencies = [ "pin-project-lite", "rustversion", "serde", - "sync_wrapper 1.0.1", - "tower", + "sync_wrapper 1.0.2", + "tower 0.5.2", "tower-layer", "tower-service", ] @@ -880,13 +880,13 @@ dependencies = [ "async-trait", "bytes", "futures-util", - "http 1.1.0", + "http 1.2.0", "http-body 1.0.1", "http-body-util", "mime", "pin-project-lite", "rustversion", - "sync_wrapper 1.0.1", + "sync_wrapper 1.0.2", "tower-layer", "tower-service", ] @@ -983,17 +983,17 @@ dependencies = [ [[package]] name = "backtrace" -version = "0.3.71" +version = "0.3.74" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "26b05800d2e817c8b3b4b54abd461726265fa9789ae34330622f2db9ee696f9d" +checksum = "8d82cb332cdfaed17ae235a638438ac4d4839913cc2af585c3c6746e8f8bee1a" dependencies = [ "addr2line", - "cc", "cfg-if", "libc", "miniz_oxide", "object", "rustc-demangle", + "windows-targets 0.52.6", ] [[package]] @@ -1045,7 +1045,16 @@ version = "0.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0700ddab506f33b20a03b13996eccd309a48e5ff77d0d95926aa0210fb4e95f1" dependencies = [ - "bit-vec", + "bit-vec 0.6.3", +] + +[[package]] +name = "bit-set" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08807e080ed7f9d5433fa9b275196cfc35414f66a0c79d864dc51a0d825231a3" +dependencies = [ + "bit-vec 0.8.0", ] [[package]] @@ -1054,6 +1063,12 @@ version = "0.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb" +[[package]] +name = "bit-vec" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e764a1d40d510daf35e07be9eb06e75770908c27d411ee6c92109c9840eaaf7" + [[package]] name = "bitflags" version = "1.3.2" @@ -1077,9 +1092,9 @@ dependencies = [ [[package]] name = "block-buffer" -version = "0.11.0-rc.2" +version = "0.11.0-rc.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "939c0e62efa052fb0b2db2c0f7c479ad32e364c192c3aab605a7641de265a1a7" +checksum = "3fd016a0ddc7cb13661bf5576073ce07330a693f8608a1320b4e20561cc12cdc" dependencies = [ "hybrid-array", ] @@ -1097,9 +1112,9 @@ dependencies = [ [[package]] name = "brotli" -version = "6.0.0" +version = "7.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "74f7971dbd9326d58187408ab83117d8ac1bb9c17b085fdacd1cf2f598719b6b" +checksum = "cc97b8f16f944bba54f0433f07e30be199b6dc2bd25937444bbad560bcea29bd" dependencies = [ 
"alloc-no-stdlib", "alloc-stdlib", @@ -1128,12 +1143,12 @@ dependencies = [ [[package]] name = "bstr" -version = "1.10.0" +version = "1.11.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "40723b8fb387abc38f4f4a37c09073622e41dd12327033091ef8950659e6dc0c" +checksum = "531a9155a481e2ee699d4f98f43c0ca4ff8ee1bfd55c31e9e98fb29d2b176fe0" dependencies = [ "memchr", - "regex-automata 0.4.7", + "regex-automata 0.4.9", "serde", ] @@ -1151,22 +1166,22 @@ checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c" [[package]] name = "bytemuck" -version = "1.16.3" +version = "1.21.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "102087e286b4677862ea56cf8fc58bb2cdfa8725c40ffb80fe3a008eb7f2fc83" +checksum = "ef657dfab802224e671f5818e9a4935f9b1957ed18e58292690cc39e7a4092a3" dependencies = [ "bytemuck_derive", ] [[package]] name = "bytemuck_derive" -version = "1.7.0" +version = "1.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ee891b04274a59bd38b412188e24b849617b2e45a0fd8d057deb63e7403761b" +checksum = "3fa76293b4f7bb636ab88fd78228235b5248b4d05cc589aed610f954af5d7c7a" dependencies = [ "proc-macro2", "quote", - "syn 2.0.87", + "syn 2.0.95", ] [[package]] @@ -1183,9 +1198,9 @@ checksum = "8f1fe948ff07f4bd06c30984e69f5b4899c516a3ef74f34df92a2df2ab535495" [[package]] name = "bytes" -version = "1.8.0" +version = "1.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ac0150caa2ae65ca5bd83f25c7de183dea78d4d366469f148435e2acfbad0da" +checksum = "325918d6fe32f23b19878fe4b34794ae41fc19ddbe53b10571a4874d44ffd39b" [[package]] name = "bytes-utils" @@ -1220,11 +1235,11 @@ dependencies = [ [[package]] name = "casey" -version = "0.4.0" +version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "614586263949597dcc18675da12ef9b429135e13628d92eb8b8c6fa50ca5656b" +checksum = 
"8e779867f62d81627d1438e0d3fb6ed7d7c9d64293ca6d87a1e88781b94ece1c" dependencies = [ - "syn 1.0.109", + "syn 2.0.95", ] [[package]] @@ -1235,12 +1250,13 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" [[package]] name = "cc" -version = "1.1.10" +version = "1.2.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e9e8aabfac534be767c909e0690571677d49f41bd8465ae876fe043d52ba5292" +checksum = "a012a0df96dd6d06ba9a1b29d6402d1a5d77c6befd2566afdc26e10603dc93d7" dependencies = [ "jobserver", "libc", + "shlex", ] [[package]] @@ -1251,9 +1267,9 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "chrono" -version = "0.4.38" +version = "0.4.39" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a21f936df1771bf62b77f047b726c4625ff2e8aa607c01ec06e5a05bd8463401" +checksum = "7e36cc9d416881d2e24f9a963be5fb1cd90966419ac844274161d10488b3e825" dependencies = [ "android-tzdata", "iana-time-zone", @@ -1335,9 +1351,9 @@ dependencies = [ [[package]] name = "clap" -version = "4.5.20" +version = "4.5.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b97f376d85a664d5837dbae44bf546e6477a679ff6610010f17276f686d867e8" +checksum = "3135e7ec2ef7b10c6ed8950f0f792ed96ee093fa088608f1c76e569722700c84" dependencies = [ "clap_builder", "clap_derive", @@ -1345,13 +1361,13 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.5.20" +version = "4.5.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "19bc80abd44e4bed93ca373a0704ccbd1b710dc5749406201bb018272808dc54" +checksum = "30582fc632330df2bd26877bde0c1f4470d57c582bbc070376afcd04d8cb4838" dependencies = [ "anstream", "anstyle", - "clap_lex 0.7.2", + "clap_lex 0.7.4", "strsim", ] @@ -1364,7 +1380,7 @@ dependencies = [ "heck 0.5.0", "proc-macro2", "quote", - "syn 2.0.87", + "syn 2.0.95", ] [[package]] @@ -1378,15 +1394,15 @@ dependencies = [ 
[[package]] name = "clap_lex" -version = "0.7.2" +version = "0.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1462739cb27611015575c0c11df5df7601141071f07518d56fcc1be504cbec97" +checksum = "f46ad14479a25103f283c0f10005961cf086d8dc42205bb44c46ac563475dca6" [[package]] name = "cmake" -version = "0.1.50" +version = "0.1.52" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a31c789563b815f77f4250caee12365734369f942439b7defd71e18a48197130" +checksum = "c682c223677e0e5b6b7f63a64b9351844c3f1b1678a68b7ee617e30fb082620e" dependencies = [ "cc", ] @@ -1399,15 +1415,15 @@ checksum = "3d7b894f5411737b7867f4827955924d7c254fc9f4d91a6aad6b097804b1018b" [[package]] name = "colorchoice" -version = "1.0.2" +version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d3fd119d74b830634cea2a0f58bbd0d54540518a14397557951e79340abc28c0" +checksum = "5b63caa9aa9397e2d9480a9b13673856c78d8ac123288526c37d7839f2a86990" [[package]] name = "colorz" -version = "1.1.2" +version = "1.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc2a5df6ee18d52a36920c93a7736761c6fcffa72b9d960fd9133dd8d57c5184" +checksum = "6ceb37c5798821e37369cb546f430f19da2f585e0364c9615ae340a9f2e6067b" dependencies = [ "supports-color", ] @@ -1420,19 +1436,19 @@ checksum = "7e959d788268e3bf9d35ace83e81b124190378e4c91c9067524675e33394b8ba" dependencies = [ "strum 0.24.1", "strum_macros 0.24.3", - "unicode-width 0.1.13", + "unicode-width 0.1.14", ] [[package]] name = "comfy-table" -version = "7.1.1" +version = "7.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b34115915337defe99b2aff5c2ce6771e5fbc4079f4b506301f5cf394c8452f7" +checksum = "24f165e7b643266ea80cb858aed492ad9280e3e05ce24d4a99d7d7b889b6a4d9" dependencies = [ "crossterm", "strum 0.26.3", "strum_macros 0.26.4", - "unicode-width 0.1.13", + "unicode-width 0.2.0", ] [[package]] @@ -1458,8 +1474,8 @@ 
dependencies = [ name = "common-display" version = "0.3.0-dev0" dependencies = [ - "comfy-table 7.1.1", - "indexmap 2.5.0", + "comfy-table 7.1.3", + "indexmap 2.7.0", "pyo3", "terminal_size", "textwrap", @@ -1609,15 +1625,15 @@ dependencies = [ [[package]] name = "console" -version = "0.15.8" +version = "0.15.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0e1f83fc076bd6dd27517eacdf25fef6c4dfe5f1d7448bafaaf3a26f13b5e4eb" +checksum = "ea3c6ecd8059b57859df5c69830340ed3c41d30e3da0c1cbed90a96ac853041b" dependencies = [ "encode_unicode", - "lazy_static", "libc", - "unicode-width 0.1.13", - "windows-sys 0.52.0", + "once_cell", + "unicode-width 0.2.0", + "windows-sys 0.59.0", ] [[package]] @@ -1628,9 +1644,9 @@ checksum = "c2459377285ad874054d797f3ccebf984978aa39129f6eafde5cdc8315b612f8" [[package]] name = "const-oid" -version = "0.10.0-rc.2" +version = "0.10.0-rc.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a0d96d207edbe5135e55038e79ab9ad6d75ba83b14cdf62326ce5b12bc46ab5" +checksum = "68ff6be19477a1bd5441f382916a89bc2a0b2c35db6d41e0f6e8538bf6d6463f" [[package]] name = "const-random" @@ -1654,9 +1670,9 @@ dependencies = [ [[package]] name = "const_fn" -version = "0.4.10" +version = "0.4.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "373e9fafaa20882876db20562275ff58d50e0caa2590077fe7ce7bef90211d0d" +checksum = "2f8a2ca5ac02d09563609681103aada9e1777d54fc57a5acd7a41404f9c93b6e" [[package]] name = "core-foundation" @@ -1676,9 +1692,9 @@ checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" [[package]] name = "cpufeatures" -version = "0.2.13" +version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "51e852e6dc9a5bed1fae92dd2375037bf2b768725bf3be87811edee3249d09ad" +checksum = "16b80225097f2e5ae4e7179dd2266824648f3e2f49d9134d584b76389d31c4c3" dependencies = [ "libc", ] @@ -1763,18 +1779,18 @@ dependencies = [ 
[[package]] name = "crossbeam-channel" -version = "0.5.13" +version = "0.5.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "33480d6946193aa8033910124896ca395333cae7e2d1113d1fef6c3272217df2" +checksum = "06ba6d68e24814cb8de6bb986db8222d3a027d15872cabc0d18817bc3c0e4471" dependencies = [ "crossbeam-utils", ] [[package]] name = "crossbeam-deque" -version = "0.8.5" +version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "613f8cc01fe9cf1a3eb3d7f488fd2fa8388403e97039e2f73692932e291a770d" +checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51" dependencies = [ "crossbeam-epoch", "crossbeam-utils", @@ -1791,20 +1807,20 @@ dependencies = [ [[package]] name = "crossbeam-utils" -version = "0.8.20" +version = "0.8.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80" +checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" [[package]] name = "crossterm" -version = "0.27.0" +version = "0.28.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f476fe445d41c9e991fd07515a6f463074b782242ccf4a5b7b1d1012e70824df" +checksum = "829d955a0bb380ef178a640b91779e3987da38c9aea133b20614cfed8cdea9c6" dependencies = [ "bitflags 2.6.0", "crossterm_winapi", - "libc", "parking_lot 0.12.3", + "rustix", "winapi", ] @@ -1846,9 +1862,9 @@ dependencies = [ [[package]] name = "csv" -version = "1.3.0" +version = "1.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac574ff4d437a7b5ad237ef331c17ccca63c46479e5b5453eb8e10bb99a759fe" +checksum = "acdc4883a9c96732e4733212c01447ebd805833b7275a73ca3ee080fd77afdaf" dependencies = [ "csv-core", "itoa", @@ -1886,7 +1902,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32a2785755761f3ddc1492979ce1e48d2c00d09311c39e4466429188f3dd6501" dependencies = [ "quote", - "syn 
2.0.87", + "syn 2.0.95", ] [[package]] @@ -2021,7 +2037,7 @@ dependencies = [ "bincode", "chrono", "chrono-tz", - "comfy-table 7.1.1", + "comfy-table 7.1.3", "common-arrow-ffi", "common-display", "common-error", @@ -2032,11 +2048,11 @@ dependencies = [ "daft-schema", "daft-sketch", "derive_more", - "fastrand 2.1.0", + "fastrand 2.3.0", "fnv", "html-escape", "hyperloglog", - "indexmap 2.5.0", + "indexmap 2.7.0", "itertools 0.11.0", "lazy_static", "log", @@ -2108,7 +2124,7 @@ dependencies = [ "daft-core", "daft-sketch", "derive_more", - "indexmap 2.5.0", + "indexmap 2.7.0", "itertools 0.11.0", "log", "pyo3", @@ -2216,7 +2232,7 @@ dependencies = [ "google-cloud-storage", "google-cloud-token", "home", - "hyper 0.14.30", + "hyper 0.14.32", "hyper-tls 0.5.0", "itertools 0.11.0", "lazy_static", @@ -2255,7 +2271,7 @@ dependencies = [ "daft-io", "daft-table", "futures", - "indexmap 2.5.0", + "indexmap 2.7.0", "memchr", "memmap2", "num-traits", @@ -2299,7 +2315,7 @@ dependencies = [ "daft-table", "daft-writers", "futures", - "indexmap 2.5.0", + "indexmap 2.7.0", "indicatif", "lazy_static", "log", @@ -2347,7 +2363,7 @@ dependencies = [ "daft-functions", "daft-schema", "derivative", - "indexmap 2.5.0", + "indexmap 2.7.0", "itertools 0.11.0", "log", "pretty_assertions", @@ -2395,7 +2411,7 @@ dependencies = [ "approx", "common-error", "daft-hash", - "fastrand 2.1.0", + "fastrand 2.3.0", "memchr", "proptest", "tango-bench", @@ -2420,7 +2436,7 @@ dependencies = [ "daft-stats", "daft-table", "futures", - "indexmap 2.5.0", + "indexmap 2.7.0", "itertools 0.11.0", "log", "parquet2", @@ -2481,7 +2497,7 @@ dependencies = [ "daft-stats", "daft-table", "futures", - "indexmap 2.5.0", + "indexmap 2.7.0", "itertools 0.11.0", "parquet2", "pyo3", @@ -2526,7 +2542,7 @@ dependencies = [ "common-version", "derive_more", "html-escape", - "indexmap 2.5.0", + "indexmap 2.7.0", "num-derive", "num-traits", "pyo3", @@ -2577,7 +2593,7 @@ dependencies = [ "daft-core", "daft-dsl", "daft-table", - 
"indexmap 2.5.0", + "indexmap 2.7.0", "serde", "snafu", ] @@ -2587,7 +2603,7 @@ name = "daft-table" version = "0.3.0-dev0" dependencies = [ "arrow2", - "comfy-table 7.1.1", + "comfy-table 7.1.3", "common-arrow-ffi", "common-display", "common-error", @@ -2596,7 +2612,7 @@ dependencies = [ "daft-image", "daft-logical-plan", "html-escape", - "indexmap 2.5.0", + "indexmap 2.7.0", "num-traits", "pyo3", "rand 0.8.5", @@ -2640,7 +2656,7 @@ dependencies = [ "proc-macro2", "quote", "strsim", - "syn 2.0.87", + "syn 2.0.95", ] [[package]] @@ -2651,7 +2667,7 @@ checksum = "d336a2a514f6ccccaa3e09b02d41d35330c07ddf03a62165fcec10bb561c7806" dependencies = [ "darling_core", "quote", - "syn 2.0.87", + "syn 2.0.95", ] [[package]] @@ -2724,7 +2740,7 @@ dependencies = [ "darling", "proc-macro2", "quote", - "syn 2.0.87", + "syn 2.0.95", ] [[package]] @@ -2734,7 +2750,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ab63b0e2bf4d5928aff72e83a7dace85d7bba5fe12dcc3c5a572d78caffd3f3c" dependencies = [ "derive_builder_core", - "syn 2.0.87", + "syn 2.0.95", ] [[package]] @@ -2754,7 +2770,7 @@ checksum = "cb7330aeadfbe296029522e6c40f315320aba36fc43a5b3632f3795348f3bd22" dependencies = [ "proc-macro2", "quote", - "syn 2.0.87", + "syn 2.0.95", "unicode-xid", ] @@ -2790,11 +2806,22 @@ version = "0.11.0-pre.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cf2e3d6615d99707295a9673e889bf363a04b2a466bd320c65a72536f7577379" dependencies = [ - "block-buffer 0.11.0-rc.2", - "const-oid 0.10.0-rc.2", + "block-buffer 0.11.0-rc.3", + "const-oid 0.10.0-rc.3", "crypto-common 0.2.0-rc.1", ] +[[package]] +name = "displaydoc" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.95", +] + [[package]] name = "doc-comment" version = "0.3.3" @@ -2815,24 +2842,24 @@ checksum = 
"60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0" [[package]] name = "encode_unicode" -version = "0.3.6" +version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f" +checksum = "34aa73646ffb006b8f5147f3dc182bd4bcb190227ce861fc4a4844bf8e3cb2c0" [[package]] name = "encoding_rs" -version = "0.8.34" +version = "0.8.35" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b45de904aa0b010bce2ab45264d0631681847fa7b6f2eaa7dab7619943bc4f59" +checksum = "75030f3c4f45dafd7586dd6780965a8c7e8e285a5ecb86713e63a79c5b2766f3" dependencies = [ "cfg-if", ] [[package]] name = "env_filter" -version = "0.1.2" +version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4f2c92ceda6ceec50f43169f9ee8424fe2db276791afde7b2cd8bc084cb376ab" +checksum = "186e05a59d4c50738528153b83b0b0194d3a29507dfec16eccd4b342903397d0" dependencies = [ "log", ] @@ -2849,9 +2876,9 @@ dependencies = [ [[package]] name = "env_logger" -version = "0.11.5" +version = "0.11.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e13fa619b91fb2381732789fc5de83b45675e882f66623b7d8cb4f643017018d" +checksum = "dcaee3d8e3cfc3fd92428d477bc97fc29ec8716d180c0d74c643bb26166660e0" dependencies = [ "anstream", "anstyle", @@ -2877,12 +2904,12 @@ dependencies = [ [[package]] name = "errno" -version = "0.3.9" +version = "0.3.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "534c5cf6194dfab3db3242765c03bbe257cf92f22b38f6bc0c58d59108a820ba" +checksum = "33d852cb9b869c2a9b3df2f71a3074817f01e1844f839a144f5fcef059a4eb5d" dependencies = [ "libc", - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] @@ -2910,9 +2937,9 @@ dependencies = [ [[package]] name = "event-listener-strategy" -version = "0.5.2" +version = "0.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"0f214dc438f977e6d4e3500aaa277f5ad94ca83fbbd9b1a15713ce2344ccc5a1" +checksum = "3c3e4e0dd3673c1139bf041f3008816d9cf2946bbfac2945c09e523b8d7b05b2" dependencies = [ "event-listener 5.3.1", "pin-project-lite", @@ -2940,7 +2967,7 @@ version = "0.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7493d4c459da9f84325ad297371a6b2b8a162800873a22e3b6b6512e61d18c05" dependencies = [ - "bit-set", + "bit-set 0.5.3", "regex", ] @@ -2961,24 +2988,24 @@ dependencies = [ [[package]] name = "fastrand" -version = "2.1.0" +version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9fc0510504f03c51ada170672ac806f1f105a88aa97a5281117e1ddc3368e51a" +checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" [[package]] name = "fdeflate" -version = "0.3.4" +version = "0.3.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4f9bfee30e4dedf0ab8b422f03af778d9612b63f502710fc500a334ebe2de645" +checksum = "1e6853b52649d4ac5c0bd02320cddc5ba956bdb407c4b75a2c6b75bf51500f8c" dependencies = [ "simd-adler32", ] [[package]] name = "flate2" -version = "1.0.31" +version = "1.0.35" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f211bbe8e69bbd0cfdea405084f128ae8b4aaa6b0b522fc8f2b009084797920" +checksum = "c936bfdafb507ebbf50b8074c54fa31c5be9a1e7e5f467dd659697041407d07c" dependencies = [ "crc32fast", "libz-ng-sys", @@ -3101,7 +3128,7 @@ checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" dependencies = [ "proc-macro2", "quote", - "syn 2.0.87", + "syn 2.0.95", ] [[package]] @@ -3186,15 +3213,15 @@ dependencies = [ [[package]] name = "gimli" -version = "0.28.1" +version = "0.31.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4271d37baee1b8c7e4b708028c57d816cf9d2434acb33a549475f78c181f6253" +checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f" [[package]] name = "glob" -version = 
"0.3.1" +version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" +checksum = "a8d1add55171497b4705a648c6b583acafb01d58050a51727785f0b2c8e0a2b2" [[package]] name = "glob-match" @@ -3204,15 +3231,15 @@ checksum = "9985c9503b412198aa4197559e9a318524ebc4519c229bfa05a535828c950b9d" [[package]] name = "globset" -version = "0.4.14" +version = "0.4.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57da3b9b5b85bd66f31093f8c408b90a74431672542466497dcbdfdc02034be1" +checksum = "15f1ce686646e7f1e19bf7d5533fe443a45dbfb990e00629110797578b42fb19" dependencies = [ "aho-corasick", "bstr", "log", - "regex-automata 0.4.7", - "regex-syntax 0.8.4", + "regex-automata 0.4.9", + "regex-syntax 0.8.5", ] [[package]] @@ -3228,9 +3255,9 @@ dependencies = [ [[package]] name = "google-cloud-auth" -version = "0.17.1" +version = "0.17.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "357160f51a60ec3e32169ad687f4abe0ee1e90c73b449aa5d11256c4f1cf2ff6" +checksum = "e57a13fbacc5e9c41ded3ad8d0373175a6b7a6ad430d99e89d314ac121b7ab06" dependencies = [ "async-trait", "base64 0.21.7", @@ -3238,7 +3265,7 @@ dependencies = [ "google-cloud-token", "home", "jsonwebtoken", - "reqwest 0.12.9", + "reqwest 0.12.12", "serde", "serde_json", "thiserror", @@ -3254,7 +3281,7 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "04f945a208886a13d07636f38fb978da371d0abc3e34bad338124b9f8c135a8f" dependencies = [ - "reqwest 0.12.9", + "reqwest 0.12.12", "thiserror", "tokio", ] @@ -3279,7 +3306,7 @@ dependencies = [ "percent-encoding", "pkcs8", "regex", - "reqwest 0.12.9", + "reqwest 0.12.12", "reqwest-middleware", "ring 0.17.8", "serde", @@ -3313,7 +3340,7 @@ dependencies = [ "futures-sink", "futures-util", "http 0.2.12", - "indexmap 2.5.0", + "indexmap 2.7.0", "slab", "tokio", "tokio-util", @@ -3322,17 +3349,17 @@ 
dependencies = [ [[package]] name = "h2" -version = "0.4.6" +version = "0.4.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "524e8ac6999421f49a846c2d4411f337e53497d8ec55d67753beffa43c5d9205" +checksum = "ccae279728d634d083c00f6099cb58f01cc99c145b84b8be2f6c74618d79922e" dependencies = [ "atomic-waker", "bytes", "fnv", "futures-core", "futures-sink", - "http 1.1.0", - "indexmap 2.5.0", + "http 1.2.0", + "indexmap 2.7.0", "slab", "tokio", "tokio-util", @@ -3383,6 +3410,12 @@ dependencies = [ "allocator-api2", ] +[[package]] +name = "hashbrown" +version = "0.15.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf151400ff0baff5465007dd2f3e717f3fe502074ca563069ce3a6629d07b289" + [[package]] name = "heck" version = "0.3.3" @@ -3413,18 +3446,6 @@ dependencies = [ "libc", ] -[[package]] -name = "hermit-abi" -version = "0.3.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024" - -[[package]] -name = "hermit-abi" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fbf6a919d6cf397374f7dfeeea91d974c7c0a7221d0d0f4f20d859d329e53fcc" - [[package]] name = "hex" version = "0.4.3" @@ -3448,11 +3469,11 @@ dependencies = [ [[package]] name = "home" -version = "0.5.9" +version = "0.5.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3d1354bf6b7235cb4a0576c2619fd4ed18183f689b12b006a0ee7329eeff9a5" +checksum = "589533453244b0995c858700322199b2becb13b627df2851f64a2775d024abcf" dependencies = [ - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] @@ -3477,9 +3498,9 @@ dependencies = [ [[package]] name = "http" -version = "1.1.0" +version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "21b9ddb458710bc376481b842f5da65cdf31522de232c1ca8146abce2a358258" +checksum = 
"f16ca2af56261c99fba8bac40a10251ce8188205a4c448fbb745a2e4daa76fea" dependencies = [ "bytes", "fnv", @@ -3504,7 +3525,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184" dependencies = [ "bytes", - "http 1.1.0", + "http 1.2.0", ] [[package]] @@ -3515,7 +3536,7 @@ checksum = "793429d76616a256bcb62c2a2ec2bed781c8307e797e2598c50010f2bee2544f" dependencies = [ "bytes", "futures-util", - "http 1.1.0", + "http 1.2.0", "http-body 1.0.1", "pin-project-lite", ] @@ -3542,9 +3563,9 @@ dependencies = [ [[package]] name = "httparse" -version = "1.9.4" +version = "1.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fcc0b4a115bf80b728eb8ea024ad5bd707b615bfed49e0665b6e0f86fd082d9" +checksum = "7d71d3574edd2771538b901e6549113b4006ece66150fb69c0fb6d9a2adae946" [[package]] name = "httpdate" @@ -3554,18 +3575,18 @@ checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" [[package]] name = "hybrid-array" -version = "0.2.0-rc.11" +version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a5a41e5b0754cae5aaf7915f1df1147ba8d316fc6e019cfcc00fbaba96d5e030" +checksum = "f2d35805454dc9f8662a98d6d61886ffe26bd465f5960e0e55345c70d5c0d2a9" dependencies = [ "typenum", ] [[package]] name = "hyper" -version = "0.14.30" +version = "0.14.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a152ddd61dfaec7273fe8419ab357f33aee0d914c5f4efbf0d96fa749eea5ec9" +checksum = "41dfc780fdec9373c01bae43289ea34c972e40ee3c9f6b3c8801a35f35586ce7" dependencies = [ "bytes", "futures-channel", @@ -3587,15 +3608,15 @@ dependencies = [ [[package]] name = "hyper" -version = "1.5.0" +version = "1.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bbbff0a806a4728c99295b254c8838933b5b082d75e3cb70c8dab21fdfbcfa9a" +checksum = 
"256fb8d4bd6413123cc9d91832d78325c48ff41677595be797d90f42969beae0" dependencies = [ "bytes", "futures-channel", "futures-util", - "h2 0.4.6", - "http 1.1.0", + "h2 0.4.7", + "http 1.2.0", "http-body 1.0.1", "httparse", "httpdate", @@ -3612,7 +3633,7 @@ version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2b90d566bffbce6a75bd8b09a05aa8c2cb1fabb6cb348f8840c9e4c90a0d83b0" dependencies = [ - "hyper 1.5.0", + "hyper 1.5.2", "hyper-util", "pin-project-lite", "tokio", @@ -3626,7 +3647,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d6183ddfa99b85da61a140bea0efc93fdf56ceaa041b37d553518030827f9905" dependencies = [ "bytes", - "hyper 0.14.30", + "hyper 0.14.32", "native-tls", "tokio", "tokio-native-tls", @@ -3640,7 +3661,7 @@ checksum = "70206fc6890eaca9fde8a0bf71caa2ddfc9fe045ac9e5c70df101a7dbde866e0" dependencies = [ "bytes", "http-body-util", - "hyper 1.5.0", + "hyper 1.5.2", "hyper-util", "native-tls", "tokio", @@ -3657,9 +3678,9 @@ dependencies = [ "bytes", "futures-channel", "futures-util", - "http 1.1.0", + "http 1.2.0", "http-body 1.0.1", - "hyper 1.5.0", + "hyper 1.5.2", "pin-project-lite", "socket2", "tokio", @@ -3673,9 +3694,9 @@ version = "0.3.0-dev0" [[package]] name = "iana-time-zone" -version = "0.1.60" +version = "0.1.61" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e7ffbb5a1b541ea2561f8c41c087286cc091e21e556a4f09a8f6cbf17b69b141" +checksum = "235e081f3925a06703c2d0117ea8b91f042756fd6e7a6e5d901e8ca1a996b220" dependencies = [ "android_system_properties", "core-foundation-sys", @@ -3694,6 +3715,124 @@ dependencies = [ "cc", ] +[[package]] +name = "icu_collections" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db2fa452206ebee18c4b5c2274dbf1de17008e874b4dc4f0aea9d01ca79e4526" +dependencies = [ + "displaydoc", + "yoke", + "zerofrom", + "zerovec", +] + +[[package]] +name = "icu_locid" +version = "1.5.0" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13acbb8371917fc971be86fc8057c41a64b521c184808a698c02acc242dbf637" +dependencies = [ + "displaydoc", + "litemap", + "tinystr", + "writeable", + "zerovec", +] + +[[package]] +name = "icu_locid_transform" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "01d11ac35de8e40fdeda00d9e1e9d92525f3f9d887cdd7aa81d727596788b54e" +dependencies = [ + "displaydoc", + "icu_locid", + "icu_locid_transform_data", + "icu_provider", + "tinystr", + "zerovec", +] + +[[package]] +name = "icu_locid_transform_data" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fdc8ff3388f852bede6b579ad4e978ab004f139284d7b28715f773507b946f6e" + +[[package]] +name = "icu_normalizer" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19ce3e0da2ec68599d193c93d088142efd7f9c5d6fc9b803774855747dc6a84f" +dependencies = [ + "displaydoc", + "icu_collections", + "icu_normalizer_data", + "icu_properties", + "icu_provider", + "smallvec", + "utf16_iter", + "utf8_iter", + "write16", + "zerovec", +] + +[[package]] +name = "icu_normalizer_data" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8cafbf7aa791e9b22bec55a167906f9e1215fd475cd22adfcf660e03e989516" + +[[package]] +name = "icu_properties" +version = "1.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93d6020766cfc6302c15dbbc9c8778c37e62c14427cb7f6e601d849e092aeef5" +dependencies = [ + "displaydoc", + "icu_collections", + "icu_locid_transform", + "icu_properties_data", + "icu_provider", + "tinystr", + "zerovec", +] + +[[package]] +name = "icu_properties_data" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67a8effbc3dd3e4ba1afa8ad918d5684b8868b3b26500753effea8d2eed19569" + +[[package]] +name = "icu_provider" 
+version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ed421c8a8ef78d3e2dbc98a973be2f3770cb42b606e3ab18d6237c4dfde68d9" +dependencies = [ + "displaydoc", + "icu_locid", + "icu_provider_macros", + "stable_deref_trait", + "tinystr", + "writeable", + "yoke", + "zerofrom", + "zerovec", +] + +[[package]] +name = "icu_provider_macros" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ec89e9337638ecdc08744df490b221a7399bf8d164eb52a665454e60e075ad6" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.95", +] + [[package]] name = "ident_case" version = "1.0.1" @@ -3702,12 +3841,23 @@ checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" [[package]] name = "idna" -version = "0.5.0" +version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "634d9b1461af396cad843f47fdba5597a4f9e6ddd4bfb6ff5d85028c25cb12f6" +checksum = "686f825264d630750a544639377bae737628043f20d38bbc029e8f29ea968a7e" dependencies = [ - "unicode-bidi", - "unicode-normalization", + "idna_adapter", + "smallvec", + "utf8_iter", +] + +[[package]] +name = "idna_adapter" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "daca1df1c957320b2cf139ac61e7bd64fed304c5040df000a745aa1de3b4ef71" +dependencies = [ + "icu_normalizer", + "icu_properties", ] [[package]] @@ -3756,12 +3906,12 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.5.0" +version = "2.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68b900aa2f7301e21c36462b170ee99994de34dff39a4a6a528e80e7376d07e5" +checksum = "62f822373a4fe84d4bb149bf54e584a7f4abec90e072ed49cda0edea5b95471f" dependencies = [ "equivalent", - "hashbrown 0.14.5", + "hashbrown 0.15.2", "serde", ] @@ -3804,26 +3954,18 @@ dependencies = [ [[package]] name = "inventory" -version = "0.3.15" +version = "0.3.16" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "f958d3d68f4167080a18141e10381e7634563984a537f2a49a30fd8e53ac5767" +checksum = "e5d80fade88dd420ce0d9ab6f7c58ef2272dde38db874657950f827d4982c817" +dependencies = [ + "rustversion", +] [[package]] name = "ipnet" -version = "2.9.0" +version = "2.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f518f335dce6725a761382244631d86cf0ccb2863413590b31338feb467f9c3" - -[[package]] -name = "is-terminal" -version = "0.4.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "261f68e344040fbd0edea105bef17c66edf46f984ddb1115b775ce31be948f4b" -dependencies = [ - "hermit-abi 0.4.0", - "libc", - "windows-sys 0.52.0", -] +checksum = "ddc24109865250148c2e0f3d25d4f0f479571723792d3802153c60922a4fb708" [[package]] name = "is_ci" @@ -3855,11 +3997,20 @@ dependencies = [ "either", ] +[[package]] +name = "itertools" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186" +dependencies = [ + "either", +] + [[package]] name = "itoa" -version = "1.0.11" +version = "1.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" +checksum = "d75a2a4b1b190afb6f5425f10f6a8f959d2ea0b9c2b1d79553551850539e4674" [[package]] name = "jaq-core" @@ -3887,7 +4038,7 @@ dependencies = [ "ahash", "dyn-clone", "hifijson", - "indexmap 2.5.0", + "indexmap 2.7.0", "jaq-syn", "once_cell", "serde_json", @@ -3938,10 +4089,11 @@ checksum = "f5d4a7da358eff58addd2877a45865158f0d78c911d43a5784ceb7bbf52833b0" [[package]] name = "js-sys" -version = "0.3.69" +version = "0.3.76" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "29c15563dc2726973df627357ce0c9ddddbea194836909d655df6a75d2cf296d" +checksum = "6717b6b5b077764fb5966237269cb3c64edddde4b14ce42647430a78ced9e7b7" 
dependencies = [ + "once_cell", "wasm-bindgen", ] @@ -3981,11 +4133,24 @@ version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2cde5de06e8d4c2faabc400238f9ae1c74d5412d03a7bd067645ccbc47070e46" dependencies = [ - "lexical-parse-float", - "lexical-parse-integer", - "lexical-util", - "lexical-write-float", - "lexical-write-integer", + "lexical-parse-float 0.8.5", + "lexical-parse-integer 0.8.6", + "lexical-util 0.8.5", + "lexical-write-float 0.8.5", + "lexical-write-integer 0.8.5", +] + +[[package]] +name = "lexical-core" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b765c31809609075565a70b4b71402281283aeda7ecaf4818ac14a7b2ade8958" +dependencies = [ + "lexical-parse-float 1.0.5", + "lexical-parse-integer 1.0.5", + "lexical-util 1.0.6", + "lexical-write-float 1.0.5", + "lexical-write-integer 1.0.5", ] [[package]] @@ -3994,8 +4159,19 @@ version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "683b3a5ebd0130b8fb52ba0bdc718cc56815b6a097e28ae5a6997d0ad17dc05f" dependencies = [ - "lexical-parse-integer", - "lexical-util", + "lexical-parse-integer 0.8.6", + "lexical-util 0.8.5", + "static_assertions", +] + +[[package]] +name = "lexical-parse-float" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "de6f9cb01fb0b08060209a057c048fcbab8717b4c1ecd2eac66ebfe39a65b0f2" +dependencies = [ + "lexical-parse-integer 1.0.5", + "lexical-util 1.0.6", "static_assertions", ] @@ -4005,7 +4181,17 @@ version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6d0994485ed0c312f6d965766754ea177d07f9c00c9b82a5ee62ed5b47945ee9" dependencies = [ - "lexical-util", + "lexical-util 0.8.5", + "static_assertions", +] + +[[package]] +name = "lexical-parse-integer" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"72207aae22fc0a121ba7b6d479e42cbfea549af1479c3f3a4f12c70dd66df12e" +dependencies = [ + "lexical-util 1.0.6", "static_assertions", ] @@ -4018,14 +4204,34 @@ dependencies = [ "static_assertions", ] +[[package]] +name = "lexical-util" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a82e24bf537fd24c177ffbbdc6ebcc8d54732c35b50a3f28cc3f4e4c949a0b3" +dependencies = [ + "static_assertions", +] + [[package]] name = "lexical-write-float" version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "accabaa1c4581f05a3923d1b4cfd124c329352288b7b9da09e766b0668116862" dependencies = [ - "lexical-util", - "lexical-write-integer", + "lexical-util 0.8.5", + "lexical-write-integer 0.8.5", + "static_assertions", +] + +[[package]] +name = "lexical-write-float" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c5afc668a27f460fb45a81a757b6bf2f43c2d7e30cb5a2dcd3abf294c78d62bd" +dependencies = [ + "lexical-util 1.0.6", + "lexical-write-integer 1.0.5", "static_assertions", ] @@ -4035,15 +4241,25 @@ version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e1b6f3d1f4422866b68192d62f77bc5c700bee84f3069f2469d7bc8c77852446" dependencies = [ - "lexical-util", + "lexical-util 0.8.5", + "static_assertions", +] + +[[package]] +name = "lexical-write-integer" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "629ddff1a914a836fb245616a7888b62903aae58fa771e1d83943035efa0f978" +dependencies = [ + "lexical-util 1.0.6", "static_assertions", ] [[package]] name = "libc" -version = "0.2.155" +version = "0.2.169" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c" +checksum = "b5aba8db14291edd000dfcc4d620c7ebfb122c613afb886ca8803fa4e128a20a" [[package]] name = "libflate" @@ -4067,9 +4283,9 @@ dependencies = 
[ [[package]] name = "libloading" -version = "0.8.5" +version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4979f22fdb869068da03c9f7528f8297c6fd2606bc3a4affe42e6a823fdb8da4" +checksum = "fc2f4eb4bc735547cfed7c0a4922cbd04a4655978c09b54f1f7b228750664c34" dependencies = [ "cfg-if", "windows-targets 0.52.6", @@ -4077,15 +4293,15 @@ dependencies = [ [[package]] name = "libm" -version = "0.2.8" +version = "0.2.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4ec2a862134d2a7d32d7983ddcdd1c4923530833c9f2ea1a44fc5fa473989058" +checksum = "8355be11b20d696c8f18f6cc018c4e372165b1fa8126cef092399c9951984ffa" [[package]] name = "libz-ng-sys" -version = "1.1.15" +version = "1.1.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c6409efc61b12687963e602df8ecf70e8ddacf95bc6576bcf16e3ac6328083c5" +checksum = "8f0f7295a34685977acb2e8cc8b08ee4a8dffd6cf278eeccddbe1ed55ba815d5" dependencies = [ "cmake", "libc", @@ -4097,6 +4313,12 @@ version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89" +[[package]] +name = "litemap" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ee93343901ab17bd981295f2cf0026d4ad018c7c31ba84549a4ddbb47a45104" + [[package]] name = "lock_api" version = "0.4.12" @@ -4245,21 +4467,20 @@ dependencies = [ [[package]] name = "miniz_oxide" -version = "0.7.4" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8a240ddb74feaf34a79a7add65a741f3167852fba007066dcac1ca548d89c08" +checksum = "4ffbe83022cedc1d264172192511ae958937694cd57ce297164951b8b3568394" dependencies = [ - "adler", + "adler2", "simd-adler32", ] [[package]] name = "mio" -version = "1.0.2" +version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"80e04d1dcff3aae0704555fe5fee3bcfaf3d1fdf8a7e521d5b9d2b42acb52cec" +checksum = "2886843bf800fba2e3377cff24abf6379b4c4d5c6681eaf9ea5b0d15090450bd" dependencies = [ - "hermit-abi 0.3.9", "libc", "wasi 0.11.0+wasi-snapshot-preview1", "windows-sys 0.52.0", @@ -4312,14 +4533,16 @@ dependencies = [ [[package]] name = "ndarray" -version = "0.15.6" +version = "0.16.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "adb12d4e967ec485a5f71c6311fe28158e9d6f4bc4a447b474184d0f91a8fa32" +checksum = "882ed72dce9365842bf196bdeedf5055305f11fc8c03dee7bb0194a6cad34841" dependencies = [ "matrixmultiply", "num-complex", "num-integer", "num-traits", + "portable-atomic", + "portable-atomic-util", "rawpointer", ] @@ -4471,9 +4694,9 @@ checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3" [[package]] name = "numpy" -version = "0.21.0" +version = "0.23.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec170733ca37175f5d75a5bea5911d6ff45d2cd52849ce98b685394e4f2f37f4" +checksum = "b94caae805f998a07d33af06e6a3891e38556051b8045c615470a71590e13e78" dependencies = [ "libc", "ndarray", @@ -4481,7 +4704,7 @@ dependencies = [ "num-integer", "num-traits", "pyo3", - "rustc-hash", + "rustc-hash 2.1.0", ] [[package]] @@ -4505,18 +4728,18 @@ dependencies = [ [[package]] name = "object" -version = "0.32.2" +version = "0.36.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a6a622008b6e321afc04970976f62ee297fdbaa6f95318ca343e3eebb9648441" +checksum = "62948e14d923ea95ea2c7c86c71013138b66525b86bdc08d2dcc262bdb497b87" dependencies = [ "memchr", ] [[package]] name = "once_cell" -version = "1.19.0" +version = "1.20.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" +checksum = "1261fe7e33c73b354eab43b1273a57c8f967d0391e80353e51f764ac02cf6775" [[package]] name = "oneshot" @@ -4532,9 +4755,9 @@ checksum = 
"b410bbe7e14ab526a0e86877eb47c6996a2bd7746f027ba551028c925390e4e9" [[package]] name = "openssl" -version = "0.10.66" +version = "0.10.68" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9529f4786b70a3e8c61e11179af17ab6188ad8d0ded78c5529441ed39d4bd9c1" +checksum = "6174bc48f102d208783c2c84bf931bb75927a617866870de8a4ea85597f871f5" dependencies = [ "bitflags 2.6.0", "cfg-if", @@ -4553,7 +4776,7 @@ checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.87", + "syn 2.0.95", ] [[package]] @@ -4564,18 +4787,18 @@ checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf" [[package]] name = "openssl-src" -version = "300.3.1+3.3.1" +version = "300.4.1+3.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7259953d42a81bf137fbbd73bd30a8e1914d6dce43c2b90ed575783a22608b91" +checksum = "faa4eac4138c62414b5622d1b31c5c304f34b406b013c079c2bbc652fdd6678c" dependencies = [ "cc", ] [[package]] name = "openssl-sys" -version = "0.9.103" +version = "0.9.104" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f9e8deee91df40a943c71b917e5874b951d32a802526c85721ce3b776c929d6" +checksum = "45abf306cbf99debc8195b66b7346498d7b10c210de50418b5ccd7ceba08c741" dependencies = [ "cc", "libc", @@ -4615,9 +4838,9 @@ checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39" [[package]] name = "parking" -version = "2.2.0" +version = "2.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bb813b8af86854136c6922af0598d719255ecb2179515e6e7730d468f05c9cae" +checksum = "f38d5652c16fde515bb1ecef450ab0f6a219d619a7274976324d5e377f7dceba" [[package]] name = "parking_lot" @@ -4662,7 +4885,7 @@ checksum = "1e401f977ab385c9e4e3ab30627d6f26d00e2c73eef317493c4ec6d468726cf8" dependencies = [ "cfg-if", "libc", - "redox_syscall 0.5.3", + "redox_syscall 0.5.8", "smallvec", "windows-targets 
0.52.6", ] @@ -4686,7 +4909,7 @@ dependencies = [ "criterion", "flate2", "futures", - "indexmap 2.5.0", + "indexmap 2.7.0", "lz4", "lz4_flex", "parquet-format-safe", @@ -4749,18 +4972,18 @@ checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" [[package]] name = "phf" -version = "0.11.2" +version = "0.11.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ade2d8b8f33c7333b51bcf0428d37e217e9f32192ae4772156f65063b8ce03dc" +checksum = "1fd6780a80ae0c52cc120a26a1a42c1ae51b247a253e4e06113d23d2c2edd078" dependencies = [ "phf_shared", ] [[package]] name = "phf_codegen" -version = "0.11.2" +version = "0.11.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e8d39688d359e6b34654d328e262234662d16cc0f60ec8dcbe5e718709342a5a" +checksum = "aef8048c789fa5e851558d709946d6d79a8ff88c0440c587967f8e94bfb1216a" dependencies = [ "phf_generator", "phf_shared", @@ -4768,9 +4991,9 @@ dependencies = [ [[package]] name = "phf_generator" -version = "0.11.2" +version = "0.11.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "48e4cc64c2ad9ebe670cb8fd69dd50ae301650392e81c05f9bfcb2d5bdbc24b0" +checksum = "3c80231409c20246a13fddb31776fb942c38553c51e871f8cbd687a4cfb5843d" dependencies = [ "phf_shared", "rand 0.8.5", @@ -4778,38 +5001,38 @@ dependencies = [ [[package]] name = "phf_shared" -version = "0.11.2" +version = "0.11.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "90fcb95eef784c2ac79119d1dd819e162b5da872ce6f3c3abe1e8ca1c082f72b" +checksum = "67eabc2ef2a60eb7faa00097bd1ffdb5bd28e62bf39990626a582201b7a754e5" dependencies = [ "siphasher", ] [[package]] name = "pin-project" -version = "1.1.5" +version = "1.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b6bf43b791c5b9e34c3d182969b4abb522f9343702850a2e57f460d00d09b4b3" +checksum = "1e2ec53ad785f4d35dac0adea7f7dc6f1bb277ad84a680c7afefeae05d1f5916" dependencies = [ 
"pin-project-internal", ] [[package]] name = "pin-project-internal" -version = "1.1.5" +version = "1.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2f38a4412a78282e09a2cf38d195ea5420d15ba0602cb375210efbc877243965" +checksum = "d56a66c0c55993aa927429d0f8a0abfd74f084e4d9c192cffed01e418d83eefb" dependencies = [ "proc-macro2", "quote", - "syn 2.0.87", + "syn 2.0.95", ] [[package]] name = "pin-project-lite" -version = "0.2.14" +version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bda66fc9667c18cb2758a2ac84d1167245054bcf85d5d1aaa6923f45801bdd02" +checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b" [[package]] name = "pin-utils" @@ -4829,9 +5052,9 @@ dependencies = [ [[package]] name = "pkg-config" -version = "0.3.30" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d231b230927b5e4ad203db57bbcbee2802f6bce620b1e4a9024a07d94e2907ec" +checksum = "953ec861398dccce10c670dfeaf3ec4911ca479e9c02154b3a215178c5f566f2" [[package]] name = "plain" @@ -4850,9 +5073,9 @@ dependencies = [ [[package]] name = "plotters" -version = "0.3.6" +version = "0.3.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a15b6eccb8484002195a3e44fe65a4ce8e93a625797a063735536fd59cb01cf3" +checksum = "5aeb6f403d7a4911efb1e33402027fc44f29b5bf6def3effcc22d7bb75f2b747" dependencies = [ "num-traits", "plotters-backend", @@ -4863,24 +5086,24 @@ dependencies = [ [[package]] name = "plotters-backend" -version = "0.3.6" +version = "0.3.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "414cec62c6634ae900ea1c56128dfe87cf63e7caece0852ec76aba307cebadb7" +checksum = "df42e13c12958a16b3f7f4386b9ab1f3e7933914ecea48da7139435263a4172a" [[package]] name = "plotters-svg" -version = "0.3.6" +version = "0.3.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"81b30686a7d9c3e010b84284bdd26a29f2138574f52f5eb6f794fc0ad924e705" +checksum = "51bae2ac328883f7acdfea3d66a7c35751187f870bc81f94563733a154d7a670" dependencies = [ "plotters-backend", ] [[package]] name = "png" -version = "0.17.13" +version = "0.17.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "06e4b0d3d1312775e782c86c91a111aa1f910cbb65e1337f9975b5f9a554b5e1" +checksum = "82151a2fc869e011c153adc57cf2789ccb8d9906ce52c0b39a6b5697749d7526" dependencies = [ "bitflags 1.3.2", "crc32fast", @@ -4891,9 +5114,18 @@ dependencies = [ [[package]] name = "portable-atomic" -version = "1.7.0" +version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da544ee218f0d287a911e9c99a39a8c9bc8fcad3cb8db5959940044ecfc67265" +checksum = "280dc24453071f1b63954171985a0b0d30058d287960968b9b2aca264c8d4ee6" + +[[package]] +name = "portable-atomic-util" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d8a2f0d8d040d7848a709caf78912debcc3f33ee4b3cac47d73d1e1069e83507" +dependencies = [ + "portable-atomic", +] [[package]] name = "powerfmt" @@ -4922,28 +5154,28 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.86" +version = "1.0.92" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e719e8df665df0d1c8fbfd238015744736151d4445ec0836b8e628aae103b77" +checksum = "37d3544b3f2748c54e147655edb5025752e2303145b5aefb3c3ea2c78b973bb0" dependencies = [ "unicode-ident", ] [[package]] name = "proptest" -version = "1.5.0" +version = "1.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b4c2511913b88df1637da85cc8d96ec8e43a3f8bb8ccb71ee1ac240d6f3df58d" +checksum = "14cae93065090804185d3b75f0bf93b8eeda30c7a9b4a33d3bdb3988d6229e50" dependencies = [ - "bit-set", - "bit-vec", + "bit-set 0.8.0", + "bit-vec 0.8.0", "bitflags 2.6.0", "lazy_static", "num-traits", "rand 0.8.5", "rand_chacha 0.3.1", "rand_xorshift", - 
"regex-syntax 0.8.4", + "regex-syntax 0.8.5", "rusty-fork", "tempfile", "unarray", @@ -4971,12 +5203,12 @@ dependencies = [ [[package]] name = "prost" -version = "0.13.3" +version = "0.13.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b0487d90e047de87f984913713b85c601c05609aad5b0df4b4573fbf69aa13f" +checksum = "2c0fef6c4230e4ccf618a35c59d7ede15dea37de8427500f50aff708806e42ec" dependencies = [ "bytes", - "prost-derive 0.13.3", + "prost-derive 0.13.4", ] [[package]] @@ -5007,39 +5239,39 @@ dependencies = [ [[package]] name = "prost-derive" -version = "0.13.3" +version = "0.13.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e9552f850d5f0964a4e4d0bf306459ac29323ddfbae05e35a7c0d35cb0803cc5" +checksum = "157c5a9d7ea5c2ed2d9fb8f495b64759f7816c7eaea54ba3978f0d63000162e3" dependencies = [ "anyhow", - "itertools 0.11.0", + "itertools 0.13.0", "proc-macro2", "quote", - "syn 2.0.87", + "syn 2.0.95", ] [[package]] name = "prost-types" -version = "0.13.3" +version = "0.13.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4759aa0d3a6232fb8dbdb97b61de2c20047c68aca932c7ed76da9d788508d670" +checksum = "cc2f1e56baa61e93533aebc21af4d2134b70f66275e0fcdf3cbe43d77ff7e8fc" dependencies = [ - "prost 0.13.3", + "prost 0.13.4", ] [[package]] name = "pyo3" -version = "0.21.2" +version = "0.23.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a5e00b96a521718e08e03b1a622f01c8a8deb50719335de3f60b3b3950f069d8" +checksum = "e484fd2c8b4cb67ab05a318f1fd6fa8f199fcc30819f08f07d200809dba26c15" dependencies = [ "cfg-if", - "indexmap 2.5.0", + "indexmap 2.7.0", "indoc", "inventory", "libc", "memoffset", - "parking_lot 0.12.3", + "once_cell", "portable-atomic", "pyo3-build-config", "pyo3-ffi", @@ -5049,9 +5281,9 @@ dependencies = [ [[package]] name = "pyo3-build-config" -version = "0.21.2" +version = "0.23.3" source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "7883df5835fafdad87c0d888b266c8ec0f4c9ca48a5bed6bbb592e8dedee1b50" +checksum = "dc0e0469a84f208e20044b98965e1561028180219e35352a2afaf2b942beff3b" dependencies = [ "once_cell", "target-lexicon", @@ -5059,9 +5291,9 @@ dependencies = [ [[package]] name = "pyo3-ffi" -version = "0.21.2" +version = "0.23.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "01be5843dc60b916ab4dad1dca6d20b9b4e6ddc8e15f50c47fe6d85f1fb97403" +checksum = "eb1547a7f9966f6f1a0f0227564a9945fe36b90da5a93b3933fc3dc03fae372d" dependencies = [ "libc", "pyo3-build-config", @@ -5069,9 +5301,9 @@ dependencies = [ [[package]] name = "pyo3-log" -version = "0.11.0" +version = "0.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3ac84e6eec1159bc2a575c9ae6723baa6ee9d45873e9bebad1e3ad7e8d28a443" +checksum = "be5bb22b77965a7b5394e9aae9897a0607b51df5167561ffc3b02643b4200bc7" dependencies = [ "arc-swap", "log", @@ -5080,27 +5312,27 @@ dependencies = [ [[package]] name = "pyo3-macros" -version = "0.21.2" +version = "0.23.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77b34069fc0682e11b31dbd10321cbf94808394c56fd996796ce45217dfac53c" +checksum = "fdb6da8ec6fa5cedd1626c886fc8749bdcbb09424a86461eb8cdf096b7c33257" dependencies = [ "proc-macro2", "pyo3-macros-backend", "quote", - "syn 2.0.87", + "syn 2.0.95", ] [[package]] name = "pyo3-macros-backend" -version = "0.21.2" +version = "0.23.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08260721f32db5e1a5beae69a55553f56b99bd0e1c3e6e0a5e8851a9d0f5a85c" +checksum = "38a385202ff5a92791168b1136afae5059d3ac118457bb7bc304c197c2d33e7d" dependencies = [ - "heck 0.4.1", + "heck 0.5.0", "proc-macro2", "pyo3-build-config", "quote", - "syn 2.0.87", + "syn 2.0.95", ] [[package]] @@ -5138,9 +5370,9 @@ dependencies = [ [[package]] name = "quote" -version = "1.0.36" +version = "1.0.38" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7" +checksum = "0e4dccaaaf89514f546c693ddc140f729f958c247918a13380cccc6078391acc" dependencies = [ "proc-macro2", ] @@ -5272,9 +5504,9 @@ dependencies = [ [[package]] name = "redox_syscall" -version = "0.5.3" +version = "0.5.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2a908a6e00f1fdd0dfd9c0eb08ce85126f6d8bbda50017e74bc4a4b7d4a926a4" +checksum = "03a862b389f93e68874fbf580b9de08dd02facb9a788ebadaf4a3fd33cf58834" dependencies = [ "bitflags 2.6.0", ] @@ -5296,19 +5528,19 @@ checksum = "bcc303e793d3734489387d205e9b186fac9c6cfacedd98cbb2e8a5943595f3e6" dependencies = [ "proc-macro2", "quote", - "syn 2.0.87", + "syn 2.0.95", ] [[package]] name = "regex" -version = "1.10.6" +version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4219d74c6b67a3654a9fbebc4b419e22126d13d2f3c4a07ee0cb61ff79a79619" +checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191" dependencies = [ "aho-corasick", "memchr", - "regex-automata 0.4.7", - "regex-syntax 0.8.4", + "regex-automata 0.4.9", + "regex-syntax 0.8.5", ] [[package]] @@ -5322,13 +5554,13 @@ dependencies = [ [[package]] name = "regex-automata" -version = "0.4.7" +version = "0.4.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38caf58cc5ef2fed281f89292ef23f6365465ed9a41b7a7754eb4e26496c92df" +checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908" dependencies = [ "aho-corasick", "memchr", - "regex-syntax 0.8.4", + "regex-syntax 0.8.5", ] [[package]] @@ -5339,9 +5571,9 @@ checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" [[package]] name = "regex-syntax" -version = "0.8.4" +version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"7a66a03ae7c801facd77a29370b4faec201768915ac14a721ba36f20bc9c209b" +checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" [[package]] name = "relative-path" @@ -5363,7 +5595,7 @@ dependencies = [ "h2 0.3.26", "http 0.2.12", "http-body 0.4.6", - "hyper 0.14.30", + "hyper 0.14.32", "hyper-tls 0.5.0", "ipnet", "js-sys", @@ -5393,19 +5625,19 @@ dependencies = [ [[package]] name = "reqwest" -version = "0.12.9" +version = "0.12.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a77c62af46e79de0a562e1a9849205ffcb7fc1238876e9bd743357570e04046f" +checksum = "43e734407157c3c2034e0258f5e4473ddb361b1e85f95a66690d67264d7cd1da" dependencies = [ "base64 0.22.1", "bytes", "encoding_rs", "futures-core", "futures-util", - "http 1.1.0", + "http 1.2.0", "http-body 1.0.1", "http-body-util", - "hyper 1.5.0", + "hyper 1.5.2", "hyper-tls 0.6.0", "hyper-util", "ipnet", @@ -5421,10 +5653,11 @@ dependencies = [ "serde", "serde_json", "serde_urlencoded", - "sync_wrapper 1.0.1", + "sync_wrapper 1.0.2", "tokio", "tokio-native-tls", "tokio-util", + "tower 0.5.2", "tower-service", "url", "wasm-bindgen", @@ -5442,8 +5675,8 @@ checksum = "562ceb5a604d3f7c885a792d42c199fd8af239d0a51b2fa6a78aafa092452b04" dependencies = [ "anyhow", "async-trait", - "http 1.1.0", - "reqwest 0.12.9", + "http 1.2.0", + "reqwest 0.12.12", "serde", "thiserror", "tower-service", @@ -5459,10 +5692,10 @@ dependencies = [ "async-trait", "futures", "getrandom 0.2.15", - "http 1.1.0", - "hyper 1.5.0", + "http 1.2.0", + "hyper 1.5.2", "parking_lot 0.11.2", - "reqwest 0.12.9", + "reqwest 0.12.12", "reqwest-middleware", "retry-policies", "tokio", @@ -5540,7 +5773,7 @@ dependencies = [ "regex", "relative-path", "rustc_version", - "syn 2.0.87", + "syn 2.0.95", "unicode-ident", ] @@ -5556,26 +5789,32 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" +[[package]] +name 
= "rustc-hash" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7fb8039b3032c191086b10f11f319a6e99e1e82889c5cc6046f515c9db1d497" + [[package]] name = "rustc_version" -version = "0.4.0" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfa0f585226d2e68097d4f95d113b15b83a82e819ab25717ec0590d9584ef366" +checksum = "cfcb3a22ef46e85b45de6ee7e79d063319ebb6594faafcf1c225ea92ab6e9b92" dependencies = [ "semver", ] [[package]] name = "rustix" -version = "0.38.34" +version = "0.38.42" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70dc5ec042f7a43c4a73241207cecc9873a06d45debb38b329f8541d85c2730f" +checksum = "f93dc38ecbab2eb790ff964bb77fa94faf256fd3e73285fd7ba0903b76bedb85" dependencies = [ "bitflags 2.6.0", "errno", "libc", "linux-raw-sys", - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] @@ -5598,15 +5837,15 @@ dependencies = [ [[package]] name = "rustls-pki-types" -version = "1.10.0" +version = "1.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "16f1201b3c9a7ee8039bcadc17b7e605e2945b27eee7631788c1bd2b0643674b" +checksum = "d2bf47e6ff922db3825eb750c4e2ff784c6ff8fb9e13046ef6a1d1c5401b0b37" [[package]] name = "rustversion" -version = "1.0.17" +version = "1.0.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "955d28af4278de8121b7ebeb796b6a45735dc01436d898801014aced2773a3d6" +checksum = "f7c45b9784283f1b2e7fb61b42047c2fd678ef0960d4f6f1eba131594cc369d4" [[package]] name = "rusty-fork" @@ -5696,11 +5935,11 @@ dependencies = [ [[package]] name = "schannel" -version = "0.1.23" +version = "0.1.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fbc91545643bcf3a0bbb6569265615222618bdf33ce4ffbbd13c4bbd4c093534" +checksum = "1f29ebaa345f945cec9fbbc532eb307f0fdad8161f281b6369539c8d84876b3d" dependencies = [ - "windows-sys 0.52.0", + "windows-sys 0.59.0", 
] [[package]] @@ -5726,7 +5965,7 @@ checksum = "1db149f81d46d2deba7cd3c50772474707729550221e69588478ebf9ada425ae" dependencies = [ "proc-macro2", "quote", - "syn 2.0.87", + "syn 2.0.95", ] [[package]] @@ -5753,9 +5992,9 @@ dependencies = [ [[package]] name = "security-framework-sys" -version = "2.11.1" +version = "2.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75da29fe9b9b08fe9d6b22b5b4bcbc75d8db3aa31e639aa56bb62e9d46bfceaf" +checksum = "1863fd3768cd83c56a7f60faa4dc0d403f1b6df0a38c3c25f44b7894e45370d5" dependencies = [ "core-foundation-sys", "libc", @@ -5763,9 +6002,9 @@ dependencies = [ [[package]] name = "semver" -version = "1.0.23" +version = "1.0.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "61697e0a1c7e512e84a621326239844a24d8207b4669b41bc18b32ea5cbf988b" +checksum = "3cb6eb87a131f756572d7fb904f6e7b68633f09cca868c5df1c4b8d1a694bbba" [[package]] name = "seq-macro" @@ -5775,18 +6014,18 @@ checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4" [[package]] name = "serde" -version = "1.0.206" +version = "1.0.217" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b3e4cd94123dd520a128bcd11e34d9e9e423e7e3e50425cb1b4b1e3549d0284" +checksum = "02fc4265df13d6fa1d00ecff087228cc0a2b5f3c0e87e258d8b94a156e984c70" dependencies = [ "serde_derive", ] [[package]] name = "serde_arrow" -version = "0.11.6" +version = "0.11.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ff56acef131ef74bacc5e86c5038b524d61dee59d65c9e3e5e0f35b9de98cf99" +checksum = "f11dc39a704b214e72e4cec092fff98180ac432f5f7850dd0d55e9012c29fba9" dependencies = [ "arrow2", "bytemuck", @@ -5797,22 +6036,22 @@ dependencies = [ [[package]] name = "serde_derive" -version = "1.0.206" +version = "1.0.217" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fabfb6138d2383ea8208cf98ccf69cdfb1aff4088460681d84189aa259762f97" +checksum = 
"5a9bf7cf98d04a2b28aead066b7496853d4779c9cc183c440dbac457641e19a0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.87", + "syn 2.0.95", ] [[package]] name = "serde_json" -version = "1.0.133" +version = "1.0.134" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c7fceb2473b9166b2294ef05efcb65a3db80803f0b03ef86a5fc88a2b85ee377" +checksum = "d00f4175c42ee48b15416f6193a959ba3a0d67fc699a0db9ad12df9f83991c7d" dependencies = [ - "indexmap 2.5.0", + "indexmap 2.7.0", "itoa", "memchr", "ryu", @@ -5894,6 +6133,12 @@ dependencies = [ "lazy_static", ] +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + [[package]] name = "signal-hook-registry" version = "1.4.2" @@ -5911,14 +6156,14 @@ checksum = "d66dc143e6b11c1eddc06d5c423cfc97062865baf299914ab64caa38182078fe" [[package]] name = "simd-json" -version = "0.13.10" +version = "0.13.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "570c430b3d902ea083097e853263ae782dfe40857d93db019a12356c8e8143fa" +checksum = "a0228a564470f81724e30996bbc2b171713b37b15254a6440c7e2d5449b95691" dependencies = [ "ahash", "getrandom 0.2.15", "halfbrown", - "lexical-core", + "lexical-core 1.0.5", "once_cell", "ref-cast", "serde", @@ -5929,9 +6174,9 @@ dependencies = [ [[package]] name = "simdutf8" -version = "0.1.4" +version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f27f6278552951f1f2b8cf9da965d10969b2efdea95a6ec47987ab46edfe263a" +checksum = "e3a9fe34e3e7a50316060351f37187a3f546bce95496156754b601a5fa71b76e" [[package]] name = "simple_asn1" @@ -5947,9 +6192,9 @@ dependencies = [ [[package]] name = "siphasher" -version = "0.3.11" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38b58827f4464d87d377d175e90bf58eb00fd8716ff0a62f80356b5e61555d0d" +checksum = 
"56199f7ddabf13fe5074ce809e7d3f42b42ae711800501b5b16ea82ad029c39d" [[package]] name = "sketches-ddsketch" @@ -6023,9 +6268,9 @@ checksum = "1b6b67fb9a61334225b5b790716f609cd58395f895b3fe8b328786812a40bc3b" [[package]] name = "socket2" -version = "0.5.7" +version = "0.5.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ce305eb0b4296696835b71df73eb912e0f1ffd2556a501fcede6e0c50349191c" +checksum = "c970269d99b64e60ec3bd6ad27270092a5394c4e309314b18ae3fe575695fbe8" dependencies = [ "libc", "windows-sys 0.52.0", @@ -6035,7 +6280,7 @@ dependencies = [ name = "spark-connect" version = "0.3.0-dev0" dependencies = [ - "prost 0.13.3", + "prost 0.13.4", "prost-types", "tonic", ] @@ -6071,6 +6316,12 @@ dependencies = [ "log", ] +[[package]] +name = "stable_deref_trait" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" + [[package]] name = "static_assertions" version = "1.1.0" @@ -6160,7 +6411,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.87", + "syn 2.0.95", ] [[package]] @@ -6171,11 +6422,10 @@ checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" [[package]] name = "supports-color" -version = "2.1.0" +version = "3.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d6398cde53adc3c4557306a96ce67b302968513830a77a95b2b17305d9719a89" +checksum = "c64fc7232dd8d2e4ac5ce4ef302b1d81e0b80d055b9d77c7c4f51f6aa4c867d6" dependencies = [ - "is-terminal", "is_ci", ] @@ -6192,9 +6442,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.87" +version = "2.0.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "25aa4ce346d03a6dcd68dd8b4010bcb74e54e62c90c573f394c46eae99aba32d" +checksum = "46f71c0377baf4ef1cc3e3402ded576dccc315800fbc62dfc7fe04b009773b4a" dependencies = [ "proc-macro2", "quote", @@ -6209,9 +6459,9 @@ checksum = 
"2047c6ded9c721764247e62cd3b03c09ffc529b2ba5b10ec482ae507a4a70160" [[package]] name = "sync_wrapper" -version = "1.0.1" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a7065abeca94b6a8a577f9bd45aa0867a2238b74e8eb67cf10d492bc39351394" +checksum = "0bf256ce5efdfa370213c1dabab5935a12e49f2c58d15e9eac2870d3b4f27263" dependencies = [ "futures-core", ] @@ -6228,11 +6478,22 @@ dependencies = [ "unicode-xid", ] +[[package]] +name = "synstructure" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8af7666ab7b6390ab78131fb5b0fce11d6b7a6951602017c35fa82800708971" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.95", +] + [[package]] name = "sysinfo" -version = "0.32.0" +version = "0.32.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3b5ae3f4f7d64646c46c4cae4e3f01d1c5d255c7406fdd7c7f999a94e488791" +checksum = "4c33cd241af0f2e9e3b5c32163b873b29956890b5342e6745b917ce9d490f4af" dependencies = [ "core-foundation-sys", "libc", @@ -6271,7 +6532,7 @@ checksum = "257822358c6f206fed78bfe6369cf959063b0644d70f88df6b19f2dadc93423e" dependencies = [ "alloca", "anyhow", - "clap 4.5.20", + "clap 4.5.23", "colorz", "glob-match", "goblin", @@ -6298,12 +6559,13 @@ checksum = "61c41af27dd6d1e27b1b16b489db798443478cef1f06a660c96db617ba5de3b1" [[package]] name = "tempfile" -version = "3.12.0" +version = "3.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "04cbcdd0c794ebb0d4cf35e88edd2f7d2c4c3e9a5a6dab322839b321c6a87a64" +checksum = "9a8a559c81686f576e8cd0290cd2a24a2a9ad80c98b3478856500fcbd7acd704" dependencies = [ "cfg-if", - "fastrand 2.1.0", + "fastrand 2.3.0", + "getrandom 0.2.15", "once_cell", "rustix", "windows-sys 0.59.0", @@ -6325,7 +6587,7 @@ version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3dffced63c2b5c7be278154d76b479f9f9920ed34e7574201407f0b14e2bbb93" dependencies = [ - 
"env_logger 0.11.5", + "env_logger 0.11.6", "test-log-macros", "tracing-subscriber", ] @@ -6338,7 +6600,7 @@ checksum = "5999e24eaa32083191ba4e425deb75cdf25efefabe5aaccb7446dd0d4122a3f5" dependencies = [ "proc-macro2", "quote", - "syn 2.0.87", + "syn 2.0.95", ] [[package]] @@ -6349,27 +6611,27 @@ checksum = "23d434d3f8967a09480fb04132ebe0a3e088c173e6d0ee7897abbdf4eab0f8b9" dependencies = [ "smawk", "unicode-linebreak", - "unicode-width 0.1.13", + "unicode-width 0.1.14", ] [[package]] name = "thiserror" -version = "1.0.63" +version = "1.0.69" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c0342370b38b6a11b6cc11d6a805569958d54cfa061a29969c3b5ce2ea405724" +checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.63" +version = "1.0.69" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4558b58466b9ad7ca0f102865eccc95938dca1a74a856f2b57b6629050da261" +checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" dependencies = [ "proc-macro2", "quote", - "syn 2.0.87", + "syn 2.0.95", ] [[package]] @@ -6405,7 +6667,7 @@ dependencies = [ "fancy-regex", "lazy_static", "parking_lot 0.12.3", - "rustc-hash", + "rustc-hash 1.1.0", ] [[package]] @@ -6430,9 +6692,9 @@ dependencies = [ [[package]] name = "time" -version = "0.3.36" +version = "0.3.37" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5dfd88e563464686c916c7e46e623e520ddc6d79fa6641390f2e3fa86e83e885" +checksum = "35e7868883861bd0e56d9ac6efcaaca0d6d5d82a2a7ec8209ff492c07cf37b21" dependencies = [ "deranged", "itoa", @@ -6454,9 +6716,9 @@ checksum = "ef927ca75afb808a4d64dd374f00a2adf8d0fcff8e7b184af886c3c87ec4a3f3" [[package]] name = "time-macros" -version = "0.2.18" +version = "0.2.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"3f252a68540fde3a3877aeea552b832b40ab9a69e318efd078774a01ddee1ccf" +checksum = "2834e6017e3e5e4b9834939793b282bc03b37a3336245fa820e35e233e2a85de" dependencies = [ "num-conv", "time-core", @@ -6471,6 +6733,16 @@ dependencies = [ "crunchy", ] +[[package]] +name = "tinystr" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9117f5d4db391c1cf6927e7bea3db74b9a1c1add8f7eda9ffd5364f40f57b82f" +dependencies = [ + "displaydoc", + "zerovec", +] + [[package]] name = "tinytemplate" version = "1.2.1" @@ -6483,9 +6755,9 @@ dependencies = [ [[package]] name = "tinyvec" -version = "1.8.0" +version = "1.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "445e881f4f6d382d5f27c034e25eb92edd7c784ceab92a0937db7f2e9471b938" +checksum = "022db8904dfa342efe721985167e9fcd16c29b226db4397ed752a761cfce81e8" dependencies = [ "tinyvec_macros", ] @@ -6498,9 +6770,9 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokio" -version = "1.41.1" +version = "1.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22cfb5bee7a6a52939ca9224d6ac897bb669134078daa8735560897f69de4d33" +checksum = "5cec9b21b0450273377fc97bd4c33a8acffc8c996c987a7c5b319a0083707551" dependencies = [ "backtrace", "bytes", @@ -6522,7 +6794,7 @@ checksum = "693d596312e88961bc67d7f1f97af8a70227d9f90c31bba5806eec004978d752" dependencies = [ "proc-macro2", "quote", - "syn 2.0.87", + "syn 2.0.95", ] [[package]] @@ -6537,9 +6809,9 @@ dependencies = [ [[package]] name = "tokio-stream" -version = "0.1.16" +version = "0.1.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4f4e6ce100d0eb49a2734f8c0812bcd324cf357d21810932c5df6b96ef2b86f1" +checksum = "eca58d7bba4a75707817a2c44174253f9236b2d5fbd055602e9d5c07c139a047" dependencies = [ "futures-core", "pin-project-lite", @@ -6548,9 +6820,9 @@ dependencies = [ [[package]] name = "tokio-util" -version = 
"0.7.11" +version = "0.7.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9cf6b47b3771c49ac75ad09a6162f53ad4b8088b76ac60e8ec1455b31a189fe1" +checksum = "d7fcaa8d55a2bdd6b83ace262b016eca0d79ee02818c5c1bcdf0305114081078" dependencies = [ "bytes", "futures-core", @@ -6571,20 +6843,20 @@ dependencies = [ "axum", "base64 0.22.1", "bytes", - "h2 0.4.6", - "http 1.1.0", + "h2 0.4.7", + "http 1.2.0", "http-body 1.0.1", "http-body-util", - "hyper 1.5.0", + "hyper 1.5.2", "hyper-timeout", "hyper-util", "percent-encoding", "pin-project", - "prost 0.13.3", + "prost 0.13.4", "socket2", "tokio", "tokio-stream", - "tower", + "tower 0.4.13", "tower-layer", "tower-service", "tracing", @@ -6610,23 +6882,38 @@ dependencies = [ "tracing", ] +[[package]] +name = "tower" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d039ad9159c98b70ecfd540b2573b97f7f52c3e8d9f8ad57a24b916a536975f9" +dependencies = [ + "futures-core", + "futures-util", + "pin-project-lite", + "sync_wrapper 1.0.2", + "tokio", + "tower-layer", + "tower-service", +] + [[package]] name = "tower-layer" -version = "0.3.2" +version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c20c8dbed6283a09604c3e69b4b7eeb54e298b8a600d4d5ecb5ad39de609f1d0" +checksum = "121c2a6cda46980bb0fcd1647ffaf6cd3fc79a013de288782836f6df9c48780e" [[package]] name = "tower-service" -version = "0.3.2" +version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b6bc1c9ce2b5135ac7f93c72918fc37feb872bdc6a5533a8b85eb4b86bfdae52" +checksum = "8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3" [[package]] name = "tracing" -version = "0.1.40" +version = "0.1.41" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3523ab5a71916ccf420eebdf5521fcef02141234bbc0b8a49f2fdc4544364ef" +checksum = "784e0ac535deb450455cbfa28a6f0df145ea1bb7ae51b821cf5e7927fdcfbdd0" 
dependencies = [ "log", "pin-project-lite", @@ -6636,13 +6923,13 @@ dependencies = [ [[package]] name = "tracing-attributes" -version = "0.1.27" +version = "0.1.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" +checksum = "395ae124c09f9e6918a2310af6038fba074bcf474ac352496d5910dd59a2226d" dependencies = [ "proc-macro2", "quote", - "syn 2.0.87", + "syn 2.0.95", ] [[package]] @@ -6658,9 +6945,9 @@ dependencies = [ [[package]] name = "tracing-core" -version = "0.1.32" +version = "0.1.33" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c06d3da6113f116aaee68e4d601191614c9053067f9ab7f6edbcb161237daa54" +checksum = "e672c95779cf947c5311f83787af4fa8fffd12fb27e4993211a84bdfd9610f9c" dependencies = [ "once_cell", "valuable", @@ -6679,9 +6966,9 @@ dependencies = [ [[package]] name = "tracing-subscriber" -version = "0.3.18" +version = "0.3.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ad0f048c97dbd9faa9b7df56362b8ebcaa52adb06b498c050d2f4e32f90a7a8b" +checksum = "e8189decb5ac0fa7bc8b96b7cb9b2701d60d48805aca84a238004d665fcc4008" dependencies = [ "matchers", "nu-ansi-term", @@ -6739,14 +7026,14 @@ checksum = "560b82d656506509d43abe30e0ba64c56b1953ab3d4fe7ba5902747a7a3cedd5" dependencies = [ "proc-macro2", "quote", - "syn 2.0.87", + "syn 2.0.95", ] [[package]] name = "typeid" -version = "1.0.0" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "059d83cc991e7a42fc37bd50941885db0888e34209f8cfd9aab07ddec03bc9cf" +checksum = "0e13db2e0ccd5e14a544e8a246ba2312cd25223f616442d7f2cb0e3db614236e" [[package]] name = "typenum" @@ -6756,9 +7043,9 @@ checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" [[package]] name = "typetag" -version = "0.2.18" +version = "0.2.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"52ba3b6e86ffe0054b2c44f2d86407388b933b16cb0a70eea3929420db1d9bbe" +checksum = "044fc3365ddd307c297fe0fe7b2e70588cdab4d0f62dc52055ca0d11b174cf0e" dependencies = [ "erased-serde", "inventory", @@ -6769,13 +7056,13 @@ dependencies = [ [[package]] name = "typetag-impl" -version = "0.2.18" +version = "0.2.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70b20a22c42c8f1cd23ce5e34f165d4d37038f5b663ad20fb6adbdf029172483" +checksum = "d9d30226ac9cbd2d1ff775f74e8febdab985dab14fb14aa2582c29a92d5555dc" dependencies = [ "proc-macro2", "quote", - "syn 2.0.87", + "syn 2.0.95", ] [[package]] @@ -6795,24 +7082,15 @@ checksum = "eaea85b334db583fe3274d12b4cd1880032beab409c0d774be044d4480ab9a94" [[package]] name = "unicase" -version = "2.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f7d2d4dafb69621809a81864c9c1b864479e1235c0dd4e199924b9742439ed89" -dependencies = [ - "version_check", -] - -[[package]] -name = "unicode-bidi" -version = "0.3.15" +version = "2.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08f95100a766bf4f8f28f90d77e0a5461bbdb219042e7679bebe79004fed8d75" +checksum = "75b844d17643ee918803943289730bec8aac480150456169e647ed0b576ba539" [[package]] name = "unicode-ident" -version = "1.0.12" +version = "1.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" +checksum = "adb9e6ca4f869e1180728b7950e35922a7fc6397f7b641499e8f3ef06e50dc83" [[package]] name = "unicode-linebreak" @@ -6831,15 +7109,15 @@ dependencies = [ [[package]] name = "unicode-segmentation" -version = "1.11.0" +version = "1.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d4c87d22b6e3f4a18d4d40ef354e97c90fcb14dd91d7dc0aa9d8a1172ebf7202" +checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" [[package]] name = "unicode-width" -version = "0.1.13" +version 
= "0.1.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0336d538f7abc86d282a4189614dfaa90810dfc2c6f6427eaf88e16311dd225d" +checksum = "7dd6e30e90baa6f72411720665d41d89b9a3d039dc45b8faea1ddd07f617f6af" [[package]] name = "unicode-width" @@ -6849,9 +7127,9 @@ checksum = "1fc81956842c57dac11422a97c3b8195a1ff727f06e85c84ed2e8aa277c9a0fd" [[package]] name = "unicode-xid" -version = "0.2.4" +version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f962df74c8c05a667b5ee8bcf162993134c104e96440b663c8daa176dc772d8c" +checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" [[package]] name = "unindent" @@ -6873,9 +7151,9 @@ checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" [[package]] name = "url" -version = "2.5.2" +version = "2.5.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22784dbdf76fdde8af1aeda5622b546b422b6fc585325248a2bf9f5e41e94d6c" +checksum = "32f8b686cadd1473f4bd0117a5d28d36b1ade384ea9b5069a1c40aefed7fda60" dependencies = [ "form_urlencoded", "idna", @@ -6889,12 +7167,24 @@ version = "2.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "daf8dba3b7eb870caf1ddeed7bc9d2a049f3cfdfae7cb521b087cc33ae4c49da" +[[package]] +name = "utf16_iter" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8232dd3cdaed5356e0f716d285e4b40b932ac434100fe9b7e0e8e935b9e6246" + [[package]] name = "utf8-width" version = "0.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "86bd8d4e895da8537e5315b8254664e6b769c4ff3db18321b297a1e7004392e3" +[[package]] +name = "utf8_iter" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" + [[package]] name = "utf8parse" version = "0.2.2" @@ -7005,46 +7295,47 @@ checksum = 
"9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" [[package]] name = "wasm-bindgen" -version = "0.2.92" +version = "0.2.99" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4be2531df63900aeb2bca0daaaddec08491ee64ceecbee5076636a3b026795a8" +checksum = "a474f6281d1d70c17ae7aa6a613c87fce69a127e2624002df63dcb39d6cf6396" dependencies = [ "cfg-if", + "once_cell", "wasm-bindgen-macro", ] [[package]] name = "wasm-bindgen-backend" -version = "0.2.92" +version = "0.2.99" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "614d787b966d3989fa7bb98a654e369c762374fd3213d212cfc0251257e747da" +checksum = "5f89bb38646b4f81674e8f5c3fb81b562be1fd936d84320f3264486418519c79" dependencies = [ "bumpalo", "log", - "once_cell", "proc-macro2", "quote", - "syn 2.0.87", + "syn 2.0.95", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-futures" -version = "0.4.42" +version = "0.4.49" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "76bc14366121efc8dbb487ab05bcc9d346b3b5ec0eaa76e46594cabbe51762c0" +checksum = "38176d9b44ea84e9184eff0bc34cc167ed044f816accfe5922e54d84cf48eca2" dependencies = [ "cfg-if", "js-sys", + "once_cell", "wasm-bindgen", "web-sys", ] [[package]] name = "wasm-bindgen-macro" -version = "0.2.92" +version = "0.2.99" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1f8823de937b71b9460c0c34e25f3da88250760bec0ebac694b49997550d726" +checksum = "2cc6181fd9a7492eef6fef1f33961e3695e4579b9872a6f7c83aee556666d4fe" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -7052,28 +7343,28 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.92" +version = "0.2.99" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7" +checksum = "30d7a95b763d3c45903ed6c81f156801839e5ee968bb07e534c44df0fcd330c2" dependencies = [ "proc-macro2", 
"quote", - "syn 2.0.87", + "syn 2.0.95", "wasm-bindgen-backend", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-shared" -version = "0.2.92" +version = "0.2.99" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af190c94f2773fdb3729c55b007a722abb5384da03bc0986df4c289bf5567e96" +checksum = "943aab3fdaaa029a6e0271b35ea10b72b943135afe9bffca82384098ad0e06a6" [[package]] name = "wasm-streams" -version = "0.4.0" +version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b65dc4c90b63b118468cf747d8bf3566c1913ef60be765b5730ead9e0a3ba129" +checksum = "15053d8d85c7eccdbefef60f06769760a563c7f0a9d6902a13d35c7800b0ad65" dependencies = [ "futures-util", "js-sys", @@ -7099,9 +7390,9 @@ dependencies = [ [[package]] name = "web-sys" -version = "0.3.69" +version = "0.3.76" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77afa9a11836342370f4817622a2f0f418b134426d91a82dfb48f532d2ec13ef" +checksum = "04dd7223427d52553d3702c004d3b2fe07c148165faa56313cb00211e31c12bc" dependencies = [ "js-sys", "wasm-bindgen", @@ -7193,7 +7484,7 @@ checksum = "9107ddc059d5b6fbfbffdfa7a7fe3e22a226def0b2608f72e9d552763d3e1ad7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.87", + "syn 2.0.95", ] [[package]] @@ -7204,7 +7495,7 @@ checksum = "29bee4b38ea3cde66011baa44dba677c432a78593e202392d1e9070cf2a7fca7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.87", + "syn 2.0.95", ] [[package]] @@ -7404,6 +7695,18 @@ dependencies = [ "windows-sys 0.48.0", ] +[[package]] +name = "write16" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d1890f4022759daae28ed4fe62859b1236caebfc61ede2f63ed4e695f3f6d936" + +[[package]] +name = "writeable" +version = "0.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e9df38ee2d2c3c5948ea468a8406ff0db0b29ae1ffde1bcf20ef305bcc95c51" + [[package]] name = "xmlparser" version = "0.13.6" @@ 
-7412,9 +7715,9 @@ checksum = "66fee0b777b0f5ac1c69bb06d361268faafa61cd4682ae064a171c16c433e9e4" [[package]] name = "xxhash-rust" -version = "0.8.12" +version = "0.8.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a5cbf750400958819fb6178eaa83bee5cd9c29a26a40cc241df8c70fdd46984" +checksum = "fdd20c5420375476fbd4394763288da7eb0cc0b8c11deed431a91562af7335d3" [[package]] name = "xz2" @@ -7431,6 +7734,30 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cfe53a6657fd280eaa890a3bc59152892ffa3e30101319d168b781ed6529b049" +[[package]] +name = "yoke" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "120e6aef9aa629e3d4f52dc8cc43a015c7724194c97dfaf45180d2daf2b77f40" +dependencies = [ + "serde", + "stable_deref_trait", + "yoke-derive", + "zerofrom", +] + +[[package]] +name = "yoke-derive" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2380878cad4ac9aac1e2435f3eb4020e8374b5f13c296cb75b4620ff8e229154" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.95", + "synstructure 0.13.1", +] + [[package]] name = "zerocopy" version = "0.3.2" @@ -7459,7 +7786,7 @@ checksum = "d498dbd1fd7beb83c86709ae1c33ca50942889473473d287d56ce4770a18edfb" dependencies = [ "proc-macro2", "syn 1.0.109", - "synstructure", + "synstructure 0.12.6", ] [[package]] @@ -7470,7 +7797,28 @@ checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.87", + "syn 2.0.95", +] + +[[package]] +name = "zerofrom" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cff3ee08c995dee1859d998dea82f7374f2826091dd9cd47def953cae446cd2e" +dependencies = [ + "zerofrom-derive", +] + +[[package]] +name = "zerofrom-derive" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"595eed982f7d355beb85837f651fa22e90b3c044842dc7f2c2842c086f295808" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.95", + "synstructure 0.13.1", ] [[package]] @@ -7479,6 +7827,28 @@ version = "1.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ced3678a2879b30306d323f4542626697a464a97c0a07c9aebf7ebca65cd4dde" +[[package]] +name = "zerovec" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa2b893d79df23bfb12d5461018d408ea19dfafe76c2c7ef6d4eba614f8ff079" +dependencies = [ + "yoke", + "zerofrom", + "zerovec-derive", +] + +[[package]] +name = "zerovec-derive" +version = "0.10.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6eafa6dfb17584ea3e2bd6e76e0cc15ad7af12b09abdd1ca55961bed9b1063c6" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.95", +] + [[package]] name = "zstd" version = "0.12.4" @@ -7534,9 +7904,9 @@ checksum = "3f423a2c17029964870cfaabb1f13dfab7d092a62a29a89264f4d36990ca414a" [[package]] name = "zune-jpeg" -version = "0.4.13" +version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "16099418600b4d8f028622f73ff6e3deaabdff330fb9a2a131dea781ee8b0768" +checksum = "99a5bab8d7dedf81405c4bb1f2b83ea057643d9cb28778cea9eecddeedd2e028" dependencies = [ "zune-core", ] diff --git a/Cargo.toml b/Cargo.toml index 5d57c96410..eae95f1264 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -278,10 +278,10 @@ path = "src/parquet2" [workspace.dependencies.pyo3] features = ["extension-module", "multiple-pymethods", "abi3-py39", "indexmap"] -version = "0.21.0" +version = "0.23.3" [workspace.dependencies.pyo3-log] -version = "0.11.0" +version = "0.12.1" [workspace.dependencies.serde] features = ["derive", "rc"] diff --git a/daft/utils.py b/daft/utils.py index b875d61799..528e005de6 100644 --- a/daft/utils.py +++ b/daft/utils.py @@ -1,8 +1,5 @@ from __future__ import annotations -import pickle -import random -import 
statistics from typing import Any, Callable from daft.dependencies import pa @@ -57,42 +54,6 @@ def freeze(input: dict | list | Any) -> frozenset | tuple | Any: return input -def estimate_size_bytes_pylist(pylist: list) -> int: - """Estimate the size of this list by sampling and pickling its objects.""" - if len(pylist) == 0: - return 0 - - # The pylist is non-empty. - # Sample up to 1MB or 10000 items to determine total size. - MAX_SAMPLE_QUANTITY = 10000 - MAX_SAMPLE_SIZE = 1024 * 1024 - - sample_candidates = random.sample(pylist, min(len(pylist), MAX_SAMPLE_QUANTITY)) - - sampled_sizes = [] - sample_size_allowed = MAX_SAMPLE_SIZE - for sample in sample_candidates: - size = len(pickle.dumps(sample)) - sampled_sizes.append(size) - sample_size_allowed -= size - if sample_size_allowed <= 0: - break - - # Sampling complete. - # If we ended up measuring the entire list, just return the exact value. - if len(sampled_sizes) == len(pylist): - return sum(sampled_sizes) - - # Otherwise, reduce to a one-item estimate and extrapolate. - if len(sampled_sizes) == 1: - [one_item_size_estimate] = sampled_sizes - else: - mean, stdev = statistics.mean(sampled_sizes), statistics.stdev(sampled_sizes) - one_item_size_estimate = int(mean + stdev) - - return one_item_size_estimate * len(pylist) - - def map_operator_arrow_semantics_bool( operator: Callable[[Any, Any], Any], left_pylist: list, diff --git a/src/common/arrow-ffi/src/lib.rs b/src/common/arrow-ffi/src/lib.rs index 463d2d05b8..486612ee64 100644 --- a/src/common/arrow-ffi/src/lib.rs +++ b/src/common/arrow-ffi/src/lib.rs @@ -53,7 +53,7 @@ pub fn to_py_array<'py>( (array_ptr as Py_uintptr_t, schema_ptr as Py_uintptr_t), )?; - let array = PyModule::import_bound(py, pyo3::intern!(py, "daft.arrow_utils"))? + let array = PyModule::import(py, pyo3::intern!(py, "daft.arrow_utils"))? .getattr(pyo3::intern!(py, "remove_empty_struct_placeholders"))? 
.call1((array,))?; diff --git a/src/common/daft-config/src/python.rs b/src/common/daft-config/src/python.rs index f60a14b537..3371ef349c 100644 --- a/src/common/daft-config/src/python.rs +++ b/src/common/daft-config/src/python.rs @@ -29,6 +29,7 @@ impl PyDaftPlanningConfig { } } + #[pyo3(signature = (default_io_config=None))] fn with_config_values(&mut self, default_io_config: Option) -> PyResult { let mut config = self.config.as_ref().clone(); @@ -74,6 +75,32 @@ impl PyDaftExecutionConfig { } #[allow(clippy::too_many_arguments)] + #[pyo3(signature = ( + scan_tasks_min_size_bytes=None, + scan_tasks_max_size_bytes=None, + broadcast_join_size_bytes_threshold=None, + parquet_split_row_groups_max_files=None, + sort_merge_join_sort_with_aligned_boundaries=None, + hash_join_partition_size_leniency=None, + sample_size_for_sort=None, + num_preview_rows=None, + parquet_target_filesize=None, + parquet_target_row_group_size=None, + parquet_inflation_factor=None, + csv_target_filesize=None, + csv_inflation_factor=None, + shuffle_aggregation_default_partitions=None, + partial_aggregation_threshold=None, + high_cardinality_aggregation_threshold=None, + read_sql_partition_size_bytes=None, + enable_aqe=None, + enable_native_executor=None, + default_morsel_size=None, + shuffle_algorithm=None, + pre_shuffle_merge_threshold=None, + enable_ray_tracing=None, + scantask_splitting_level=None + ))] fn with_config_values( &self, scan_tasks_min_size_bytes: Option, diff --git a/src/common/file-formats/src/file_format.rs b/src/common/file-formats/src/file_format.rs index 15a7684813..a40282b866 100644 --- a/src/common/file-formats/src/file_format.rs +++ b/src/common/file-formats/src/file_format.rs @@ -12,7 +12,7 @@ use serde::{Deserialize, Serialize}; /// Format of a file, e.g. Parquet, CSV, JSON. 
#[derive(Clone, Debug, PartialEq, Eq, Hash, Serialize, Deserialize, Copy)] -#[cfg_attr(feature = "python", pyclass(module = "daft.daft"))] +#[cfg_attr(feature = "python", pyclass(module = "daft.daft", eq, eq_int))] pub enum FileFormat { Parquet, Csv, diff --git a/src/common/file-formats/src/file_format_config.rs b/src/common/file-formats/src/file_format_config.rs index 5d166ddbeb..ac10e2b030 100644 --- a/src/common/file-formats/src/file_format_config.rs +++ b/src/common/file-formats/src/file_format_config.rs @@ -133,6 +133,7 @@ impl Default for ParquetSourceConfig { impl ParquetSourceConfig { /// Create a config for a Parquet data source. #[new] + #[pyo3(signature = (coerce_int96_timestamp_unit=None, field_id_mapping=None, row_groups=None, chunk_size=None))] fn new( coerce_int96_timestamp_unit: Option, field_id_mapping: Option>, @@ -218,6 +219,17 @@ impl CsvSourceConfig { /// * `chunk_size` - Size of the chunks (in bytes) deserialized in parallel by the streaming reader. #[allow(clippy::too_many_arguments)] #[new] + #[pyo3(signature = ( + has_headers, + double_quote, + allow_variable_columns, + delimiter=None, + quote=None, + escape_char=None, + comment=None, + buffer_size=None, + chunk_size=None + ))] fn new( has_headers: bool, double_quote: bool, @@ -291,6 +303,7 @@ impl JsonSourceConfig { /// * `buffer_size` - Size of the buffer (in bytes) used by the streaming reader. /// * `chunk_size` - Size of the chunks (in bytes) deserialized in parallel by the streaming reader. 
#[new] + #[pyo3(signature = (buffer_size=None, chunk_size=None))] fn new(buffer_size: Option, chunk_size: Option) -> Self { Self::new_internal(buffer_size, chunk_size) } @@ -308,7 +321,7 @@ pub struct DatabaseSourceConfig { serialize_with = "serialize_py_object", deserialize_with = "deserialize_py_object" )] - pub conn: PyObject, + pub conn: Arc, } #[cfg(feature = "python")] @@ -337,7 +350,7 @@ impl Hash for DatabaseSourceConfig { #[cfg(feature = "python")] impl DatabaseSourceConfig { #[must_use] - pub fn new_internal(sql: String, conn: PyObject) -> Self { + pub fn new_internal(sql: String, conn: Arc) -> Self { Self { sql, conn } } @@ -355,7 +368,7 @@ impl DatabaseSourceConfig { /// Create a config for a Database data source. #[new] fn new(sql: &str, conn: PyObject) -> Self { - Self::new_internal(sql.to_string(), conn) + Self::new_internal(sql.to_string(), Arc::new(conn)) } } diff --git a/src/common/file-formats/src/python.rs b/src/common/file-formats/src/python.rs index 86e1230b73..41125930be 100644 --- a/src/common/file-formats/src/python.rs +++ b/src/common/file-formats/src/python.rs @@ -46,13 +46,25 @@ impl PyFileFormatConfig { /// Get the underlying data source config. 
#[getter] - fn get_config(&self, py: Python) -> PyObject { + fn get_config(&self, py: Python) -> PyResult { match self.0.as_ref() { - FileFormatConfig::Parquet(config) => config.clone().into_py(py), - FileFormatConfig::Csv(config) => config.clone().into_py(py), - FileFormatConfig::Json(config) => config.clone().into_py(py), - FileFormatConfig::Database(config) => config.clone().into_py(py), - FileFormatConfig::PythonFunction => py.None(), + FileFormatConfig::Parquet(config) => config + .clone() + .into_pyobject(py) + .map(|c| c.unbind().into_any()), + FileFormatConfig::Csv(config) => config + .clone() + .into_pyobject(py) + .map(|c| c.unbind().into_any()), + FileFormatConfig::Json(config) => config + .clone() + .into_pyobject(py) + .map(|c| c.unbind().into_any()), + FileFormatConfig::Database(config) => config + .clone() + .into_pyobject(py) + .map(|c| c.unbind().into_any()), + FileFormatConfig::PythonFunction => Ok(py.None()), } } diff --git a/src/common/io-config/src/python.rs b/src/common/io-config/src/python.rs index e161e052e6..1c28775da6 100644 --- a/src/common/io-config/src/python.rs +++ b/src/common/io-config/src/python.rs @@ -1,6 +1,7 @@ use std::{ any::Any, hash::{Hash, Hasher}, + sync::Arc, time::{Duration, SystemTime}, }; @@ -157,6 +158,7 @@ pub struct HTTPConfig { impl IOConfig { #[new] #[must_use] + #[pyo3(signature = (s3=None, azure=None, gcs=None, http=None))] pub fn new( s3: Option, azure: Option, @@ -174,6 +176,7 @@ impl IOConfig { } #[must_use] + #[pyo3(signature = (s3=None, azure=None, gcs=None, http=None))] pub fn replace( &self, s3: Option, @@ -250,6 +253,28 @@ impl_bincode_py_state_serialization!(IOConfig); impl S3Config { #[allow(clippy::too_many_arguments)] #[new] + #[pyo3(signature = ( + region_name=None, + endpoint_url=None, + key_id=None, + session_token=None, + access_key=None, + credentials_provider=None, + buffer_time=None, + max_connections=None, + retry_initial_backoff_ms=None, + connect_timeout_ms=None, + read_timeout_ms=None, + 
num_tries=None, + retry_mode=None, + anonymous=None, + use_ssl=None, + verify_ssl=None, + check_hostname_ssl=None, + requester_pays=None, + force_virtual_addressing=None, + profile_name=None + ))] pub fn new( region_name: Option, endpoint_url: Option, @@ -311,6 +336,28 @@ impl S3Config { } #[allow(clippy::too_many_arguments)] + #[pyo3(signature = ( + region_name=None, + endpoint_url=None, + key_id=None, + session_token=None, + access_key=None, + credentials_provider=None, + buffer_time=None, + max_connections=None, + retry_initial_backoff_ms=None, + connect_timeout_ms=None, + read_timeout_ms=None, + num_tries=None, + retry_mode=None, + anonymous=None, + use_ssl=None, + verify_ssl=None, + check_hostname_ssl=None, + requester_pays=None, + force_virtual_addressing=None, + profile_name=None + ))] pub fn replace( &self, region_name: Option, @@ -378,7 +425,7 @@ impl S3Config { #[staticmethod] pub fn from_env(py: Python) -> PyResult { let io_config_from_env_func = py - .import_bound(pyo3::intern!(py, "daft"))? + .import(pyo3::intern!(py, "daft"))? .getattr(pyo3::intern!(py, "daft"))? .getattr(pyo3::intern!(py, "s3_config_from_env"))?; io_config_from_env_func.call0().map(|pyany| { @@ -530,6 +577,7 @@ impl S3Config { #[pymethods] impl S3Credentials { #[new] + #[pyo3(signature = (key_id, access_key, session_token=None, expiry=None))] pub fn new( py: Python, key_id: String, @@ -579,7 +627,7 @@ impl S3Credentials { self.credentials .expiry .map(|e| { - let datetime = py.import_bound(pyo3::intern!(py, "datetime"))?; + let datetime = py.import(pyo3::intern!(py, "datetime"))?; datetime .getattr(pyo3::intern!(py, "datetime"))? 
@@ -600,7 +648,7 @@ pub struct PyS3CredentialsProvider { serialize_with = "serialize_py_object", deserialize_with = "deserialize_py_object" )] - pub provider: PyObject, + pub provider: Arc, pub hash: isize, } @@ -608,7 +656,7 @@ impl PyS3CredentialsProvider { pub fn new(provider: Bound) -> PyResult { let hash = provider.hash()?; Ok(Self { - provider: provider.into(), + provider: Arc::new(provider.into()), hash, }) } @@ -681,6 +729,19 @@ impl AzureConfig { #[allow(clippy::too_many_arguments)] #[new] #[must_use] + #[pyo3(signature = ( + storage_account=None, + access_key=None, + sas_token=None, + bearer_token=None, + tenant_id=None, + client_id=None, + client_secret=None, + use_fabric_endpoint=None, + anonymous=None, + endpoint_url=None, + use_ssl=None + ))] pub fn new( storage_account: Option, access_key: Option, @@ -716,6 +777,19 @@ impl AzureConfig { #[allow(clippy::too_many_arguments)] #[must_use] + #[pyo3(signature = ( + storage_account=None, + access_key=None, + sas_token=None, + bearer_token=None, + tenant_id=None, + client_id=None, + client_secret=None, + use_fabric_endpoint=None, + anonymous=None, + endpoint_url=None, + use_ssl=None + ))] pub fn replace( &self, storage_account: Option, @@ -834,6 +908,17 @@ impl GCSConfig { #[allow(clippy::too_many_arguments)] #[new] #[must_use] + #[pyo3(signature = ( + project_id=None, + credentials=None, + token=None, + anonymous=None, + max_connections=None, + retry_initial_backoff_ms=None, + connect_timeout_ms=None, + read_timeout_ms=None, + num_tries=None + ))] pub fn new( project_id: Option, credentials: Option, @@ -866,6 +951,17 @@ impl GCSConfig { } #[allow(clippy::too_many_arguments)] #[must_use] + #[pyo3(signature = ( + project_id=None, + credentials=None, + token=None, + anonymous=None, + max_connections=None, + retry_initial_backoff_ms=None, + connect_timeout_ms=None, + read_timeout_ms=None, + num_tries=None + ))] pub fn replace( &self, project_id: Option, @@ -965,6 +1061,7 @@ impl From for IOConfig { impl 
HTTPConfig { #[new] #[must_use] + #[pyo3(signature = (bearer_token=None))] pub fn new(bearer_token: Option) -> Self { Self { config: crate::HTTPConfig::new(bearer_token), diff --git a/src/common/partitioning/src/lib.rs b/src/common/partitioning/src/lib.rs index 2df5c8cbb7..dd96941de8 100644 --- a/src/common/partitioning/src/lib.rs +++ b/src/common/partitioning/src/lib.rs @@ -145,7 +145,7 @@ pub enum PartitionCacheEntry { )] #[cfg(feature = "python")] /// in python, the partition cache is a weakvalue dictionary, so it will store the entry as long as this reference exists. - Python(PyObject), + Python(Arc), Rust { key: String, diff --git a/src/common/py-serde/src/lib.rs b/src/common/py-serde/src/lib.rs index 4c3aa3387c..be5af5f463 100644 --- a/src/common/py-serde/src/lib.rs +++ b/src/common/py-serde/src/lib.rs @@ -3,4 +3,4 @@ mod python; pub use bincode; #[cfg(feature = "python")] -pub use crate::{python::deserialize_py_object, python::serialize_py_object}; +pub use crate::{python::deserialize_py_object, python::pickle_dumps, python::serialize_py_object}; diff --git a/src/common/py-serde/src/python.rs b/src/common/py-serde/src/python.rs index 79e590bec3..44ea8d3e7e 100644 --- a/src/common/py-serde/src/python.rs +++ b/src/common/py-serde/src/python.rs @@ -1,25 +1,30 @@ -use std::fmt; +use std::{fmt, sync::Arc}; #[cfg(feature = "python")] -use pyo3::{types::PyAnyMethods, PyObject, Python}; +use pyo3::{types::PyAnyMethods, PyObject, PyResult, Python}; use serde::{ de::{Error as DeError, Visitor}, ser::Error as SerError, Deserializer, Serializer, }; +#[cfg(feature = "python")] +pub fn pickle_dumps(obj: &PyObject) -> PyResult> { + Python::with_gil(|py| { + py.import(pyo3::intern!(py, "daft.pickle")) + .and_then(|m| m.getattr(pyo3::intern!(py, "dumps"))) + .and_then(|f| f.call1((obj,))) + .and_then(|b| b.extract::>()) + }) +} + #[cfg(feature = "python")] pub fn serialize_py_object(obj: &PyObject, s: S) -> Result where S: Serializer, { - let bytes = Python::with_gil(|py| { 
- py.import_bound(pyo3::intern!(py, "daft.pickle")) - .and_then(|m| m.getattr(pyo3::intern!(py, "dumps"))) - .and_then(|f| f.call1((obj,))) - .and_then(|b| b.extract::>()) - .map_err(|e| SerError::custom(e.to_string())) - })?; + let bytes = pickle_dumps(obj).map_err(|e| SerError::custom(e.to_string()))?; + s.serialize_bytes(bytes.as_slice()) } #[cfg(feature = "python")] @@ -38,7 +43,7 @@ impl<'de> Visitor<'de> for PyObjectVisitor { E: DeError, { Python::with_gil(|py| { - py.import_bound(pyo3::intern!(py, "daft.pickle")) + py.import(pyo3::intern!(py, "daft.pickle")) .and_then(|m| m.getattr(pyo3::intern!(py, "loads"))) .and_then(|f| Ok(f.call1((v,))?.into())) .map_err(|e| DeError::custom(e.to_string())) @@ -66,11 +71,11 @@ impl<'de> Visitor<'de> for PyObjectVisitor { } #[cfg(feature = "python")] -pub fn deserialize_py_object<'de, D>(d: D) -> Result +pub fn deserialize_py_object<'de, D>(d: D) -> Result, D::Error> where D: Deserializer<'de>, { - d.deserialize_bytes(PyObjectVisitor) + d.deserialize_bytes(PyObjectVisitor).map(Into::into) } #[macro_export] @@ -86,13 +91,13 @@ macro_rules! impl_bincode_py_state_serialization { use pyo3::{ exceptions::PyRuntimeError, types::{PyAnyMethods, PyBytes}, - PyErr, PyTypeInfo, ToPyObject, + PyErr, PyTypeInfo, }; Ok(( - Self::type_object_bound(py) + Self::type_object(py) .getattr(pyo3::intern!(py, "_from_serialized"))? 
.into(), - (PyBytes::new_bound( + (PyBytes::new( py, &$crate::bincode::serialize(&self).map_err(|error| { PyErr::new::(format!( diff --git a/src/common/resource-request/src/lib.rs b/src/common/resource-request/src/lib.rs index 8239da9b06..bc9f18cfb7 100644 --- a/src/common/resource-request/src/lib.rs +++ b/src/common/resource-request/src/lib.rs @@ -203,6 +203,7 @@ fn float_max(left: f64, right: f64) -> f64 { #[pymethods] impl ResourceRequest { #[new] + #[pyo3(signature = (num_cpus=None, num_gpus=None, memory_bytes=None))] pub fn new( num_cpus: Option, num_gpus: Option, @@ -233,14 +234,17 @@ impl ResourceRequest { Ok(self.memory_bytes) } + #[pyo3(signature = (num_cpus))] pub fn with_num_cpus(&self, num_cpus: Option) -> DaftResult { Self::try_new_internal(num_cpus, self.num_gpus, self.memory_bytes) } + #[pyo3(signature = (num_gpus))] pub fn with_num_gpus(&self, num_gpus: Option) -> DaftResult { Self::try_new_internal(self.num_cpus, num_gpus, self.memory_bytes) } + #[pyo3(signature = (memory_bytes))] pub fn with_memory_bytes(&self, memory_bytes: Option) -> DaftResult { Self::try_new_internal(self.num_cpus, self.num_gpus, memory_bytes) } diff --git a/src/common/scan-info/src/python.rs b/src/common/scan-info/src/python.rs index 2a15788108..8c296f32a9 100644 --- a/src/common/scan-info/src/python.rs +++ b/src/common/scan-info/src/python.rs @@ -17,6 +17,7 @@ pub mod pylib { #[pymethods] impl PyPartitionField { #[new] + #[pyo3(signature = (field, source_field=None, transform=None))] fn new( field: PyField, source_field: Option, diff --git a/src/daft-catalog/python-catalog/src/python.rs b/src/daft-catalog/python-catalog/src/python.rs index f9de6fffcc..5d2ae500d6 100644 --- a/src/daft-catalog/python-catalog/src/python.rs +++ b/src/daft-catalog/python-catalog/src/python.rs @@ -152,7 +152,10 @@ impl DataCatalog for PythonCatalog { /// >>> daft.register_python_catalog(python_catalog, "my_catalog") /// 'default' #[pyfunction] -#[pyo3(name = "register_python_catalog")] +#[pyo3( + 
name = "register_python_catalog", + signature = (python_catalog_obj, catalog_name=None) +)] pub fn py_register_python_catalog( python_catalog_obj: PyObject, catalog_name: Option<&str>, diff --git a/src/daft-catalog/src/python.rs b/src/daft-catalog/src/python.rs index cc8848078f..a4896402ec 100644 --- a/src/daft-catalog/src/python.rs +++ b/src/daft-catalog/src/python.rs @@ -80,13 +80,16 @@ fn py_register_table( /// >>> daft.unregister_catalog("my_catalog") /// True #[pyfunction] -#[pyo3(name = "unregister_catalog")] +#[pyo3( + name = "unregister_catalog", + signature = (catalog_name=None) +)] pub fn py_unregister_catalog(catalog_name: Option<&str>) -> bool { crate::global_catalog::unregister_catalog(catalog_name) } pub fn register_modules<'py>(parent: &Bound<'py, PyModule>) -> PyResult> { - let module = PyModule::new_bound(parent.py(), "catalog")?; + let module = PyModule::new(parent.py(), "catalog")?; module.add_wrapped(wrap_pyfunction!(py_read_table))?; module.add_wrapped(wrap_pyfunction!(py_register_table))?; diff --git a/src/daft-connect/src/lib.rs b/src/daft-connect/src/lib.rs index 946be99c81..bd55024825 100644 --- a/src/daft-connect/src/lib.rs +++ b/src/daft-connect/src/lib.rs @@ -464,7 +464,7 @@ pub fn py_connect_start(addr: &str) -> pyo3::PyResult { #[cfg(feature = "python")] pub fn register_modules(parent: &pyo3::Bound) -> pyo3::PyResult<()> { - parent.add_function(pyo3::wrap_pyfunction_bound!(py_connect_start, parent)?)?; + parent.add_function(pyo3::wrap_pyfunction!(py_connect_start, parent)?)?; parent.add_class::()?; Ok(()) } diff --git a/src/daft-connect/src/translation/logical_plan/range.rs b/src/daft-connect/src/translation/logical_plan/range.rs index 1660bef5bf..c1ec7197ad 100644 --- a/src/daft-connect/src/translation/logical_plan/range.rs +++ b/src/daft-connect/src/translation/logical_plan/range.rs @@ -33,7 +33,7 @@ impl SparkAnalyzer<'_> { ensure!(step > 0, "step must be greater than 0"); let plan = Python::with_gil(|py| { - let range_module = 
PyModule::import_bound(py, "daft.io._range") + let range_module = PyModule::import(py, "daft.io._range") .wrap_err("Failed to import range module")?; let range = range_module @@ -43,7 +43,9 @@ impl SparkAnalyzer<'_> { let range = range .call1((start, end, step, partitions)) .wrap_err("Failed to create range scan operator")? - .to_object(py); + .into_pyobject(py) + .unwrap() + .unbind(); let scan_operator_handle = ScanOperatorHandle::from_python_scan_operator(range, py)?; diff --git a/src/daft-core/Cargo.toml b/src/daft-core/Cargo.toml index 84f57ff70c..65d8a2d733 100644 --- a/src/daft-core/Cargo.toml +++ b/src/daft-core/Cargo.toml @@ -38,7 +38,7 @@ itertools = {workspace = true} lazy_static = {workspace = true} log = {workspace = true} mur3 = "0.1.0" -ndarray = "0.15.6" +ndarray = "0.16.1" num-traits = {workspace = true} pyo3 = {workspace = true, optional = true} rand = "0.8.5" @@ -49,7 +49,7 @@ unicode-normalization = "0.1.24" [dependencies.numpy] optional = true -version = "0.21.0" +version = "0.23.0" [dependencies.xxhash-rust] features = ["xxh3", "const_xxh3", "xxh64"] diff --git a/src/daft-core/src/array/from.rs b/src/daft-core/src/array/from.rs index 4320a9ff8a..cac858e71b 100644 --- a/src/daft-core/src/array/from.rs +++ b/src/daft-core/src/array/from.rs @@ -151,14 +151,15 @@ impl From<(&str, Box)> for BooleanArray { } #[cfg(feature = "python")] -impl From<(&str, Vec)> for crate::datatypes::PythonArray { - fn from(item: (&str, Vec)) -> Self { +impl From<(&str, Vec>)> for crate::datatypes::PythonArray { + fn from(item: (&str, Vec>)) -> Self { use crate::array::pseudo_arrow::PseudoArrowArray; let (name, vec_pyobj) = item; - let arrow_array: Box = Box::new( - PseudoArrowArray::::from_pyobj_vec(vec_pyobj), - ); + let arrow_array: Box = + Box::new(PseudoArrowArray::>::from_pyobj_vec( + vec_pyobj, + )); let field = Field::new(name, DataType::Python); Self::new(field.into(), arrow_array).unwrap() } @@ -227,7 +228,7 @@ impl TryFrom<(&str, Vec, Vec)> for BinaryArray { 
impl TryFrom<( &str, - crate::array::pseudo_arrow::PseudoArrowArray, + crate::array::pseudo_arrow::PseudoArrowArray>, )> for crate::datatypes::PythonArray { type Error = DaftError; @@ -235,7 +236,7 @@ impl fn try_from( item: ( &str, - crate::array::pseudo_arrow::PseudoArrowArray, + crate::array::pseudo_arrow::PseudoArrowArray>, ), ) -> DaftResult { let (name, array) = item; diff --git a/src/daft-core/src/array/growable/python_growable.rs b/src/daft-core/src/array/growable/python_growable.rs index 1427c14c7c..be3e0de64b 100644 --- a/src/daft-core/src/array/growable/python_growable.rs +++ b/src/daft-core/src/array/growable/python_growable.rs @@ -11,7 +11,7 @@ pub struct PythonGrowable<'a> { name: String, dtype: DataType, arr_refs: Vec<&'a DataArray>, - buffer: Vec, + buffer: Vec>, } impl<'a> PythonGrowable<'a> { @@ -38,9 +38,9 @@ impl<'a> Growable for PythonGrowable<'a> { let slice_to_copy = arr .data() .as_any() - .downcast_ref::>() + .downcast_ref::>>() .unwrap(); - let pynone = pyo3::Python::with_gil(|py| py.None()); + let pynone = Arc::new(pyo3::Python::with_gil(|py| py.None())); for obj in slice_to_copy.iter() { match obj { None => self.buffer.push(pynone.clone()), @@ -50,18 +50,18 @@ impl<'a> Growable for PythonGrowable<'a> { } #[inline] fn add_nulls(&mut self, additional: usize) { - let pynone = pyo3::Python::with_gil(|py| py.None()); + let pynone = Arc::new(pyo3::Python::with_gil(|py| py.None())); for _ in 0..additional { self.buffer.push(pynone.clone()); } } #[inline] fn build(&mut self) -> common_error::DaftResult { - let mut buf: Vec = vec![]; + let mut buf: Vec> = vec![]; swap(&mut self.buffer, &mut buf); let field = Arc::new(Field::new(self.name.clone(), self.dtype.clone())); - let arr = PseudoArrowArray::::from_pyobj_vec(buf); + let arr = PseudoArrowArray::from_pyobj_vec(buf); Ok(DataArray::::new(field, Box::new(arr))?.into_series()) } } diff --git a/src/daft-core/src/array/ops/as_arrow.rs b/src/daft-core/src/array/ops/as_arrow.rs index 
8964df3640..9258f57374 100644 --- a/src/daft-core/src/array/ops/as_arrow.rs +++ b/src/daft-core/src/array/ops/as_arrow.rs @@ -1,3 +1,5 @@ +use std::sync::Arc; + use arrow2::{array, types::months_days_ns}; #[cfg(feature = "python")] @@ -64,7 +66,7 @@ impl_asarrow_dataarray!(FixedSizeBinaryArray, array::FixedSizeBinaryArray); impl_asarrow_dataarray!(IntervalArray, array::PrimitiveArray); #[cfg(feature = "python")] -impl_asarrow_dataarray!(PythonArray, PseudoArrowArray); +impl_asarrow_dataarray!(PythonArray, PseudoArrowArray>); impl_asarrow_logicalarray!(DateArray, array::PrimitiveArray); impl_asarrow_logicalarray!(TimeArray, array::PrimitiveArray); diff --git a/src/daft-core/src/array/ops/cast.rs b/src/daft-core/src/array/ops/cast.rs index 35d97a92a8..77df3b6550 100644 --- a/src/daft-core/src/array/ops/cast.rs +++ b/src/daft-core/src/array/ops/cast.rs @@ -68,7 +68,7 @@ where PySeries::from(Series::try_from((self.name(), self.data.clone()))?); let new_pyseries: PySeries = Python::with_gil(|py| -> PyResult { - PyModule::import_bound(py, pyo3::intern!(py, "daft.series"))? + PyModule::import(py, pyo3::intern!(py, "daft.series"))? .getattr(pyo3::intern!(py, "Series"))? .getattr(pyo3::intern!(py, "_from_pyseries"))? .call1((old_pyseries,))? @@ -455,14 +455,14 @@ macro_rules! pycast_then_arrowcast { let new_pyseries = Python::with_gil(|py| -> PyResult { let old_daft_series = { - PyModule::import_bound(py, pyo3::intern!(py, "daft.series"))? + PyModule::import(py, pyo3::intern!(py, "daft.series"))? .getattr(pyo3::intern!(py, "Series"))? .getattr(pyo3::intern!(py, "_from_pyseries"))? .call1((old_pyseries,))? }; let py_type_fn = { - PyModule::import_bound(py, pyo3::intern!(py, "builtins"))? + PyModule::import(py, pyo3::intern!(py, "builtins"))? .getattr(pyo3::intern!(py, $pytype_str))? }; @@ -589,12 +589,12 @@ fn extract_python_to_vec< } let from_numpy_dtype = { - PyModule::import_bound(py, pyo3::intern!(py, "daft.datatype"))? 
+ PyModule::import(py, pyo3::intern!(py, "daft.datatype"))? .getattr(pyo3::intern!(py, "DataType"))? .getattr(pyo3::intern!(py, "from_numpy_dtype"))? }; - let builtins = PyModule::import_bound(py, pyo3::intern!(py, "builtins"))?; + let builtins = PyModule::import(py, pyo3::intern!(py, "builtins"))?; let py_type_fn = match child_dtype { dtype if dtype.is_integer() => Ok(builtins.getattr(pyo3::intern!(py, "int"))?), @@ -605,7 +605,7 @@ fn extract_python_to_vec< }?; let py_memory_view = py - .import_bound(pyo3::intern!(py, "builtins"))? + .import(pyo3::intern!(py, "builtins"))? .getattr(pyo3::intern!(py, "memoryview"))?; // TODO: use this to extract our the image mode @@ -628,7 +628,7 @@ fn extract_python_to_vec< { // Path if object supports buffer/array protocols. let np_as_array_fn = py - .import_bound(pyo3::intern!(py, "numpy"))? + .import(pyo3::intern!(py, "numpy"))? .getattr(pyo3::intern!(py, "asarray"))?; let pyarray = np_as_array_fn.call1((object,))?; let (num_values, shape_size) = append_values_from_numpy( @@ -654,7 +654,7 @@ fn extract_python_to_vec< } else { // Path if object does not support buffer/array protocols. // Try a best-effort conversion of the elements. - let pyiter = object.iter(); + let pyiter = object.try_iter(); if let Ok(pyiter) = pyiter { // has an iter let casted_iter = pyiter.map(|v| v.and_then(|f| py_type_fn.call1((f,)))); @@ -1115,7 +1115,7 @@ impl EmbeddingArray { (DataType::Python, DataType::Embedding(_, size)) => Python::with_gil(|py| { let physical_arrow = self.physical.flat_child.to_arrow(); let shape = (self.len(), *size); - let pyarrow = py.import_bound(pyo3::intern!(py, "pyarrow"))?; + let pyarrow = py.import(pyo3::intern!(py, "pyarrow"))?; // Only go through FFI layer once instead of for every embedding. // We create an ndarray view on the entire embeddings array // buffer sans the validity mask, and then create a subndarray view @@ -1124,8 +1124,8 @@ impl EmbeddingArray { .call_method1(pyo3::intern!(py, "to_numpy"), (false,))? 
.call_method1(pyo3::intern!(py, "reshape"), (shape,))?; let ndarrays = py_array - .iter()? - .map(|a| a.unwrap().unbind()) + .try_iter()? + .map(|a| Arc::new(a.unwrap().unbind())) .collect::>(); let values_array = PseudoArrowArray::new(ndarrays.into(), self.physical.validity().cloned()); @@ -1160,7 +1160,7 @@ impl ImageArray { let ca = self.channel_array(); let ha = self.height_array(); let wa = self.width_array(); - let pyarrow = py.import_bound(pyo3::intern!(py, "pyarrow"))?; + let pyarrow = py.import(pyo3::intern!(py, "pyarrow"))?; for i in 0..da.len() { let element = da.get(i); let shape = ( @@ -1172,10 +1172,9 @@ impl ImageArray { Some(element) => ffi::to_py_array(py, element.to_arrow(), &pyarrow)? .call_method1(pyo3::intern!(py, "to_numpy"), (false,))? .call_method1(pyo3::intern!(py, "reshape"), (shape,))?, - None => PyArray3::::zeros_bound(py, shape.into_dimension(), false) - .into_any(), + None => PyArray3::::zeros(py, shape.into_dimension(), false).into_any(), }; - ndarrays.push(py_array.unbind()); + ndarrays.push(Arc::new(py_array.unbind())); } let values_array = PseudoArrowArray::new(ndarrays.into(), self.physical.validity().cloned()); @@ -1273,7 +1272,7 @@ impl FixedShapeImageArray { *width as usize, mode.num_channels() as usize, ); - let pyarrow = py.import_bound(pyo3::intern!(py, "pyarrow"))?; + let pyarrow = py.import(pyo3::intern!(py, "pyarrow"))?; // Only go through FFI layer once instead of for every image. // We create an (N, H, W, C) ndarray view on the entire image array // buffer sans the validity mask, and then create a subndarray view @@ -1283,8 +1282,8 @@ impl FixedShapeImageArray { .call_method1(pyo3::intern!(py, "to_numpy"), (false,))? .call_method1(pyo3::intern!(py, "reshape"), (shape,))?; let ndarrays = py_array - .iter()? - .map(|a| a.unwrap().unbind()) + .try_iter()? 
+ .map(|a| Arc::new(a.unwrap().unbind())) .collect::>(); let values_array = PseudoArrowArray::new(ndarrays.into(), self.physical.validity().cloned()); @@ -1337,7 +1336,7 @@ impl TensorArray { let mut ndarrays = Vec::with_capacity(self.len()); let da = self.data_array(); let sa = self.shape_array(); - let pyarrow = py.import_bound(pyo3::intern!(py, "pyarrow"))?; + let pyarrow = py.import(pyo3::intern!(py, "pyarrow"))?; for (arrow_array, shape_array) in (0..self.len()).map(|i| (da.get(i), sa.get(i))) { if let (Some(arrow_array), Some(shape_array)) = (arrow_array, shape_array) { let shape_array = shape_array.u64().unwrap().as_arrow(); @@ -1345,9 +1344,9 @@ impl TensorArray { let py_array = ffi::to_py_array(py, arrow_array.to_arrow(), &pyarrow)? .call_method1(pyo3::intern!(py, "to_numpy"), (false,))? .call_method1(pyo3::intern!(py, "reshape"), (shape,))?; - ndarrays.push(py_array.unbind()); + ndarrays.push(Arc::new(py_array.unbind())); } else { - ndarrays.push(py.None()); + ndarrays.push(Arc::new(py.None())); } } let values_array = @@ -1710,11 +1709,11 @@ impl SparseTensorArray { } #[cfg(feature = "python")] DataType::Python => Python::with_gil(|py| { - let mut pydicts: Vec> = Vec::with_capacity(self.len()); + let mut pydicts: Vec> = Vec::with_capacity(self.len()); let sa = self.shape_array(); let va = self.values_array(); let ia = self.indices_array(); - let pyarrow = py.import_bound(pyo3::intern!(py, "pyarrow"))?; + let pyarrow = py.import(pyo3::intern!(py, "pyarrow"))?; for ((shape_array, values_array), indices_array) in sa.into_iter().zip(va.into_iter()).zip(ia.into_iter()) { @@ -1729,13 +1728,13 @@ impl SparseTensorArray { let py_indices_array = ffi::to_py_array(py, indices_array.to_arrow(), &pyarrow)? 
.call_method1(pyo3::intern!(py, "to_numpy"), (false,))?; - let pydict = pyo3::types::PyDict::new_bound(py); + let pydict = pyo3::types::PyDict::new(py); pydict.set_item("values", py_values_array)?; pydict.set_item("indices", py_indices_array)?; pydict.set_item("shape", shape)?; - pydicts.push(pydict.unbind().into()); + pydicts.push(Arc::new(pydict.unbind().into())); } else { - pydicts.push(py.None()); + pydicts.push(Arc::new(py.None())); } } let py_objects_array = @@ -1841,10 +1840,10 @@ impl FixedShapeSparseTensorArray { #[cfg(feature = "python")] (DataType::Python, DataType::FixedShapeSparseTensor(_, tensor_shape)) => { Python::with_gil(|py| { - let mut pydicts: Vec> = Vec::with_capacity(self.len()); + let mut pydicts: Vec> = Vec::with_capacity(self.len()); let va = self.values_array(); let ia = self.indices_array(); - let pyarrow = py.import_bound(pyo3::intern!(py, "pyarrow"))?; + let pyarrow = py.import(pyo3::intern!(py, "pyarrow"))?; for (values_array, indices_array) in va.into_iter().zip(ia.into_iter()) { if let (Some(values_array), Some(indices_array)) = (values_array, indices_array) @@ -1855,13 +1854,13 @@ impl FixedShapeSparseTensorArray { let py_indices_array = ffi::to_py_array(py, indices_array.to_arrow(), &pyarrow)? 
.call_method1(pyo3::intern!(py, "to_numpy"), (false,))?; - let pydict = pyo3::types::PyDict::new_bound(py); + let pydict = pyo3::types::PyDict::new(py); pydict.set_item("values", py_values_array)?; pydict.set_item("indices", py_indices_array)?; pydict.set_item("shape", tensor_shape)?; - pydicts.push(pydict.unbind().into()); + pydicts.push(Arc::new(pydict.unbind().into())); } else { - pydicts.push(py.None()); + pydicts.push(Arc::new(py.None())); } } let py_objects_array = @@ -1885,7 +1884,7 @@ impl FixedShapeTensorArray { (DataType::Python, DataType::FixedShapeTensor(_, shape)) => { let physical_arrow = self.physical.flat_child.to_arrow(); pyo3::Python::with_gil(|py| { - let pyarrow = py.import_bound(pyo3::intern!(py, "pyarrow"))?; + let pyarrow = py.import(pyo3::intern!(py, "pyarrow"))?; let mut np_shape: Vec = vec![self.len() as u64]; np_shape.extend(shape); // Only go through FFI layer once instead of for every tensor element. @@ -1897,8 +1896,8 @@ impl FixedShapeTensorArray { .call_method1(pyo3::intern!(py, "to_numpy"), (false,))? .call_method1(pyo3::intern!(py, "reshape"), (np_shape,))?; let ndarrays = py_array - .iter()? - .map(|a| a.unwrap().unbind()) + .try_iter()? + .map(|a| Arc::new(a.unwrap().unbind())) .collect::>(); let values_array = PseudoArrowArray::new(ndarrays.into(), self.physical.validity().cloned()); @@ -2288,10 +2287,12 @@ where .as_arrow() .convert_logical_type(arrow_dtype) .with_validity(None); - let pyarrow = py.import_bound(pyo3::intern!(py, "pyarrow"))?; - let py_array: Vec = ffi::to_py_array(py, arrow_array.to_boxed(), &pyarrow)? + let pyarrow = py.import(pyo3::intern!(py, "pyarrow"))?; + let py_array = ffi::to_py_array(py, arrow_array.to_boxed(), &pyarrow)? .call_method0(pyo3::intern!(py, "to_pylist"))? - .extract()?; + .try_iter()? 
+ .map(|a| Arc::new(a.unwrap().unbind())) + .collect::>(); let values_array = PseudoArrowArray::new(py_array.into(), array.as_arrow().validity().cloned()); Ok(PythonArray::new( diff --git a/src/daft-core/src/array/ops/concat.rs b/src/daft-core/src/array/ops/concat.rs index 3424b46811..96d5fcd6b0 100644 --- a/src/daft-core/src/array/ops/concat.rs +++ b/src/daft-core/src/array/ops/concat.rs @@ -1,3 +1,5 @@ +use std::sync::Arc; + use arrow2::array::Array; use common_error::{DaftError, DaftResult}; @@ -89,7 +91,7 @@ where .iter() .map(|s| { s.as_any() - .downcast_ref::>() + .downcast_ref::>>() .unwrap() }) .collect(), diff --git a/src/daft-core/src/array/ops/concat_agg.rs b/src/daft-core/src/array/ops/concat_agg.rs index c222f6190e..92d925fc54 100644 --- a/src/daft-core/src/array/ops/concat_agg.rs +++ b/src/daft-core/src/array/ops/concat_agg.rs @@ -1,3 +1,5 @@ +use std::sync::Arc; + use arrow2::{ array::{Array, Utf8Array}, bitmap::utils::SlicesIterator, @@ -26,15 +28,15 @@ impl DaftConcatAggable for crate::datatypes::PythonArray { let pyobj_vec = self.as_arrow().to_pyobj_vec(); let pylist: Py = Python::with_gil(|py| -> PyResult> { - let pylist: Py = PyList::empty_bound(py).into(); + let pylist = PyList::empty(py); for pyobj in pyobj_vec { if !pyobj.is_none(py) { - pylist.call_method1(py, pyo3::intern!(py, "extend"), (pyobj,))?; + pylist.call_method1(pyo3::intern!(py, "extend"), (pyobj.clone_ref(py),))?; } } - Ok(pylist) + Ok(pylist.into()) })?; - let arrow_array = PseudoArrowArray::::from_pyobj_vec(vec![pylist.into()]); + let arrow_array = PseudoArrowArray::from_pyobj_vec(vec![Arc::new(pylist.into())]); Self::new(self.field().clone().into(), Box::new(arrow_array)) } fn grouped_concat(&self, groups: &super::GroupIndices) -> Self::Output { @@ -42,24 +44,24 @@ impl DaftConcatAggable for crate::datatypes::PythonArray { use crate::array::pseudo_arrow::PseudoArrowArray; - let mut result_pylists: Vec = Vec::with_capacity(groups.len()); + let mut result_pylists: Vec> = 
Vec::with_capacity(groups.len()); Python::with_gil(|py| -> DaftResult<()> { for group in groups { let indices_as_array = crate::datatypes::UInt64Array::from(("", group.clone())); let group_pyobjs = self.take(&indices_as_array)?.as_arrow().to_pyobj_vec(); - let pylist: Py = PyList::empty_bound(py).into(); + let pylist = PyList::empty(py); for pyobj in group_pyobjs { if !pyobj.is_none(py) { - pylist.call_method1(py, pyo3::intern!(py, "extend"), (pyobj,))?; + pylist.call_method1(pyo3::intern!(py, "extend"), (pyobj.clone_ref(py),))?; } } - result_pylists.push(pylist.into()); + result_pylists.push(Arc::new(pylist.into())); } Ok(()) })?; - let arrow_array = PseudoArrowArray::::from_pyobj_vec(result_pylists); + let arrow_array = PseudoArrowArray::from_pyobj_vec(result_pylists); Self::new(self.field().clone().into(), Box::new(arrow_array)) } } diff --git a/src/daft-core/src/array/ops/filter.rs b/src/daft-core/src/array/ops/filter.rs index f17740a549..f87e91c95d 100644 --- a/src/daft-core/src/array/ops/filter.rs +++ b/src/daft-core/src/array/ops/filter.rs @@ -1,4 +1,4 @@ -use std::borrow::Cow; +use std::{borrow::Cow, sync::Arc}; use arrow2::bitmap::utils::SlicesIterator; use common_error::DaftResult; @@ -38,7 +38,7 @@ impl crate::datatypes::PythonArray { .map(|x| x.unwrap_or(false)) .zip(self.as_arrow().values().iter()) .filter_map(|(f, item)| if f { Some(item.clone()) } else { None }) - .collect::>() + .collect::>>() }; // Apply the filter mask to the validity bitmap. 
diff --git a/src/daft-core/src/array/ops/full.rs b/src/daft-core/src/array/ops/full.rs index d2b90ae2e2..9decd4dd80 100644 --- a/src/daft-core/src/array/ops/full.rs +++ b/src/daft-core/src/array/ops/full.rs @@ -33,7 +33,7 @@ where let field = Field::new(name, dtype.clone()); #[cfg(feature = "python")] if dtype.is_python() { - let py_none = Python::with_gil(|py: Python| py.None()); + let py_none = Arc::new(Python::with_gil(|py: Python| py.None())); return Self::new( field.into(), diff --git a/src/daft-core/src/array/ops/get.rs b/src/daft-core/src/array/ops/get.rs index b90062b8af..df7477b2ee 100644 --- a/src/daft-core/src/array/ops/get.rs +++ b/src/daft-core/src/array/ops/get.rs @@ -1,3 +1,5 @@ +use std::sync::Arc; + use arrow2::types::months_days_ns; use super::as_arrow::AsArrow; @@ -114,7 +116,7 @@ impl ExtensionArray { #[cfg(feature = "python")] impl crate::datatypes::PythonArray { #[inline] - pub fn get(&self, idx: usize) -> pyo3::PyObject { + pub fn get(&self, idx: usize) -> Arc { use arrow2::array::Array; use pyo3::prelude::*; @@ -132,7 +134,7 @@ impl crate::datatypes::PythonArray { if valid { self.as_arrow().values().get(idx).unwrap().clone() } else { - Python::with_gil(|py| py.None()) + Arc::new(Python::with_gil(|py| py.None())) } } } diff --git a/src/daft-core/src/array/ops/len.rs b/src/daft-core/src/array/ops/len.rs index 4a1d8ff4dc..8b1ccadc2e 100644 --- a/src/daft-core/src/array/ops/len.rs +++ b/src/daft-core/src/array/ops/len.rs @@ -1,4 +1,9 @@ +use std::cmp::min; + use common_error::DaftResult; +#[cfg(feature = "python")] +use common_py_serde::pickle_dumps; +use rand::{rngs::StdRng, seq::SliceRandom, SeedableRng}; use super::as_arrow::AsArrow; #[cfg(feature = "python")] @@ -21,19 +26,59 @@ where #[cfg(feature = "python")] impl PythonArray { + /// Estimate the size of this list by sampling and pickling its objects. 
pub fn size_bytes(&self) -> DaftResult { - use pyo3::{prelude::*, types::PyList}; - - let vector = self.as_arrow().values().to_vec(); - Python::with_gil(|py| { - let daft_utils = PyModule::import_bound(py, pyo3::intern!(py, "daft.utils"))?; - let estimate_size_bytes_pylist = - daft_utils.getattr(pyo3::intern!(py, "estimate_size_bytes_pylist"))?; - let size_bytes: usize = estimate_size_bytes_pylist - .call1((PyList::new_bound(py, vector),))? - .extract()?; - Ok(size_bytes) - }) + // Sample up to 1MB or 10000 items to determine total size. + const MAX_SAMPLE_QUANTITY: usize = 10000; + const MAX_SAMPLE_SIZE: usize = 1024 * 1024; + + if self.is_empty() { + return Ok(0); + } + + let values = self.as_arrow().values(); + + let mut rng = StdRng::seed_from_u64(0); + let sample_candidates = + values.choose_multiple(&mut rng, min(values.len(), MAX_SAMPLE_QUANTITY)); + + let mut sample_size_allowed = MAX_SAMPLE_SIZE; + let mut sampled_sizes = Vec::with_capacity(sample_candidates.len()); + for c in sample_candidates { + let size = pickle_dumps(c)?.len(); + sampled_sizes.push(size); + sample_size_allowed = sample_size_allowed.saturating_sub(size); + + if sample_size_allowed == 0 { + break; + } + } + + if sampled_sizes.len() == values.len() { + // Sampling complete. + // If we ended up measuring the entire list, just return the exact value. + + Ok(sampled_sizes.into_iter().sum()) + } else { + // Otherwise, reduce to a one-item estimate and extrapolate. 
+ + let one_item_size_estimate = if sampled_sizes.len() == 1 { + sampled_sizes[0] + } else { + let sampled_len = sampled_sizes.len() as f64; + + let mean: f64 = sampled_sizes.iter().map(|&x| x as f64).sum::() / sampled_len; + let stdev: f64 = sampled_sizes + .iter() + .map(|&x| ((x as f64) - mean).powi(2)) + .sum::() + / sampled_len; + + (mean + stdev) as usize + }; + + Ok(one_item_size_estimate * values.len()) + } } } diff --git a/src/daft-core/src/array/ops/list_agg.rs b/src/daft-core/src/array/ops/list_agg.rs index 89bf7090a1..17eb5f9565 100644 --- a/src/daft-core/src/array/ops/list_agg.rs +++ b/src/daft-core/src/array/ops/list_agg.rs @@ -1,3 +1,5 @@ +use std::sync::Arc; + use common_error::DaftResult; use super::{as_arrow::AsArrow, DaftListAggable, GroupIndices}; @@ -89,9 +91,16 @@ impl DaftListAggable for crate::datatypes::PythonArray { let pyobj_vec = self.as_arrow().to_pyobj_vec(); - let pylist: Py = Python::with_gil(|py| PyList::new_bound(py, pyobj_vec).into()); + let pylist: Py = Python::with_gil(|py| { + let pyobj_vec_cloned = pyobj_vec + .into_iter() + .map(|pyobj| pyobj.clone_ref(py)) + .collect::>(); + + PyList::new(py, pyobj_vec_cloned).map(Into::into) + })?; - let arrow_array = PseudoArrowArray::::from_pyobj_vec(vec![pylist.into()]); + let arrow_array = PseudoArrowArray::from_pyobj_vec(vec![Arc::new(pylist.into())]); Self::new(self.field().clone().into(), Box::new(arrow_array)) } @@ -100,18 +109,22 @@ impl DaftListAggable for crate::datatypes::PythonArray { use crate::array::pseudo_arrow::PseudoArrowArray; - let mut result_pylists: Vec = Vec::with_capacity(groups.len()); + let mut result_pylists: Vec> = Vec::with_capacity(groups.len()); Python::with_gil(|py| -> DaftResult<()> { for group in groups { let indices_as_array = crate::datatypes::UInt64Array::from(("", group.clone())); let group_pyobjs = self.take(&indices_as_array)?.as_arrow().to_pyobj_vec(); - result_pylists.push(PyList::new_bound(py, group_pyobjs).into()); + let group_pyobjs_cloned = 
group_pyobjs + .into_iter() + .map(|pyobj| pyobj.clone_ref(py)) + .collect::>(); + result_pylists.push(Arc::new(PyList::new(py, group_pyobjs_cloned)?.into())); } Ok(()) })?; - let arrow_array = PseudoArrowArray::::from_pyobj_vec(result_pylists); + let arrow_array = PseudoArrowArray::from_pyobj_vec(result_pylists); Self::new(self.field().clone().into(), Box::new(arrow_array)) } } diff --git a/src/daft-core/src/array/ops/repr.rs b/src/daft-core/src/array/ops/repr.rs index 7eba5c0ba1..f9cfc486dc 100644 --- a/src/daft-core/src/array/ops/repr.rs +++ b/src/daft-core/src/array/ops/repr.rs @@ -457,7 +457,7 @@ impl crate::datatypes::PythonArray { // Find visualization hooks for this object's class let pyany = val.bind(py); let get_viz_hook = py - .import_bound(pyo3::intern!(py, "daft.viz.html_viz_hooks"))? + .import(pyo3::intern!(py, "daft.viz.html_viz_hooks"))? .getattr(pyo3::intern!(py, "get_viz_hook"))?; let hook = get_viz_hook.call1((pyany,))?; diff --git a/src/daft-core/src/array/ops/take.rs b/src/daft-core/src/array/ops/take.rs index 0cceec49a0..47830b4114 100644 --- a/src/daft-core/src/array/ops/take.rs +++ b/src/daft-core/src/array/ops/take.rs @@ -1,3 +1,5 @@ +use std::sync::Arc; + use arrow2::types::Index; use common_error::DaftResult; @@ -122,8 +124,8 @@ impl crate::datatypes::PythonArray { let old_values = self.as_arrow().values(); // Execute take on the data values, ignoring validity. 
- let new_values: Vec = { - let py_none = Python::with_gil(|py: Python| py.None()); + let new_values: Vec> = { + let py_none = Arc::new(Python::with_gil(|py: Python| py.None())); indices .iter() diff --git a/src/daft-core/src/array/pseudo_arrow/python.rs b/src/daft-core/src/array/pseudo_arrow/python.rs index 1bdc73cb2c..e5b294c15c 100644 --- a/src/daft-core/src/array/pseudo_arrow/python.rs +++ b/src/daft-core/src/array/pseudo_arrow/python.rs @@ -1,11 +1,13 @@ +use std::sync::Arc; + use arrow2::{array::Array, bitmap::Bitmap}; use pyo3::prelude::*; use crate::array::pseudo_arrow::PseudoArrowArray; -impl PseudoArrowArray { - pub fn from_pyobj_vec(pyobj_vec: Vec) -> Self { - // Converts this Vec into a PseudoArrowArray. +impl PseudoArrowArray> { + pub fn from_pyobj_vec(pyobj_vec: Vec>) -> Self { + // Converts this Vec into a PseudoArrowArray>. // PyNones will be marked as invalid bits in the validity bitmap. let validity: arrow2::bitmap::Bitmap = Python::with_gil(|py| { @@ -14,17 +16,19 @@ impl PseudoArrowArray { Self::new(pyobj_vec.into(), Some(validity)) } - pub fn to_pyobj_vec(&self) -> Vec { - // Converts this PseudoArrowArray into a Vec, + pub fn to_pyobj_vec(&self) -> Vec> { + // Converts this PseudoArrowArray> into a Vec>, // taking into account the validity bitmap. // Invalid slots will be set to py.None(). 
if self.validity().is_some() { Python::with_gil(|py| { self.iter() - .map(|opt_val| match opt_val { - Some(pyobj) => pyobj.clone_ref(py), - None => py.None(), + .map(|opt_val| { + Arc::new(match opt_val { + Some(pyobj) => pyobj.clone_ref(py), + None => py.None(), + }) }) .collect() }) @@ -38,9 +42,9 @@ impl PseudoArrowArray { lhs: &dyn Array, rhs: &dyn Array, ) -> Self { - let pynone = Python::with_gil(|py| py.None()); + let pynone = Python::with_gil(|py| Arc::new(py.None())); - let (new_values, new_validity): (Vec, Vec) = { + let (new_values, new_validity): (Vec>, Vec) = { lhs.as_any() .downcast_ref::() .unwrap() diff --git a/src/daft-core/src/count_mode.rs b/src/daft-core/src/count_mode.rs index ff0d909239..547e27a8df 100644 --- a/src/daft-core/src/count_mode.rs +++ b/src/daft-core/src/count_mode.rs @@ -14,7 +14,7 @@ use serde::{Deserialize, Serialize}; /// | Null - Count only null values. #[allow(clippy::upper_case_acronyms)] #[derive(Clone, Copy, Debug, Display, PartialEq, Eq, Serialize, Deserialize, Hash)] -#[cfg_attr(feature = "python", pyclass(module = "daft.daft"))] +#[cfg_attr(feature = "python", pyclass(module = "daft.daft", eq, eq_int))] pub enum CountMode { All = 1, Valid = 2, diff --git a/src/daft-core/src/join.rs b/src/daft-core/src/join.rs index 49af3c1adf..a2042ec118 100644 --- a/src/daft-core/src/join.rs +++ b/src/daft-core/src/join.rs @@ -9,7 +9,7 @@ use serde::{Deserialize, Serialize}; /// Type of a join operation. 
#[derive(Clone, Copy, Debug, Display, PartialEq, Eq, Serialize, Deserialize, Hash)] -#[cfg_attr(feature = "python", pyclass(module = "daft.daft"))] +#[cfg_attr(feature = "python", pyclass(module = "daft.daft", eq, eq_int))] pub enum JoinType { Inner, Left, @@ -73,7 +73,7 @@ impl FromStr for JoinType { } #[derive(Clone, Copy, Debug, Display, PartialEq, Eq, Serialize, Deserialize, Hash)] -#[cfg_attr(feature = "python", pyclass(module = "daft.daft"))] +#[cfg_attr(feature = "python", pyclass(module = "daft.daft", eq, eq_int))] pub enum JoinStrategy { Hash, SortMerge, @@ -129,7 +129,7 @@ impl FromStr for JoinStrategy { } #[derive(Clone, Copy, Debug, Display, PartialEq, Eq, Serialize, Deserialize, Hash)] -#[cfg_attr(feature = "python", pyclass(module = "daft.daft"))] +#[cfg_attr(feature = "python", pyclass(module = "daft.daft", eq, eq_int))] pub enum JoinSide { Left, Right, diff --git a/src/daft-core/src/python/series.rs b/src/daft-core/src/python/series.rs index 1886d38727..008cc69e4d 100644 --- a/src/daft-core/src/python/series.rs +++ b/src/daft-core/src/python/series.rs @@ -1,6 +1,7 @@ use std::{ hash::BuildHasherDefault, ops::{Add, Div, Mul, Rem, Sub}, + sync::Arc, }; use common_arrow_ffi as ffi; @@ -46,17 +47,22 @@ impl PySeries { // This ingests a Python list[object] directly into a Rust PythonArray. 
#[staticmethod] - pub fn from_pylist(name: &str, pylist: Bound, pyobj: &str) -> PyResult { + pub fn from_pylist( + py: Python<'_>, + name: &str, + pylist: Bound, + pyobj: &str, + ) -> PyResult { let vec_pyobj: Vec = pylist.extract()?; - let py = pylist.py(); let dtype = match pyobj { "force" => DataType::Python, "allow" => infer_daft_dtype_for_sequence(&vec_pyobj, py, name)?.unwrap_or(DataType::Python), "disallow" => panic!("Cannot create a Series from a pylist and being strict about only using Arrow types by setting pyobj=disallow"), _ => panic!("Unsupported pyobj behavior when creating Series from pylist: {}", pyobj) }; + let vec_pyobj_arced = vec_pyobj.into_iter().map(Arc::new).collect(); let arrow_array: Box = - Box::new(PseudoArrowArray::::from_pyobj_vec(vec_pyobj)); + Box::new(PseudoArrowArray::from_pyobj_vec(vec_pyobj_arced)); let field = Field::new(name, DataType::Python); let data_array = DataArray::::new(field.into(), arrow_array)?; @@ -66,17 +72,23 @@ impl PySeries { // This is for PythonArrays only, // to convert the Rust PythonArray to a Python list[object]. 
- pub fn to_pylist(&self) -> PyResult { + pub fn to_pylist<'a>(&self, py: Python<'a>) -> PyResult> { let pseudo_arrow_array = self.series.python()?.as_arrow(); let pyobj_vec = pseudo_arrow_array.to_pyobj_vec(); - Python::with_gil(|py| Ok(PyList::new_bound(py, pyobj_vec).into())) + + let pyobj_vec_cloned = pyobj_vec + .into_iter() + .map(|pyobj| pyobj.clone_ref(py)) + .collect::>(); + + PyList::new(py, pyobj_vec_cloned) } pub fn to_arrow(&self) -> PyResult { let arrow_array = self.series.to_arrow(); let arrow_array = cast_array_from_daft_if_needed(arrow_array); Python::with_gil(|py| { - let pyarrow = py.import_bound(pyo3::intern!(py, "pyarrow"))?; + let pyarrow = py.import(pyo3::intern!(py, "pyarrow"))?; Ok(ffi::to_py_array(py, arrow_array, &pyarrow)?.unbind()) }) } @@ -310,6 +322,7 @@ impl PySeries { Ok(self.series.argsort(descending, nulls_first)?.into()) } + #[pyo3(signature = (seed=None))] pub fn hash(&self, seed: Option) -> PyResult { let seed_series; let mut seed_array = None; @@ -567,6 +580,7 @@ impl PySeries { Ok(self.series.utf8_to_date(format)?.into()) } + #[pyo3(signature = (format, timezone=None))] pub fn utf8_to_datetime(&self, format: &str, timezone: Option<&str>) -> PyResult { Ok(self.series.utf8_to_datetime(format, timezone)?.into()) } @@ -731,7 +745,7 @@ impl PySeries { pub fn _debug_bincode_serialize(&self, py: Python) -> PyResult { let values = bincode::serialize(&self.series).unwrap(); - Ok(PyBytes::new_bound(py, &values).into()) + Ok(PyBytes::new(py, &values).into()) } #[staticmethod] @@ -763,16 +777,16 @@ fn infer_daft_dtype_for_sequence( _name: &str, ) -> PyResult> { let py_pil_image_type = py - .import_bound(pyo3::intern!(py, "PIL.Image")) + .import(pyo3::intern!(py, "PIL.Image")) .and_then(|m| m.getattr(pyo3::intern!(py, "Image"))); let np_ndarray_type = py - .import_bound(pyo3::intern!(py, "numpy")) + .import(pyo3::intern!(py, "numpy")) .and_then(|m| m.getattr(pyo3::intern!(py, "ndarray"))); let np_generic_type = py - 
.import_bound(pyo3::intern!(py, "numpy")) + .import(pyo3::intern!(py, "numpy")) .and_then(|m| m.getattr(pyo3::intern!(py, "generic"))); let from_numpy_dtype = { - py.import_bound(pyo3::intern!(py, "daft.datatype"))? + py.import(pyo3::intern!(py, "daft.datatype"))? .getattr(pyo3::intern!(py, "DataType"))? .getattr(pyo3::intern!(py, "from_numpy_dtype"))? }; diff --git a/src/daft-core/src/series/utils/python_fn.rs b/src/daft-core/src/series/utils/python_fn.rs index f0d4745999..8ab7f48ddd 100644 --- a/src/daft-core/src/series/utils/python_fn.rs +++ b/src/daft-core/src/series/utils/python_fn.rs @@ -38,18 +38,18 @@ fn python_binary_op_with_utilfn( (a, b) => panic!("Cannot apply operation on arrays of different lengths: {a} vs {b}"), }; - let left_pylist = PySeries::from(lhs.clone()).to_pylist()?; - let right_pylist = PySeries::from(rhs).to_pylist()?; - let result_series: Series = Python::with_gil(|py| -> PyResult { + let left_pylist = PySeries::from(lhs.clone()).to_pylist(py)?; + let right_pylist = PySeries::from(rhs).to_pylist(py)?; + let py_operator = - PyModule::import_bound(py, pyo3::intern!(py, "operator"))?.getattr(operator_fn)?; + PyModule::import(py, pyo3::intern!(py, "operator"))?.getattr(operator_fn)?; - let result_pylist = PyModule::import_bound(py, pyo3::intern!(py, "daft.utils"))? + let result_pylist = PyModule::import(py, pyo3::intern!(py, "daft.utils"))? .getattr(util_fn)? .call1((py_operator, left_pylist, right_pylist))?; - PyModule::import_bound(py, pyo3::intern!(py, "daft.series"))? + PyModule::import(py, pyo3::intern!(py, "daft.series"))? .getattr(pyo3::intern!(py, "Series"))? .getattr(pyo3::intern!(py, "from_pylist"))? .call1((result_pylist, lhs.name(), pyo3::intern!(py, "disallow")))? 
@@ -68,15 +68,15 @@ pub fn py_membership_op_utilfn(lhs: &Series, rhs: &Series) -> DaftResult let lhs_casted = lhs.cast(&DataType::Python)?; let rhs_casted = rhs.cast(&DataType::Python)?; - let left_pylist = PySeries::from(lhs_casted.clone()).to_pylist()?; - let right_pylist = PySeries::from(rhs_casted).to_pylist()?; - let result_series: Series = Python::with_gil(|py| -> PyResult { - let result_pylist = PyModule::import_bound(py, pyo3::intern!(py, "daft.utils"))? + let left_pylist = PySeries::from(lhs_casted.clone()).to_pylist(py)?; + let right_pylist = PySeries::from(rhs_casted).to_pylist(py)?; + + let result_pylist = PyModule::import(py, pyo3::intern!(py, "daft.utils"))? .getattr(pyo3::intern!(py, "python_list_membership_check"))? .call1((left_pylist, right_pylist))?; - PyModule::import_bound(py, pyo3::intern!(py, "daft.series"))? + PyModule::import(py, pyo3::intern!(py, "daft.series"))? .getattr(pyo3::intern!(py, "Series"))? .getattr(pyo3::intern!(py, "from_pylist"))? .call1(( @@ -127,16 +127,16 @@ pub fn py_between_op_utilfn(value: &Series, lower: &Series, upper: &Series) -> D } }; - let value_pylist = PySeries::from(value_casted.clone()).to_pylist()?; - let lower_pylist = PySeries::from(lower_casted).to_pylist()?; - let upper_pylist = PySeries::from(upper_casted).to_pylist()?; - let result_series: Series = Python::with_gil(|py| -> PyResult { - let result_pylist = PyModule::import_bound(py, pyo3::intern!(py, "daft.utils"))? + let value_pylist = PySeries::from(value_casted.clone()).to_pylist(py)?; + let lower_pylist = PySeries::from(lower_casted).to_pylist(py)?; + let upper_pylist = PySeries::from(upper_casted).to_pylist(py)?; + + let result_pylist = PyModule::import(py, pyo3::intern!(py, "daft.utils"))? .getattr(pyo3::intern!(py, "python_list_between_check"))? .call1((value_pylist, lower_pylist, upper_pylist))?; - PyModule::import_bound(py, pyo3::intern!(py, "daft.series"))? + PyModule::import(py, pyo3::intern!(py, "daft.series"))? 
.getattr(pyo3::intern!(py, "Series"))? .getattr(pyo3::intern!(py, "from_pylist"))? .call1(( diff --git a/src/daft-csv/src/lib.rs b/src/daft-csv/src/lib.rs index 4a738ab2a8..f2b2954673 100644 --- a/src/daft-csv/src/lib.rs +++ b/src/daft-csv/src/lib.rs @@ -76,10 +76,7 @@ pub fn register_modules(parent: &Bound) -> PyResult<()> { parent.add_class::()?; parent.add_class::()?; parent.add_class::()?; - parent.add_function(wrap_pyfunction_bound!(python::pylib::read_csv, parent)?)?; - parent.add_function(wrap_pyfunction_bound!( - python::pylib::read_csv_schema, - parent - )?)?; + parent.add_function(wrap_pyfunction!(python::pylib::read_csv, parent)?)?; + parent.add_function(wrap_pyfunction!(python::pylib::read_csv_schema, parent)?)?; Ok(()) } diff --git a/src/daft-csv/src/python.rs b/src/daft-csv/src/python.rs index 202eaf67d3..875a58e3a0 100644 --- a/src/daft-csv/src/python.rs +++ b/src/daft-csv/src/python.rs @@ -8,7 +8,14 @@ pub mod pylib { use crate::{CsvConvertOptions, CsvParseOptions, CsvReadOptions}; - #[pyfunction] + #[pyfunction(signature = ( + uri, + convert_options=None, + parse_options=None, + read_options=None, + io_config=None, + multithreaded_io=None + ))] pub fn read_csv( py: Python, uri: &str, @@ -39,7 +46,13 @@ pub mod pylib { }) } - #[pyfunction] + #[pyfunction(signature = ( + uri, + parse_options=None, + max_bytes=None, + io_config=None, + multithreaded_io=None + ))] pub fn read_csv_schema( py: Python, uri: &str, diff --git a/src/daft-dsl/src/functions/python/mod.rs b/src/daft-dsl/src/functions/python/mod.rs index 917345d632..8a7a5c2086 100644 --- a/src/daft-dsl/src/functions/python/mod.rs +++ b/src/daft-dsl/src/functions/python/mod.rs @@ -8,6 +8,8 @@ use common_resource_request::ResourceRequest; use common_treenode::{TreeNode, TreeNodeRecursion}; use daft_core::prelude::*; use itertools::Itertools; +#[cfg(feature = "python")] +use pyo3::Python; pub use runtime_py_object::RuntimePyObject; use serde::{Deserialize, Serialize}; @@ -154,14 +156,15 @@ pub fn 
get_batch_size(exprs: &[ExprRef]) -> Option { #[cfg(feature = "python")] fn py_udf_initialize( - func: pyo3::PyObject, - init_args: pyo3::PyObject, + py: Python<'_>, + func: Arc, + init_args: Arc, ) -> DaftResult { - use pyo3::Python; - - Ok(Python::with_gil(move |py| { - func.call_method1(py, pyo3::intern!(py, "initialize"), (init_args,)) - })?) + Ok(func.call_method1( + py, + pyo3::intern!(py, "initialize"), + (init_args.clone_ref(py),), + )?) } /// Initializes all uninitialized UDFs in the expression @@ -180,8 +183,9 @@ pub fn initialize_udfs(expr: ExprRef) -> DaftResult { ), inputs, } => { - let initialized_func = - py_udf_initialize(inner.clone().unwrap(), init_args.clone().unwrap())?; + let initialized_func = Python::with_gil(|py| { + py_udf_initialize(py, inner.clone().unwrap(), init_args.clone().unwrap()) + })?; let initialized_expr = Expr::Function { func: FunctionExpr::Python(PythonUDF { diff --git a/src/daft-dsl/src/functions/python/runtime_py_object.rs b/src/daft-dsl/src/functions/python/runtime_py_object.rs index d38ebbefe3..b29ea9994b 100644 --- a/src/daft-dsl/src/functions/python/runtime_py_object.rs +++ b/src/daft-dsl/src/functions/python/runtime_py_object.rs @@ -1,5 +1,7 @@ #![allow(clippy::all, reason = "todo: remove; getting a rustc error")] +use std::sync::Arc; + use serde::{Deserialize, Serialize}; /// A wrapper around PyObject that is safe to use even when the Python feature flag isn't turned on @@ -12,9 +14,11 @@ pub struct RuntimePyObject { impl RuntimePyObject { #[cfg(feature = "test-utils")] pub fn new_testing_none() -> Self { + use std::sync::Arc; + #[cfg(feature = "python")] { - let none_value = pyo3::Python::with_gil(|py| py.None()); + let none_value = Arc::new(pyo3::Python::with_gil(|py| py.None())); Self { obj: crate::pyobj_serde::PyObjectWrapper(none_value), } @@ -26,14 +30,14 @@ impl RuntimePyObject { } #[cfg(feature = "python")] - pub fn new(value: pyo3::PyObject) -> Self { + pub fn new(value: Arc) -> Self { Self { obj: 
crate::pyobj_serde::PyObjectWrapper(value), } } #[cfg(feature = "python")] - pub fn unwrap(self) -> pyo3::PyObject { + pub fn unwrap(self) -> Arc { self.obj.0 } } @@ -49,6 +53,6 @@ impl AsRef for RuntimePyObject { #[cfg(feature = "python")] impl From for RuntimePyObject { fn from(value: pyo3::PyObject) -> Self { - Self::new(value) + Self::new(Arc::new(value)) } } diff --git a/src/daft-dsl/src/functions/python/udf.rs b/src/daft-dsl/src/functions/python/udf.rs index e08257cdd9..6cb0e7359c 100644 --- a/src/daft-dsl/src/functions/python/udf.rs +++ b/src/daft-dsl/src/functions/python/udf.rs @@ -21,7 +21,7 @@ fn run_udf( use daft_core::python::{PyDataType, PySeries}; // Convert input Rust &[Series] to wrapped Python Vec> - let py_series_module = PyModule::import_bound(py, pyo3::intern!(py, "daft.series"))?; + let py_series_module = PyModule::import(py, pyo3::intern!(py, "daft.series"))?; let py_series_class = py_series_module.getattr(pyo3::intern!(py, "Series"))?; let pyseries: PyResult>> = inputs .iter() @@ -36,7 +36,7 @@ fn run_udf( let pyseries = pyseries?; // Run the function on the converted Vec> - let py_udf_module = PyModule::import_bound(py, pyo3::intern!(py, "daft.udf"))?; + let py_udf_module = PyModule::import(py, pyo3::intern!(py, "daft.udf"))?; let run_udf_func = py_udf_module.getattr(pyo3::intern!(py, "run_udf"))?; let result = run_udf_func.call1(( func, // Function to run @@ -73,21 +73,21 @@ impl PythonUDF { ))); } - let func = match &self.func { - MaybeInitializedUDF::Initialized(func) => func.clone().unwrap(), - MaybeInitializedUDF::Uninitialized { inner, init_args } => { - // TODO(Kevin): warn user if initialization is taking too long and ask them to use actor pool UDFs + Python::with_gil(|py| { + let func = match &self.func { + MaybeInitializedUDF::Initialized(func) => func.clone().unwrap().clone_ref(py), + MaybeInitializedUDF::Uninitialized { inner, init_args } => { + // TODO(Kevin): warn user if initialization is taking too long and ask them to use 
actor pool UDFs - py_udf_initialize(inner.clone().unwrap(), init_args.clone().unwrap())? - } - }; + py_udf_initialize(py, inner.clone().unwrap(), init_args.clone().unwrap())? + } + }; - Python::with_gil(|py| { run_udf( py, inputs, func, - self.bound_args.clone().unwrap(), + self.bound_args.clone().unwrap().clone_ref(py), &self.return_dtype, self.batch_size, ) diff --git a/src/daft-dsl/src/lib.rs b/src/daft-dsl/src/lib.rs index 4c92027e9a..fe7c44f068 100644 --- a/src/daft-dsl/src/lib.rs +++ b/src/daft-dsl/src/lib.rs @@ -28,20 +28,20 @@ pub use resolve_expr::{check_column_name_validity, ExprResolver}; pub fn register_modules(parent: &Bound) -> PyResult<()> { parent.add_class::()?; - parent.add_function(wrap_pyfunction_bound!(python::col, parent)?)?; - parent.add_function(wrap_pyfunction_bound!(python::lit, parent)?)?; - parent.add_function(wrap_pyfunction_bound!(python::date_lit, parent)?)?; - parent.add_function(wrap_pyfunction_bound!(python::time_lit, parent)?)?; - parent.add_function(wrap_pyfunction_bound!(python::timestamp_lit, parent)?)?; - parent.add_function(wrap_pyfunction_bound!(python::duration_lit, parent)?)?; - parent.add_function(wrap_pyfunction_bound!(python::interval_lit, parent)?)?; - parent.add_function(wrap_pyfunction_bound!(python::decimal_lit, parent)?)?; - parent.add_function(wrap_pyfunction_bound!(python::series_lit, parent)?)?; - parent.add_function(wrap_pyfunction_bound!(python::udf, parent)?)?; - parent.add_function(wrap_pyfunction_bound!(python::initialize_udfs, parent)?)?; - parent.add_function(wrap_pyfunction_bound!(python::get_udf_names, parent)?)?; - parent.add_function(wrap_pyfunction_bound!(python::eq, parent)?)?; - parent.add_function(wrap_pyfunction_bound!( + parent.add_function(wrap_pyfunction!(python::col, parent)?)?; + parent.add_function(wrap_pyfunction!(python::lit, parent)?)?; + parent.add_function(wrap_pyfunction!(python::date_lit, parent)?)?; + parent.add_function(wrap_pyfunction!(python::time_lit, parent)?)?; + 
parent.add_function(wrap_pyfunction!(python::timestamp_lit, parent)?)?; + parent.add_function(wrap_pyfunction!(python::duration_lit, parent)?)?; + parent.add_function(wrap_pyfunction!(python::interval_lit, parent)?)?; + parent.add_function(wrap_pyfunction!(python::decimal_lit, parent)?)?; + parent.add_function(wrap_pyfunction!(python::series_lit, parent)?)?; + parent.add_function(wrap_pyfunction!(python::udf, parent)?)?; + parent.add_function(wrap_pyfunction!(python::initialize_udfs, parent)?)?; + parent.add_function(wrap_pyfunction!(python::get_udf_names, parent)?)?; + parent.add_function(wrap_pyfunction!(python::eq, parent)?)?; + parent.add_function(wrap_pyfunction!( python::check_column_name_validity, parent )?)?; diff --git a/src/daft-dsl/src/lit.rs b/src/daft-dsl/src/lit.rs index c1c7ce81c3..da08460805 100644 --- a/src/daft-dsl/src/lit.rs +++ b/src/daft-dsl/src/lit.rs @@ -409,7 +409,7 @@ impl Literal for Series { #[cfg(feature = "python")] impl Literal for pyo3::PyObject { fn literal_value(self) -> LiteralValue { - LiteralValue::Python(PyObjectWrapper(self)) + LiteralValue::Python(PyObjectWrapper(Arc::new(self))) } } diff --git a/src/daft-dsl/src/pyobj_serde.rs b/src/daft-dsl/src/pyobj_serde.rs index 259f9dc7d8..2f713806dd 100644 --- a/src/daft-dsl/src/pyobj_serde.rs +++ b/src/daft-dsl/src/pyobj_serde.rs @@ -1,6 +1,7 @@ use std::{ hash::{Hash, Hasher}, io::Write, + sync::Arc, }; use common_py_serde::{deserialize_py_object, serialize_py_object}; @@ -14,7 +15,7 @@ pub struct PyObjectWrapper( serialize_with = "serialize_py_object", deserialize_with = "deserialize_py_object" )] - pub PyObject, + pub Arc, ); impl PartialEq for PyObjectWrapper { diff --git a/src/daft-dsl/src/python.rs b/src/daft-dsl/src/python.rs index 928e3ced13..a12058f4bd 100644 --- a/src/daft-dsl/src/python.rs +++ b/src/daft-dsl/src/python.rs @@ -40,7 +40,7 @@ pub fn time_lit(item: i64, tu: PyTimeUnit) -> PyResult { Ok(expr.into()) } -#[pyfunction] +#[pyfunction(signature = (val, tu, tz=None))] 
pub fn timestamp_lit(val: i64, tu: PyTimeUnit, tz: Option) -> PyResult { let expr = Expr::Literal(LiteralValue::Timestamp(val, tu.timeunit, tz)); Ok(expr.into()) @@ -52,8 +52,17 @@ pub fn duration_lit(val: i64, tu: PyTimeUnit) -> PyResult { Ok(expr.into()) } -#[pyfunction] #[allow(clippy::too_many_arguments)] +#[pyfunction(signature = ( + years=None, + months=None, + days=None, + hours=None, + minutes=None, + seconds=None, + millis=None, + nanos=None +))] pub fn interval_lit( years: Option, months: Option, @@ -173,8 +182,18 @@ pub fn lit(item: Bound) -> PyResult { } } -#[pyfunction] #[allow(clippy::too_many_arguments)] +#[pyfunction(signature = ( + name, + inner, + bound_args, + expressions, + return_dtype, + init_args, + resource_request=None, + batch_size=None, + concurrency=None +))] pub fn udf( name: &str, inner: PyObject, diff --git a/src/daft-functions-json/src/lib.rs b/src/daft-functions-json/src/lib.rs index 6c57b15039..6209651369 100644 --- a/src/daft-functions-json/src/lib.rs +++ b/src/daft-functions-json/src/lib.rs @@ -133,7 +133,7 @@ pub fn py_json_query(expr: PyExpr, query: &str) -> PyResult { #[cfg(feature = "python")] pub fn register_modules(parent: &Bound) -> PyResult<()> { - parent.add_function(wrap_pyfunction_bound!(py_json_query, parent)?)?; + parent.add_function(wrap_pyfunction!(py_json_query, parent)?)?; Ok(()) } diff --git a/src/daft-functions/src/python/image.rs b/src/daft-functions/src/python/image.rs index 4e9d50cfe9..bafba42d89 100644 --- a/src/daft-functions/src/python/image.rs +++ b/src/daft-functions/src/python/image.rs @@ -7,7 +7,7 @@ use crate::image::{decode::ImageDecode, encode::ImageEncode}; simple_python_wrapper!(image_crop, crate::image::crop::crop, [expr: PyExpr, bbox: PyExpr]); simple_python_wrapper!(image_to_mode, crate::image::to_mode::image_to_mode, [expr: PyExpr, mode: ImageMode]); -#[pyfunction] +#[pyfunction(signature = (expr, raise_on_error=None, mode=None))] pub fn image_decode( expr: PyExpr, raise_on_error: Option, 
diff --git a/src/daft-functions/src/python/misc.rs b/src/daft-functions/src/python/misc.rs index 2b1212b898..b050e0aab4 100644 --- a/src/daft-functions/src/python/misc.rs +++ b/src/daft-functions/src/python/misc.rs @@ -42,7 +42,7 @@ pub fn minhash( Ok(expr.into()) } -#[pyfunction] +#[pyfunction(signature = (expr, seed=None))] pub fn hash(expr: PyExpr, seed: Option) -> PyResult { Ok(crate::hash::hash(expr.into(), seed.map(Into::into)).into()) } diff --git a/src/daft-functions/src/python/mod.rs b/src/daft-functions/src/python/mod.rs index 14e3266373..4032ce9d60 100644 --- a/src/daft-functions/src/python/mod.rs +++ b/src/daft-functions/src/python/mod.rs @@ -26,13 +26,13 @@ mod utf8; use pyo3::{ types::{PyModule, PyModuleMethods}, - wrap_pyfunction_bound, Bound, PyResult, + wrap_pyfunction, Bound, PyResult, }; pub fn register(parent: &Bound) -> PyResult<()> { macro_rules! add { ($p:path) => { - parent.add_function(wrap_pyfunction_bound!($p, parent)?)?; + parent.add_function(wrap_pyfunction!($p, parent)?)?; }; } diff --git a/src/daft-functions/src/python/tokenize.rs b/src/daft-functions/src/python/tokenize.rs index bf5a720230..6af44ad07c 100644 --- a/src/daft-functions/src/python/tokenize.rs +++ b/src/daft-functions/src/python/tokenize.rs @@ -2,7 +2,14 @@ use daft_dsl::python::PyExpr; use daft_io::python::IOConfig; use pyo3::{pyfunction, PyResult}; -#[pyfunction] +#[pyfunction(signature = ( + expr, + tokens_path, + use_special_tokens, + io_config=None, + pattern=None, + special_tokens=None +))] pub fn tokenize_encode( expr: PyExpr, tokens_path: &str, @@ -22,7 +29,13 @@ pub fn tokenize_encode( .into()) } -#[pyfunction] +#[pyfunction(signature = ( + expr, + tokens_path, + io_config=None, + pattern=None, + special_tokens=None +))] pub fn tokenize_decode( expr: PyExpr, tokens_path: &str, diff --git a/src/daft-functions/src/python/uri.rs b/src/daft-functions/src/python/uri.rs index 65f7e1fc8c..d7548c0125 100644 --- a/src/daft-functions/src/python/uri.rs +++ 
b/src/daft-functions/src/python/uri.rs @@ -26,7 +26,15 @@ pub fn url_download( .into()) } -#[pyfunction] +#[pyfunction(signature = ( + expr, + folder_location, + max_connections, + raise_error_on_failure, + multi_thread, + is_single_folder, + io_config=None +))] pub fn url_upload( expr: PyExpr, folder_location: PyExpr, diff --git a/src/daft-functions/src/python/utf8.rs b/src/daft-functions/src/python/utf8.rs index c09b5081fe..ec64e709ea 100644 --- a/src/daft-functions/src/python/utf8.rs +++ b/src/daft-functions/src/python/utf8.rs @@ -53,7 +53,7 @@ pub fn utf8_to_date(expr: PyExpr, format: &str) -> PyResult { Ok(crate::utf8::to_date(expr.into(), format).into()) } -#[pyfunction] +#[pyfunction(signature = (expr, format, timezone=None))] pub fn utf8_to_datetime(expr: PyExpr, format: &str, timezone: Option<&str>) -> PyResult { Ok(crate::utf8::to_datetime(expr.into(), format, timezone).into()) } diff --git a/src/daft-image/src/python.rs b/src/daft-image/src/python.rs index 6b73b33e54..0ecdba72ba 100644 --- a/src/daft-image/src/python.rs +++ b/src/daft-image/src/python.rs @@ -4,7 +4,7 @@ use daft_core::{ }; use pyo3::{exceptions::PyValueError, prelude::*}; -#[pyfunction] +#[pyfunction(signature = (s, raise_error_on_failure, mode=None))] pub fn decode( s: &PySeries, raise_error_on_failure: bool, @@ -43,7 +43,7 @@ pub fn to_mode(s: &PySeries, mode: &ImageMode) -> PyResult { } pub fn register_modules(parent: &Bound) -> PyResult<()> { - let module = PyModule::new_bound(parent.py(), "image")?; + let module = PyModule::new(parent.py(), "image")?; module.add_wrapped(wrap_pyfunction!(decode))?; module.add_wrapped(wrap_pyfunction!(encode))?; module.add_wrapped(wrap_pyfunction!(resize))?; diff --git a/src/daft-io/src/python.rs b/src/daft-io/src/python.rs index 31b976b562..68ca30f44d 100644 --- a/src/daft-io/src/python.rs +++ b/src/daft-io/src/python.rs @@ -9,7 +9,14 @@ mod py { use crate::{get_io_client, parse_url, s3_like, stats::IOStatsContext}; - #[pyfunction] + 
#[pyfunction(signature = ( + input, + multithreaded_io=None, + io_config=None, + fanout_limit=None, + page_size=None, + limit=None + ))] fn io_glob( py: Python, input: String, @@ -50,7 +57,7 @@ mod py { }); let mut to_rtn = vec![]; for file in lsr? { - let dict = PyDict::new_bound(py); + let dict = PyDict::new(py); dict.set_item("type", format!("{:?}", file.filetype))?; dict.set_item("path", file.filepath)?; dict.set_item("size", file.size)?; @@ -72,8 +79,8 @@ mod py { pub fn register_modules(parent: &Bound) -> PyResult<()> { common_io_config::python::register_modules(parent)?; - parent.add_function(wrap_pyfunction_bound!(io_glob, parent)?)?; - parent.add_function(wrap_pyfunction_bound!(s3_config_from_env, parent)?)?; + parent.add_function(wrap_pyfunction!(io_glob, parent)?)?; + parent.add_function(wrap_pyfunction!(s3_config_from_env, parent)?)?; Ok(()) } } diff --git a/src/daft-json/src/lib.rs b/src/daft-json/src/lib.rs index 6f935b8e4c..5ae6bec79a 100644 --- a/src/daft-json/src/lib.rs +++ b/src/daft-json/src/lib.rs @@ -71,10 +71,7 @@ pub fn register_modules(parent: &Bound) -> PyResult<()> { parent.add_class::()?; parent.add_class::()?; parent.add_class::()?; - parent.add_function(wrap_pyfunction_bound!(python::pylib::read_json, parent)?)?; - parent.add_function(wrap_pyfunction_bound!( - python::pylib::read_json_schema, - parent - )?)?; + parent.add_function(wrap_pyfunction!(python::pylib::read_json, parent)?)?; + parent.add_function(wrap_pyfunction!(python::pylib::read_json_schema, parent)?)?; Ok(()) } diff --git a/src/daft-json/src/python.rs b/src/daft-json/src/python.rs index b1bff56031..01deebca3e 100644 --- a/src/daft-json/src/python.rs +++ b/src/daft-json/src/python.rs @@ -9,7 +9,15 @@ pub mod pylib { use crate::{JsonConvertOptions, JsonParseOptions, JsonReadOptions}; #[allow(clippy::too_many_arguments)] - #[pyfunction] + #[pyfunction(signature = ( + uri, + convert_options=None, + parse_options=None, + read_options=None, + io_config=None, + 
multithreaded_io=None, + max_chunks_in_flight=None + ))] pub fn read_json( py: Python, uri: &str, @@ -41,7 +49,13 @@ pub mod pylib { }) } - #[pyfunction] + #[pyfunction(signature = ( + uri, + parse_options=None, + max_bytes=None, + io_config=None, + multithreaded_io=None + ))] pub fn read_json_schema( py: Python, uri: &str, diff --git a/src/daft-local-execution/src/intermediate_ops/actor_pool_project.rs b/src/daft-local-execution/src/intermediate_ops/actor_pool_project.rs index f7bec63ef5..c19be4374f 100644 --- a/src/daft-local-execution/src/intermediate_ops/actor_pool_project.rs +++ b/src/daft-local-execution/src/intermediate_ops/actor_pool_project.rs @@ -36,7 +36,7 @@ impl ActorHandle { let handle = Python::with_gil(|py| { // create python object Ok::( - py.import_bound(pyo3::intern!(py, "daft.execution.actor_pool_udf"))? + py.import(pyo3::intern!(py, "daft.execution.actor_pool_udf"))? .getattr(pyo3::intern!(py, "ActorHandle"))? .call1((projection .iter() diff --git a/src/daft-local-execution/src/progress_bar.rs b/src/daft-local-execution/src/progress_bar.rs index c5c59b3e06..3b42333d49 100644 --- a/src/daft-local-execution/src/progress_bar.rs +++ b/src/daft-local-execution/src/progress_bar.rs @@ -192,7 +192,7 @@ mod python { pub fn in_notebook() -> bool { pyo3::Python::with_gil(|py| { - py.import_bound(pyo3::intern!(py, "daft.utils")) + py.import(pyo3::intern!(py, "daft.utils")) .and_then(|m| m.getattr(pyo3::intern!(py, "in_notebook"))) .and_then(|m| m.call0()) .and_then(|m| m.extract()) @@ -217,17 +217,17 @@ mod python { #[derive(Clone)] pub struct TqdmProgressBarManager { - inner: PyObject, + inner: Arc, } impl TqdmProgressBarManager { pub fn new() -> Self { Python::with_gil(|py| { - let module = py.import_bound("daft.runners.progress_bar")?; + let module = py.import("daft.runners.progress_bar")?; let progress_bar_class = module.getattr("SwordfishProgressBar")?; let pb_object = progress_bar_class.call0()?; DaftResult::Ok(Self { - inner: pb_object.into(), + 
inner: Arc::new(pb_object.into()), }) }) .expect("Failed to create progress bar") diff --git a/src/daft-local-execution/src/run.rs b/src/daft-local-execution/src/run.rs index e170661d9c..c3dea72f7a 100644 --- a/src/daft-local-execution/src/run.rs +++ b/src/daft-local-execution/src/run.rs @@ -23,7 +23,9 @@ use { common_daft_config::PyDaftExecutionConfig, daft_logical_plan::PyLogicalPlanBuilder, daft_micropartition::python::PyMicroPartition, - pyo3::{pyclass, pymethods, IntoPy, PyObject, PyRef, PyRefMut, PyResult, Python}, + pyo3::{ + pyclass, pymethods, Bound, IntoPyObject, PyAny, PyObject, PyRef, PyRefMut, PyResult, Python, + }, }; use crate::{ @@ -37,7 +39,7 @@ use crate::{ #[cfg(feature = "python")] #[pyclass] struct LocalPartitionIterator { - iter: Box> + Send>, + iter: Box> + Send + Sync>, } #[cfg(feature = "python")] @@ -75,13 +77,14 @@ impl PyNativeExecutor { }) } - pub fn run( + #[pyo3(signature = (psets, cfg, results_buffer_size=None))] + pub fn run<'a>( &self, - py: Python, + py: Python<'a>, psets: HashMap>, cfg: PyDaftExecutionConfig, results_buffer_size: Option, - ) -> PyResult { + ) -> PyResult> { let native_psets: HashMap> = psets .into_iter() .map(|(part_id, parts)| { @@ -104,10 +107,15 @@ impl PyNativeExecutor { .map(|res| res.into_iter()) })?; let iter = Box::new(out.map(|part| { - part.map(|p| pyo3::Python::with_gil(|py| PyMicroPartition::from(p).into_py(py))) + pyo3::Python::with_gil(|py| { + Ok(PyMicroPartition::from(part?) + .into_pyobject(py)? 
+ .unbind() + .into_any()) + }) })); let part_iter = LocalPartitionIterator { iter }; - Ok(part_iter.into_py(py)) + Ok(part_iter.into_pyobject(py)?.into_any()) } } diff --git a/src/daft-logical-plan/src/builder.rs b/src/daft-logical-plan/src/builder.rs index cb7f228b91..937fb45f44 100644 --- a/src/daft-logical-plan/src/builder.rs +++ b/src/daft-logical-plan/src/builder.rs @@ -534,8 +534,8 @@ impl LogicalPlanBuilder { table_location: String, partition_spec_id: i64, partition_cols: Vec, - iceberg_schema: PyObject, - iceberg_properties: PyObject, + iceberg_schema: Arc, + iceberg_properties: Arc, io_config: Option, catalog_columns: Vec, ) -> DaftResult { @@ -595,7 +595,7 @@ impl LogicalPlanBuilder { columns_name: Vec, mode: String, io_config: Option, - kwargs: PyObject, + kwargs: Arc, ) -> DaftResult { use crate::sink_info::LanceCatalogInfo; @@ -708,7 +708,7 @@ impl PyLogicalPlanBuilder { ) -> PyResult { Ok(LogicalPlanBuilder::in_memory_scan( partition_key, - common_partitioning::PartitionCacheEntry::Python(cache_entry), + common_partitioning::PartitionCacheEntry::Python(Arc::new(cache_entry)), schema.into(), num_partitions, size_bytes, @@ -781,6 +781,7 @@ impl PyLogicalPlanBuilder { .into()) } + #[pyo3(signature = (partition_by, num_partitions=None))] pub fn hash_repartition( &self, partition_by: Vec, @@ -792,6 +793,7 @@ impl PyLogicalPlanBuilder { .into()) } + #[pyo3(signature = (num_partitions=None))] pub fn random_shuffle(&self, num_partitions: Option) -> PyResult { Ok(self.builder.random_shuffle(num_partitions)?.into()) } @@ -804,6 +806,7 @@ impl PyLogicalPlanBuilder { Ok(self.builder.distinct()?.into()) } + #[pyo3(signature = (fraction, with_replacement, seed=None))] pub fn sample( &self, fraction: f64, @@ -843,6 +846,15 @@ impl PyLogicalPlanBuilder { .into()) } #[allow(clippy::too_many_arguments)] + #[pyo3(signature = ( + right, + left_on, + right_on, + join_type, + join_strategy=None, + join_suffix=None, + join_prefix=None + ))] pub fn join( &self, right: 
&Self, @@ -881,6 +893,7 @@ impl PyLogicalPlanBuilder { Ok(self.builder.except(&other.builder, is_all)?.into()) } + #[pyo3(signature = (column_name=None))] pub fn add_monotonically_increasing_id(&self, column_name: Option<&str>) -> PyResult { Ok(self .builder @@ -888,6 +901,13 @@ impl PyLogicalPlanBuilder { .into()) } + #[pyo3(signature = ( + root_dir, + file_format, + partition_cols=None, + compression=None, + io_config=None + ))] pub fn table_write( &self, root_dir: &str, @@ -909,6 +929,16 @@ impl PyLogicalPlanBuilder { } #[allow(clippy::too_many_arguments)] + #[pyo3(signature = ( + table_name, + table_location, + partition_spec_id, + partition_cols, + iceberg_schema, + iceberg_properties, + catalog_columns, + io_config=None + ))] pub fn iceberg_write( &self, table_name: String, @@ -927,8 +957,8 @@ impl PyLogicalPlanBuilder { table_location, partition_spec_id, pyexprs_to_exprs(partition_cols), - iceberg_schema, - iceberg_properties, + Arc::new(iceberg_schema), + Arc::new(iceberg_properties), io_config.map(|cfg| cfg.config), catalog_columns, )? 
@@ -936,6 +966,15 @@ impl PyLogicalPlanBuilder { } #[allow(clippy::too_many_arguments)] + #[pyo3(signature = ( + path, + columns_name, + mode, + version, + large_dtypes, + partition_cols=None, + io_config=None + ))] pub fn delta_write( &self, path: String, @@ -960,6 +999,13 @@ impl PyLogicalPlanBuilder { .into()) } + #[pyo3(signature = ( + path, + columns_name, + mode, + io_config=None, + kwargs=None + ))] pub fn lance_write( &self, py: Python, @@ -969,7 +1015,7 @@ impl PyLogicalPlanBuilder { io_config: Option, kwargs: Option, ) -> PyResult { - let kwargs = kwargs.unwrap_or_else(|| py.None()); + let kwargs = Arc::new(kwargs.unwrap_or_else(|| py.None())); Ok(self .builder .lance_write( diff --git a/src/daft-logical-plan/src/optimization/rules/push_down_limit.rs b/src/daft-logical-plan/src/optimization/rules/push_down_limit.rs index 8c2d8e67bc..46714448b4 100644 --- a/src/daft-logical-plan/src/optimization/rules/push_down_limit.rs +++ b/src/daft-logical-plan/src/optimization/rules/push_down_limit.rs @@ -275,7 +275,7 @@ mod tests { let schema: Arc = Schema::new(vec![Field::new("a", DataType::Int64)])?.into(); let plan = LogicalPlanBuilder::in_memory_scan( "foo", - common_partitioning::PartitionCacheEntry::Python(py_obj), + common_partitioning::PartitionCacheEntry::Python(Arc::new(py_obj)), schema, Default::default(), 5, diff --git a/src/daft-logical-plan/src/sink_info.rs b/src/daft-logical-plan/src/sink_info.rs index c74e67f36e..db3dffac5f 100644 --- a/src/daft-logical-plan/src/sink_info.rs +++ b/src/daft-logical-plan/src/sink_info.rs @@ -1,4 +1,4 @@ -use std::hash::Hash; +use std::{hash::Hash, sync::Arc}; use common_file_formats::FileFormat; use common_io_config::IOConfig; @@ -57,14 +57,14 @@ pub struct IcebergCatalogInfo { )] #[derivative(PartialEq = "ignore")] #[derivative(Hash = "ignore")] - pub iceberg_schema: PyObject, + pub iceberg_schema: Arc, #[serde( serialize_with = "serialize_py_object", deserialize_with = "deserialize_py_object" )] #[derivative(PartialEq 
= "ignore")] #[derivative(Hash = "ignore")] - pub iceberg_properties: PyObject, + pub iceberg_properties: Arc, pub io_config: Option, } @@ -128,7 +128,7 @@ pub struct LanceCatalogInfo { )] #[derivative(PartialEq = "ignore")] #[derivative(Hash = "ignore")] - pub kwargs: PyObject, + pub kwargs: Arc, } #[cfg(feature = "python")] diff --git a/src/daft-logical-plan/src/source_info/file_info.rs b/src/daft-logical-plan/src/source_info/file_info.rs index 482298b23e..eef1b2c4db 100644 --- a/src/daft-logical-plan/src/source_info/file_info.rs +++ b/src/daft-logical-plan/src/source_info/file_info.rs @@ -16,6 +16,7 @@ pub struct FileInfo { #[pymethods] impl FileInfo { #[new] + #[pyo3(signature = (file_path, file_size=None, num_rows=None))] pub fn new(file_path: String, file_size: Option, num_rows: Option) -> Self { Self::new_internal(file_path, file_size, num_rows) } diff --git a/src/daft-micropartition/src/python.rs b/src/daft-micropartition/src/python.rs index e062abb2b5..6ed01c7a7a 100644 --- a/src/daft-micropartition/src/python.rs +++ b/src/daft-micropartition/src/python.rs @@ -105,6 +105,7 @@ impl PyMicroPartition { } #[staticmethod] + #[pyo3(signature = (schema=None))] pub fn empty(schema: Option) -> PyResult { Ok(MicroPartition::empty(match schema { Some(s) => Some(s.schema), @@ -264,6 +265,13 @@ impl PyMicroPartition { }) } + #[pyo3(signature = ( + right, + left_on, + right_on, + how, + null_equals_nulls=None + ))] pub fn hash_join( &self, py: Python, @@ -368,6 +376,7 @@ impl PyMicroPartition { }) } + #[pyo3(signature = (fraction, with_replacement, seed=None))] pub fn sample_by_fraction( &self, py: Python, @@ -393,6 +402,7 @@ impl PyMicroPartition { }) } + #[pyo3(signature = (size, with_replacement, seed=None))] pub fn sample_by_size( &self, py: Python, @@ -530,6 +540,13 @@ impl PyMicroPartition { } #[staticmethod] + #[pyo3(signature = ( + uri, + schema, + storage_config, + include_columns=None, + num_rows=None + ))] pub fn read_json( py: Python, uri: &str, @@ -555,6 
+572,14 @@ impl PyMicroPartition { } #[staticmethod] + #[pyo3(signature = ( + uri, + convert_options=None, + parse_options=None, + read_options=None, + io_config=None, + multithreaded_io=None + ))] pub fn read_json_native( py: Python, uri: &str, @@ -582,6 +607,14 @@ impl PyMicroPartition { } #[staticmethod] + #[pyo3(signature = ( + uri, + convert_options=None, + parse_options=None, + read_options=None, + io_config=None, + multithreaded_io=None + ))] pub fn read_csv( py: Python, uri: &str, @@ -609,6 +642,17 @@ impl PyMicroPartition { #[allow(clippy::too_many_arguments)] #[staticmethod] + #[pyo3(signature = ( + uri, + columns=None, + start_offset=None, + num_rows=None, + row_groups=None, + predicate=None, + io_config=None, + multithreaded_io=None, + coerce_int96_timestamp_unit=None + ))] pub fn read_parquet( py: Python, uri: &str, @@ -655,6 +699,19 @@ impl PyMicroPartition { #[allow(clippy::too_many_arguments)] #[staticmethod] + #[pyo3(signature = ( + uris, + columns=None, + start_offset=None, + num_rows=None, + row_groups=None, + predicate=None, + io_config=None, + num_parallel_tasks=None, + multithreaded_io=None, + coerce_int96_timestamp_unit=None, + chunk_size=None + ))] pub fn read_parquet_bulk( py: Python, uris: Vec, @@ -753,17 +810,16 @@ impl PyMicroPartition { } pub fn __reduce__(&self, py: Python) -> PyResult<(PyObject, PyObject)> { - let schema_bytes = PyBytes::new_bound(py, &bincode::serialize(&self.inner.schema).unwrap()); + let schema_bytes = PyBytes::new(py, &bincode::serialize(&self.inner.schema).unwrap()); let py_metadata_bytes = - PyBytes::new_bound(py, &bincode::serialize(&self.inner.metadata).unwrap()); - let py_stats_bytes = - PyBytes::new_bound(py, &bincode::serialize(&self.inner.statistics).unwrap()); + PyBytes::new(py, &bincode::serialize(&self.inner.metadata).unwrap()); + let py_stats_bytes = PyBytes::new(py, &bincode::serialize(&self.inner.statistics).unwrap()); let guard = self.inner.state.lock().unwrap(); if let TableState::Loaded(tables) = 
&*guard { let _from_pytable = py - .import_bound(pyo3::intern!(py, "daft.table"))? + .import(pyo3::intern!(py, "daft.table"))? .getattr(pyo3::intern!(py, "Table"))? .getattr(pyo3::intern!(py, "_from_pytable"))?; @@ -772,15 +828,17 @@ impl PyMicroPartition { .map(|pt| _from_pytable.call1((pt,))) .collect::>>()?; Ok(( - Self::type_object_bound(py) + Self::type_object(py) .getattr(pyo3::intern!(py, "_from_loaded_table_state"))? .into(), - (schema_bytes, pyobjs, py_metadata_bytes, py_stats_bytes).to_object(py), + (schema_bytes, pyobjs, py_metadata_bytes, py_stats_bytes) + .into_pyobject(py)? + .into(), )) } else if let TableState::Unloaded(params) = &*guard { - let py_params_bytes = PyBytes::new_bound(py, &bincode::serialize(params).unwrap()); + let py_params_bytes = PyBytes::new(py, &bincode::serialize(params).unwrap()); Ok(( - Self::type_object_bound(py) + Self::type_object(py) .getattr(pyo3::intern!(py, "_from_unloaded_table_state"))? .into(), ( @@ -789,7 +847,8 @@ impl PyMicroPartition { py_metadata_bytes, py_stats_bytes, ) - .to_object(py), + .into_pyobject(py)? + .into(), )) } else { unreachable!() @@ -806,15 +865,15 @@ pub fn read_json_into_py_table( num_rows: Option, ) -> PyResult { let read_options = py - .import_bound(pyo3::intern!(py, "daft.runners.partitioning"))? + .import(pyo3::intern!(py, "daft.runners.partitioning"))? .getattr(pyo3::intern!(py, "TableReadOptions"))? .call1((num_rows, include_columns))?; let py_schema = py - .import_bound(pyo3::intern!(py, "daft.logical.schema"))? + .import(pyo3::intern!(py, "daft.logical.schema"))? .getattr(pyo3::intern!(py, "Schema"))? .getattr(pyo3::intern!(py, "_from_pyschema"))? .call1((schema,))?; - py.import_bound(pyo3::intern!(py, "daft.table.table_io"))? + py.import(pyo3::intern!(py, "daft.table.table_io"))? .getattr(pyo3::intern!(py, "read_json"))? .call1((uri, py_schema, storage_config, read_options))? .getattr(pyo3::intern!(py, "to_table"))? 
@@ -836,20 +895,20 @@ pub fn read_csv_into_py_table( num_rows: Option, ) -> PyResult { let py_schema = py - .import_bound(pyo3::intern!(py, "daft.logical.schema"))? + .import(pyo3::intern!(py, "daft.logical.schema"))? .getattr(pyo3::intern!(py, "Schema"))? .getattr(pyo3::intern!(py, "_from_pyschema"))? .call1((schema,))?; let read_options = py - .import_bound(pyo3::intern!(py, "daft.runners.partitioning"))? + .import(pyo3::intern!(py, "daft.runners.partitioning"))? .getattr(pyo3::intern!(py, "TableReadOptions"))? .call1((num_rows, include_columns))?; let header_idx = if has_header { Some(0) } else { None }; let parse_options = py - .import_bound(pyo3::intern!(py, "daft.runners.partitioning"))? + .import(pyo3::intern!(py, "daft.runners.partitioning"))? .getattr(pyo3::intern!(py, "TableParseCSVOptions"))? .call1((delimiter, header_idx, double_quote))?; - py.import_bound(pyo3::intern!(py, "daft.table.table_io"))? + py.import(pyo3::intern!(py, "daft.table.table_io"))? .getattr(pyo3::intern!(py, "read_csv"))? .call1((uri, py_schema, storage_config, parse_options, read_options))? .getattr(pyo3::intern!(py, "to_table"))? @@ -868,24 +927,24 @@ pub fn read_parquet_into_py_table( num_rows: Option, ) -> PyResult { let py_schema = py - .import_bound(pyo3::intern!(py, "daft.logical.schema"))? + .import(pyo3::intern!(py, "daft.logical.schema"))? .getattr(pyo3::intern!(py, "Schema"))? .getattr(pyo3::intern!(py, "_from_pyschema"))? .call1((schema,))?; let read_options = py - .import_bound(pyo3::intern!(py, "daft.runners.partitioning"))? + .import(pyo3::intern!(py, "daft.runners.partitioning"))? .getattr(pyo3::intern!(py, "TableReadOptions"))? .call1((num_rows, include_columns))?; let py_coerce_int96_timestamp_unit = py - .import_bound(pyo3::intern!(py, "daft.datatype"))? + .import(pyo3::intern!(py, "daft.datatype"))? .getattr(pyo3::intern!(py, "TimeUnit"))? .getattr(pyo3::intern!(py, "_from_pytimeunit"))? 
.call1((coerce_int96_timestamp_unit,))?; let parse_options = py - .import_bound(pyo3::intern!(py, "daft.runners.partitioning"))? + .import(pyo3::intern!(py, "daft.runners.partitioning"))? .getattr(pyo3::intern!(py, "TableParseParquetOptions"))? .call1((py_coerce_int96_timestamp_unit,))?; - py.import_bound(pyo3::intern!(py, "daft.table.table_io"))? + py.import(pyo3::intern!(py, "daft.table.table_io"))? .getattr(pyo3::intern!(py, "read_parquet"))? .call1((uri, py_schema, storage_config, read_options, parse_options))? .getattr(pyo3::intern!(py, "to_table"))? @@ -904,13 +963,13 @@ pub fn read_sql_into_py_table( num_rows: Option, ) -> PyResult { let py_schema = py - .import_bound(pyo3::intern!(py, "daft.logical.schema"))? + .import(pyo3::intern!(py, "daft.logical.schema"))? .getattr(pyo3::intern!(py, "Schema"))? .getattr(pyo3::intern!(py, "_from_pyschema"))? .call1((schema,))?; let py_predicate = match predicate { Some(p) => Some( - py.import_bound(pyo3::intern!(py, "daft.expressions.expressions"))? + py.import(pyo3::intern!(py, "daft.expressions.expressions"))? .getattr(pyo3::intern!(py, "Expression"))? .getattr(pyo3::intern!(py, "_from_pyexpr"))? .call1((p,))?, @@ -918,10 +977,10 @@ pub fn read_sql_into_py_table( None => None, }; let read_options = py - .import_bound(pyo3::intern!(py, "daft.runners.partitioning"))? + .import(pyo3::intern!(py, "daft.runners.partitioning"))? .getattr(pyo3::intern!(py, "TableReadOptions"))? .call1((num_rows, include_columns))?; - py.import_bound(pyo3::intern!(py, "daft.table.table_io"))? + py.import(pyo3::intern!(py, "daft.table.table_io"))? .getattr(pyo3::intern!(py, "read_sql"))? .call1((sql, conn, py_schema, read_options, py_predicate))? .getattr(pyo3::intern!(py, "to_table"))? @@ -943,11 +1002,11 @@ pub fn read_pyfunc_into_table_iter( .. 
} => { Python::with_gil(|py| { - let func = py.import_bound(module.as_str()) + let func = py.import(module.as_str()) .unwrap_or_else(|_| panic!("Cannot import factory function from module {module}")) .getattr(func_name.as_str()) .unwrap_or_else(|_| panic!("Cannot find function {func_name} in module {module}")); - func.call(func_args.to_pytuple(py), None) + func.call(func_args.to_pytuple(py).with_context(|_| PyIOSnafu)?, None) .with_context(|_| PyIOSnafu) .map(Into::::into) }) diff --git a/src/daft-parquet/src/python.rs b/src/daft-parquet/src/python.rs index 036d09df02..88763798fc 100644 --- a/src/daft-parquet/src/python.rs +++ b/src/daft-parquet/src/python.rs @@ -14,7 +14,17 @@ pub mod pylib { ArrowChunk, ParquetSchemaInferenceOptions, ParquetSchemaInferenceOptionsBuilder, }; #[allow(clippy::too_many_arguments)] - #[pyfunction] + #[pyfunction(signature = ( + uri, + columns=None, + start_offset=None, + num_rows=None, + row_groups=None, + predicate=None, + io_config=None, + multithreaded_io=None, + coerce_int96_timestamp_unit=None + ))] pub fn read_parquet( py: Python, uri: &str, @@ -88,7 +98,18 @@ pub mod pylib { } #[allow(clippy::too_many_arguments)] - #[pyfunction] + #[pyfunction(signature = ( + uri, + string_encoding, + columns=None, + start_offset=None, + num_rows=None, + row_groups=None, + io_config=None, + multithreaded_io=None, + coerce_int96_timestamp_unit=None, + file_timeout_ms=None + ))] pub fn read_parquet_into_pyarrow( py: Python, uri: &str, @@ -126,11 +147,22 @@ pub mod pylib { file_timeout_ms, ) })?; - let pyarrow = py.import_bound(pyo3::intern!(py, "pyarrow"))?; + let pyarrow = py.import(pyo3::intern!(py, "pyarrow"))?; convert_pyarrow_parquet_read_result_into_py(py, schema, all_arrays, num_rows, &pyarrow) } #[allow(clippy::too_many_arguments)] - #[pyfunction] + #[pyfunction(signature = ( + uris, + columns=None, + start_offset=None, + num_rows=None, + row_groups=None, + predicate=None, + io_config=None, + num_parallel_tasks=None, + 
multithreaded_io=None, + coerce_int96_timestamp_unit=None + ))] pub fn read_parquet_bulk( py: Python, uris: Vec, @@ -178,7 +210,17 @@ pub mod pylib { } #[allow(clippy::too_many_arguments)] - #[pyfunction] + #[pyfunction(signature = ( + uris, + columns=None, + start_offset=None, + num_rows=None, + row_groups=None, + io_config=None, + num_parallel_tasks=None, + multithreaded_io=None, + coerce_int96_timestamp_unit=None + ))] pub fn read_parquet_into_pyarrow_bulk( py: Python, uris: Vec, @@ -213,7 +255,7 @@ pub mod pylib { schema_infer_options, ) })?; - let pyarrow = py.import_bound(pyo3::intern!(py, "pyarrow"))?; + let pyarrow = py.import(pyo3::intern!(py, "pyarrow"))?; parquet_read_results .into_iter() .map(|(s, all_arrays, num_rows)| { @@ -222,7 +264,12 @@ pub mod pylib { .collect::>>() } - #[pyfunction] + #[pyfunction(signature = ( + uri, + io_config=None, + multithreaded_io=None, + coerce_int96_timestamp_unit=None + ))] pub fn read_parquet_schema( py: Python, uri: &str, @@ -260,7 +307,7 @@ pub mod pylib { }) } - #[pyfunction] + #[pyfunction(signature = (uris, io_config=None, multithreaded_io=None))] pub fn read_parquet_statistics( py: Python, uris: PySeries, @@ -287,20 +334,14 @@ pub mod pylib { } } pub fn register_modules(parent: &Bound) -> PyResult<()> { - parent.add_function(wrap_pyfunction_bound!(pylib::read_parquet, parent)?)?; - parent.add_function(wrap_pyfunction_bound!( - pylib::read_parquet_into_pyarrow, - parent - )?)?; - parent.add_function(wrap_pyfunction_bound!( + parent.add_function(wrap_pyfunction!(pylib::read_parquet, parent)?)?; + parent.add_function(wrap_pyfunction!(pylib::read_parquet_into_pyarrow, parent)?)?; + parent.add_function(wrap_pyfunction!( pylib::read_parquet_into_pyarrow_bulk, parent )?)?; - parent.add_function(wrap_pyfunction_bound!(pylib::read_parquet_bulk, parent)?)?; - parent.add_function(wrap_pyfunction_bound!(pylib::read_parquet_schema, parent)?)?; - parent.add_function(wrap_pyfunction_bound!( - pylib::read_parquet_statistics, - 
parent - )?)?; + parent.add_function(wrap_pyfunction!(pylib::read_parquet_bulk, parent)?)?; + parent.add_function(wrap_pyfunction!(pylib::read_parquet_schema, parent)?)?; + parent.add_function(wrap_pyfunction!(pylib::read_parquet_statistics, parent)?)?; Ok(()) } diff --git a/src/daft-scan/src/builder.rs b/src/daft-scan/src/builder.rs index b154f20d92..68c67cbd20 100644 --- a/src/daft-scan/src/builder.rs +++ b/src/daft-scan/src/builder.rs @@ -280,12 +280,14 @@ pub fn delta_scan>( }; // let py_io_config = PyIOConfig { config: io_config }; - let delta_lake_scan = PyModule::import_bound(py, "daft.delta_lake.delta_lake_scan")?; + let delta_lake_scan = PyModule::import(py, "daft.delta_lake.delta_lake_scan")?; let delta_lake_scan_operator = delta_lake_scan.getattr(pyo3::intern!(py, "DeltaLakeScanOperator"))?; let delta_lake_operator = delta_lake_scan_operator .call1((glob_path.as_ref(), storage_config))? - .to_object(py); + .into_pyobject(py) + .unwrap() + .into(); let scan_operator_handle = ScanOperatorHandle::from_python_scan_operator(delta_lake_operator, py)?; LogicalPlanBuilder::table_scan(scan_operator_handle.into(), None) diff --git a/src/daft-scan/src/python.rs b/src/daft-scan/src/python.rs index d6e0665047..a13d16cfdc 100644 --- a/src/daft-scan/src/python.rs +++ b/src/daft-scan/src/python.rs @@ -1,4 +1,7 @@ -use std::hash::{Hash, Hasher}; +use std::{ + hash::{Hash, Hasher}, + sync::Arc, +}; use common_py_serde::{deserialize_py_object, serialize_py_object}; use pyo3::{prelude::*, types::PyTuple}; @@ -12,7 +15,7 @@ struct PyObjectSerializableWrapper( serialize_with = "serialize_py_object", deserialize_with = "deserialize_py_object" )] - pub PyObject, + pub Arc, ); /// Python arguments to a Python function that produces Tables @@ -29,7 +32,7 @@ impl Hash for PythonTablesFactoryArgs { } impl PythonTablesFactoryArgs { - pub fn new(args: Vec) -> Self { + pub fn new(args: Vec>) -> Self { let mut hasher = std::collections::hash_map::DefaultHasher::new(); 
Python::with_gil(|py| { for obj in &args { @@ -45,9 +48,8 @@ impl PythonTablesFactoryArgs { } } - #[must_use] - pub fn to_pytuple<'a>(&self, py: Python<'a>) -> Bound<'a, PyTuple> { - pyo3::types::PyTuple::new_bound(py, self.args.iter().map(|x| x.0.bind(py))) + pub fn to_pytuple<'a>(&self, py: Python<'a>) -> PyResult> { + pyo3::types::PyTuple::new(py, self.args.iter().map(|x| x.0.bind(py))) } } @@ -78,11 +80,7 @@ pub mod pylib { use daft_schema::{python::schema::PySchema, schema::SchemaRef}; use daft_stats::{PartitionSpec, TableMetadata, TableStatistics}; use daft_table::{python::PyTable, Table}; - use pyo3::{ - prelude::*, - pyclass, - types::{PyIterator, PyList}, - }; + use pyo3::{prelude::*, pyclass, types::PyIterator}; use serde::{Deserialize, Serialize}; use super::PythonTablesFactoryArgs; @@ -126,6 +124,15 @@ pub mod pylib { #[staticmethod] #[allow(clippy::too_many_arguments)] + #[pyo3(signature = ( + glob_path, + file_format_config, + storage_config, + hive_partitioning, + infer_schema, + schema=None, + file_path_column=None + ))] pub fn glob_scan( py: Python, glob_path: Vec, @@ -186,10 +193,10 @@ pub mod pylib { } fn _partitioning_keys(abc: &PyObject, py: Python) -> PyResult> { let result = abc.call_method0(py, pyo3::intern!(py, "partitioning_keys"))?; - let result = result.extract::<&PyList>(py)?; result - .into_iter() - .map(|p| Ok(p.extract::()?.0.as_ref().clone())) + .bind(py) + .try_iter()? 
+ .map(|p| Ok(p?.extract::()?.0.as_ref().clone())) .collect() } @@ -280,11 +287,11 @@ pub mod pylib { fn to_scan_tasks(&self, pushdowns: Pushdowns) -> DaftResult> { let scan_tasks = Python::with_gil(|py| { - let pypd = PyPushdowns(pushdowns.clone().into()).into_py(py); + let pypd = PyPushdowns(pushdowns.clone().into()).into_pyobject(py)?; let pyiter = self.operator .call_method1(py, pyo3::intern!(py, "to_scan_tasks"), (pypd,))?; - let pyiter = PyIterator::from_bound_object(pyiter.bind(py))?; + let pyiter = PyIterator::from_object(pyiter.bind(py))?; DaftResult::Ok( pyiter .map(|v| { @@ -340,6 +347,18 @@ pub mod pylib { impl PyScanTask { #[allow(clippy::too_many_arguments)] #[staticmethod] + #[pyo3(signature = ( + file, + file_format, + schema, + storage_config, + num_rows=None, + size_bytes=None, + iceberg_delete_files=None, + pushdowns=None, + partition_values=None, + stats=None + ))] pub fn catalog_scan_task( file: String, file_format: PyFileFormatConfig, @@ -404,6 +423,16 @@ pub mod pylib { #[allow(clippy::too_many_arguments)] #[staticmethod] + #[pyo3(signature = ( + url, + file_format, + schema, + storage_config, + num_rows=None, + size_bytes=None, + pushdowns=None, + stats=None + ))] pub fn sql_scan_task( url: String, file_format: PyFileFormatConfig, @@ -437,11 +466,20 @@ pub mod pylib { #[allow(clippy::too_many_arguments)] #[staticmethod] + #[pyo3(signature = ( + module, + func_name, + func_args, + schema, + num_rows=None, + size_bytes=None, + pushdowns=None, + stats=None + ))] pub fn python_factory_func_scan_task( - py: Python, module: String, func_name: String, - func_args: Vec>, + func_args: Vec, schema: PySchema, num_rows: Option, size_bytes: Option, @@ -455,7 +493,7 @@ pub mod pylib { module, func_name, func_args: PythonTablesFactoryArgs::new( - func_args.iter().map(|pyany| pyany.into_py(py)).collect(), + func_args.into_iter().map(Arc::new).collect(), ), size_bytes, metadata: num_rows.map(|num_rows| TableMetadata { @@ -523,7 +561,7 @@ pub mod pylib { /// 
/// Returns an `i64` representing the estimated size in bytes. /// - #[pyfunction] + #[pyfunction(signature = (uri, file_size, columns=None, has_metadata=None))] pub fn estimate_in_memory_size_bytes( uri: &str, file_size: u64, @@ -572,16 +610,13 @@ pub fn register_modules(parent: &Bound) -> PyResult<()> { parent.add_class::()?; parent.add_class::()?; - parent.add_function(wrap_pyfunction_bound!( - pylib::logical_plan_table_scan, - parent - )?)?; + parent.add_function(wrap_pyfunction!(pylib::logical_plan_table_scan, parent)?)?; Ok(()) } pub fn register_testing_modules(parent: &Bound) -> PyResult<()> { - parent.add_function(wrap_pyfunction_bound!( + parent.add_function(wrap_pyfunction!( pylib::estimate_in_memory_size_bytes, parent )?)?; diff --git a/src/daft-scan/src/storage_config.rs b/src/daft-scan/src/storage_config.rs index 15efe8f177..270964b705 100644 --- a/src/daft-scan/src/storage_config.rs +++ b/src/daft-scan/src/storage_config.rs @@ -70,6 +70,7 @@ impl Default for StorageConfig { impl StorageConfig { #[new] #[must_use] + #[pyo3(signature = (multithreaded_io, io_config=None))] pub fn new(multithreaded_io: bool, io_config: Option) -> Self { Self::new_internal(multithreaded_io, io_config.map(|c| c.config)) } diff --git a/src/daft-scheduler/src/adaptive.rs b/src/daft-scheduler/src/adaptive.rs index d3b27ca310..4b58a1b9b1 100644 --- a/src/daft-scheduler/src/adaptive.rs +++ b/src/daft-scheduler/src/adaptive.rs @@ -67,7 +67,7 @@ impl AdaptivePhysicalPlanScheduler { let in_memory_info = InMemoryInfo::new( Schema::empty().into(), // TODO thread in schema from in memory scan partition_key.into(), - PartitionCacheEntry::Python(cache_entry), + PartitionCacheEntry::Python(Arc::new(cache_entry)), num_partitions, size_bytes, num_rows, diff --git a/src/daft-scheduler/src/scheduler.rs b/src/daft-scheduler/src/scheduler.rs index 81d528ce58..56662dac31 100644 --- a/src/daft-scheduler/src/scheduler.rs +++ b/src/daft-scheduler/src/scheduler.rs @@ -30,8 +30,11 @@ use { 
daft_dsl::python::PyExpr, daft_logical_plan::{OutputFileInfo, PyLogicalPlanBuilder}, daft_scan::python::pylib::PyScanTask, - pyo3::{pyclass, pymethods, types::PyAnyMethods, PyObject, PyRef, PyRefMut, PyResult, Python}, - std::collections::HashMap, + pyo3::{ + pyclass, pymethods, + types::{PyAnyMethods, PyDict, PyList}, + Bound, Py, PyAny, PyObject, PyRef, PyRefMut, PyResult, Python, + }, }; /// A work scheduler for physical plans. @@ -88,7 +91,7 @@ impl PhysicalPlanScheduler { pub fn to_partition_tasks( &self, py: Python, - psets: HashMap>, + psets: Bound, actor_pool_manager: PyObject, ) -> PyResult { physical_plan_to_partition_tasks(self.plan().as_ref(), py, &psets, &actor_pool_manager) @@ -98,7 +101,7 @@ impl PhysicalPlanScheduler { #[cfg(feature = "python")] #[pyclass] struct StreamingPartitionIterator { - iter: Box> + Send>, + iter: Box> + Send + Sync>, } #[cfg(feature = "python")] @@ -126,7 +129,7 @@ impl From for PhysicalPlanScheduler { #[cfg(feature = "python")] #[pyclass] struct PartitionIterator { - parts: Vec, + parts: Py, index: usize, } @@ -136,10 +139,10 @@ impl PartitionIterator { fn __iter__(slf: PyRef<'_, Self>) -> PyRef<'_, Self> { slf } - fn __next__(mut slf: PyRefMut<'_, Self>) -> Option { + fn __next__(mut slf: PyRefMut<'_, Self>) -> Option> { let index = slf.index; slf.index += 1; - slf.parts.get(index).map(|part| part.clone_ref(slf.py())) + slf.parts.bind(slf.py()).get_item(index).ok() } } @@ -161,7 +164,7 @@ fn tabular_write( io_config: &Option, ) -> PyResult { let py_iter = py - .import_bound(pyo3::intern!(py, "daft.execution.rust_physical_plan_shim"))? + .import(pyo3::intern!(py, "daft.execution.rust_physical_plan_shim"))? .getattr(pyo3::intern!(py, "write_file"))? .call1(( upstream_iter, @@ -187,13 +190,13 @@ fn iceberg_write( iceberg_info: &IcebergCatalogInfo, ) -> PyResult { let py_iter = py - .import_bound(pyo3::intern!(py, "daft.execution.rust_physical_plan_shim"))? 
+ .import(pyo3::intern!(py, "daft.execution.rust_physical_plan_shim"))? .getattr(pyo3::intern!(py, "write_iceberg"))? .call1(( upstream_iter, &iceberg_info.table_location, - &iceberg_info.iceberg_schema, - &iceberg_info.iceberg_properties, + &iceberg_info.iceberg_schema.clone_ref(py), + &iceberg_info.iceberg_properties.clone_ref(py), iceberg_info.partition_spec_id, exprs_to_pyexprs(&iceberg_info.partition_cols), iceberg_info @@ -214,7 +217,7 @@ fn deltalake_write( delta_lake_info: &DeltaLakeCatalogInfo, ) -> PyResult { let py_iter = py - .import_bound(pyo3::intern!(py, "daft.execution.rust_physical_plan_shim"))? + .import(pyo3::intern!(py, "daft.execution.rust_physical_plan_shim"))? .getattr(pyo3::intern!(py, "write_deltalake"))? .call1(( upstream_iter, @@ -239,7 +242,7 @@ fn lance_write( lance_info: &LanceCatalogInfo, ) -> PyResult { let py_iter = py - .import_bound(pyo3::intern!(py, "daft.execution.rust_physical_plan_shim"))? + .import(pyo3::intern!(py, "daft.execution.rust_physical_plan_shim"))? .getattr(pyo3::intern!(py, "write_lance"))? .call1(( upstream_iter, @@ -251,7 +254,7 @@ fn lance_write( .map(|cfg| common_io_config::python::IOConfig { config: cfg.clone(), }), - lance_info.kwargs.clone(), + lance_info.kwargs.clone_ref(py), ))?; Ok(py_iter.into()) } @@ -260,7 +263,7 @@ fn lance_write( fn physical_plan_to_partition_tasks( physical_plan: &PhysicalPlan, py: Python, - psets: &HashMap>, + psets: &Bound, actor_pool_manager: &PyObject, ) -> PyResult { use daft_dsl::Expr; @@ -272,18 +275,18 @@ fn physical_plan_to_partition_tasks( .. }) => { let partition_iter = PartitionIterator { - parts: psets[cache_key].clone(), + parts: psets.get_item(cache_key)?.extract()?, index: 0usize, }; let py_iter = py - .import_bound(pyo3::intern!(py, "daft.execution.physical_plan"))? + .import(pyo3::intern!(py, "daft.execution.physical_plan"))? .getattr(pyo3::intern!(py, "partition_read"))? 
.call1((partition_iter,))?; Ok(py_iter.into()) } PhysicalPlan::TabularScan(TabularScan { scan_tasks, .. }) => { let py_iter = py - .import_bound(pyo3::intern!(py, "daft.execution.rust_physical_plan_shim"))? + .import(pyo3::intern!(py, "daft.execution.rust_physical_plan_shim"))? .getattr(pyo3::intern!(py, "scan_with_tasks"))? .call1((scan_tasks .iter() @@ -292,7 +295,7 @@ fn physical_plan_to_partition_tasks( Ok(py_iter.into()) } PhysicalPlan::EmptyScan(EmptyScan { schema, .. }) => { - let schema_mod = py.import_bound(pyo3::intern!(py, "daft.logical.schema"))?; + let schema_mod = py.import(pyo3::intern!(py, "daft.logical.schema"))?; let python_schema = schema_mod .getattr(pyo3::intern!(py, "Schema"))? .getattr(pyo3::intern!(py, "_from_pyschema"))? @@ -301,7 +304,7 @@ fn physical_plan_to_partition_tasks( },))?; let py_iter = py - .import_bound(pyo3::intern!(py, "daft.execution.rust_physical_plan_shim"))? + .import(pyo3::intern!(py, "daft.execution.rust_physical_plan_shim"))? .getattr(pyo3::intern!(py, "empty_scan"))? .call1((python_schema,))?; Ok(py_iter.into()) @@ -319,7 +322,7 @@ fn physical_plan_to_partition_tasks( .map(|expr| PyExpr::from(expr.clone())) .collect(); let py_iter = py - .import_bound(pyo3::intern!(py, "daft.execution.rust_physical_plan_shim"))? + .import(pyo3::intern!(py, "daft.execution.rust_physical_plan_shim"))? .getattr(pyo3::intern!(py, "project"))? .call1(( upstream_iter, @@ -337,7 +340,7 @@ fn physical_plan_to_partition_tasks( let upstream_iter = physical_plan_to_partition_tasks(input, py, psets, actor_pool_manager)?; let py_iter = py - .import_bound(pyo3::intern!(py, "daft.execution.rust_physical_plan_shim"))? + .import(pyo3::intern!(py, "daft.execution.rust_physical_plan_shim"))? .getattr(pyo3::intern!(py, "actor_pool_project"))? 
.call1(( upstream_iter, @@ -355,8 +358,7 @@ fn physical_plan_to_partition_tasks( PhysicalPlan::Filter(Filter { input, predicate }) => { let upstream_iter = physical_plan_to_partition_tasks(input, py, psets, actor_pool_manager)?; - let expressions_mod = - py.import_bound(pyo3::intern!(py, "daft.expressions.expressions"))?; + let expressions_mod = py.import(pyo3::intern!(py, "daft.expressions.expressions"))?; let py_predicate = expressions_mod .getattr(pyo3::intern!(py, "Expression"))? .getattr(pyo3::intern!(py, "_from_pyexpr"))? @@ -365,7 +367,7 @@ fn physical_plan_to_partition_tasks( .getattr(pyo3::intern!(py, "ExpressionsProjection"))? .call1((vec![py_predicate],))?; let execution_step_mod = - py.import_bound(pyo3::intern!(py, "daft.execution.execution_step"))?; + py.import(pyo3::intern!(py, "daft.execution.execution_step"))?; let filter_step = execution_step_mod .getattr(pyo3::intern!(py, "Filter"))? .call1((expressions_projection,))?; @@ -373,7 +375,7 @@ fn physical_plan_to_partition_tasks( .getattr(pyo3::intern!(py, "ResourceRequest"))? .call0()?; let py_iter = py - .import_bound(pyo3::intern!(py, "daft.execution.physical_plan"))? + .import(pyo3::intern!(py, "daft.execution.physical_plan"))? .getattr(pyo3::intern!(py, "pipeline_instruction"))? .call1((upstream_iter, filter_step, resource_request))?; Ok(py_iter.into()) @@ -386,8 +388,7 @@ fn physical_plan_to_partition_tasks( }) => { let upstream_iter = physical_plan_to_partition_tasks(input, py, psets, actor_pool_manager)?; - let py_physical_plan = - py.import_bound(pyo3::intern!(py, "daft.execution.physical_plan"))?; + let py_physical_plan = py.import(pyo3::intern!(py, "daft.execution.physical_plan"))?; let global_limit_iter = py_physical_plan .getattr(pyo3::intern!(py, "global_limit"))? 
.call1((upstream_iter, *limit, *eager, *num_partitions))?; @@ -403,7 +404,7 @@ fn physical_plan_to_partition_tasks( .map(|expr| PyExpr::from(expr.clone())) .collect(); let py_iter = py - .import_bound(pyo3::intern!(py, "daft.execution.rust_physical_plan_shim"))? + .import(pyo3::intern!(py, "daft.execution.rust_physical_plan_shim"))? .getattr(pyo3::intern!(py, "explode"))? .call1((upstream_iter, explode_pyexprs))?; Ok(py_iter.into()) @@ -425,7 +426,7 @@ fn physical_plan_to_partition_tasks( .map(|expr| PyExpr::from(expr.clone())) .collect(); let py_iter = py - .import_bound(pyo3::intern!(py, "daft.execution.rust_physical_plan_shim"))? + .import(pyo3::intern!(py, "daft.execution.rust_physical_plan_shim"))? .getattr(pyo3::intern!(py, "unpivot"))? .call1(( upstream_iter, @@ -445,7 +446,7 @@ fn physical_plan_to_partition_tasks( let upstream_iter = physical_plan_to_partition_tasks(input, py, psets, actor_pool_manager)?; let py_iter = py - .import_bound(pyo3::intern!(py, "daft.execution.rust_physical_plan_shim"))? + .import(pyo3::intern!(py, "daft.execution.rust_physical_plan_shim"))? .getattr(pyo3::intern!(py, "sample"))? .call1((upstream_iter, *fraction, *with_replacement, *seed))?; Ok(py_iter.into()) @@ -457,7 +458,7 @@ fn physical_plan_to_partition_tasks( let upstream_iter = physical_plan_to_partition_tasks(input, py, psets, actor_pool_manager)?; let py_iter = py - .import_bound(pyo3::intern!(py, "daft.execution.physical_plan"))? + .import(pyo3::intern!(py, "daft.execution.physical_plan"))? .getattr(pyo3::intern!(py, "monotonically_increasing_id"))? .call1((upstream_iter, column_name))?; Ok(py_iter.into()) @@ -476,7 +477,7 @@ fn physical_plan_to_partition_tasks( .map(|expr| PyExpr::from(expr.clone())) .collect(); let py_iter = py - .import_bound(pyo3::intern!(py, "daft.execution.rust_physical_plan_shim"))? + .import(pyo3::intern!(py, "daft.execution.rust_physical_plan_shim"))? .getattr(pyo3::intern!(py, "sort"))? 
.call1(( upstream_iter, @@ -500,19 +501,16 @@ fn physical_plan_to_partition_tasks( .iter() .map(|expr| PyExpr::from(expr.clone())) .collect(); - py.import_bound(pyo3::intern!( - py, - "daft.execution.rust_physical_plan_shim" - ))? - .getattr(pyo3::intern!(py, "fanout_by_hash"))? - .call1(( - upstream_iter, - hash_clustering_config.num_partitions, - partition_by_pyexprs, - ))? + py.import(pyo3::intern!(py, "daft.execution.rust_physical_plan_shim"))? + .getattr(pyo3::intern!(py, "fanout_by_hash"))? + .call1(( + upstream_iter, + hash_clustering_config.num_partitions, + partition_by_pyexprs, + ))? } daft_logical_plan::ClusteringSpec::Random(random_clustering_config) => py - .import_bound(pyo3::intern!(py, "daft.execution.physical_plan"))? + .import(pyo3::intern!(py, "daft.execution.physical_plan"))? .getattr(pyo3::intern!(py, "fanout_random"))? .call1((upstream_iter, random_clustering_config.num_partitions()))?, daft_logical_plan::ClusteringSpec::Range(_) => { @@ -523,7 +521,7 @@ fn physical_plan_to_partition_tasks( } }; let reduced = py - .import_bound(pyo3::intern!(py, "daft.execution.rust_physical_plan_shim"))? + .import(pyo3::intern!(py, "daft.execution.rust_physical_plan_shim"))? .getattr(pyo3::intern!(py, "reduce_merge"))? .call1((mapped,))?; Ok(reduced.into()) @@ -533,7 +531,7 @@ fn physical_plan_to_partition_tasks( pre_shuffle_merge_threshold, } => { let merged = py - .import_bound(pyo3::intern!( + .import(pyo3::intern!( py, "daft.execution.shuffles.pre_shuffle_merge" ))? @@ -546,19 +544,16 @@ fn physical_plan_to_partition_tasks( .iter() .map(|expr| PyExpr::from(expr.clone())) .collect(); - py.import_bound(pyo3::intern!( - py, - "daft.execution.rust_physical_plan_shim" - ))? - .getattr(pyo3::intern!(py, "fanout_by_hash"))? - .call1(( - merged, - hash_clustering_config.num_partitions, - partition_by_pyexprs, - ))? + py.import(pyo3::intern!(py, "daft.execution.rust_physical_plan_shim"))? + .getattr(pyo3::intern!(py, "fanout_by_hash"))? 
+ .call1(( + merged, + hash_clustering_config.num_partitions, + partition_by_pyexprs, + ))? } daft_logical_plan::ClusteringSpec::Random(random_clustering_config) => py - .import_bound(pyo3::intern!(py, "daft.execution.physical_plan"))? + .import(pyo3::intern!(py, "daft.execution.physical_plan"))? .getattr(pyo3::intern!(py, "fanout_random"))? .call1((merged, random_clustering_config.num_partitions()))?, daft_logical_plan::ClusteringSpec::Range(_) => { @@ -569,7 +564,7 @@ fn physical_plan_to_partition_tasks( } }; let reduced = py - .import_bound(pyo3::intern!(py, "daft.execution.rust_physical_plan_shim"))? + .import(pyo3::intern!(py, "daft.execution.rust_physical_plan_shim"))? .getattr(pyo3::intern!(py, "reduce_merge"))? .call1((mapped,))?; Ok(reduced.into()) @@ -582,7 +577,7 @@ fn physical_plan_to_partition_tasks( std::cmp::Ordering::Greater => { // Split if more outputs than inputs let split = py - .import_bound(pyo3::intern!(py, "daft.execution.physical_plan"))? + .import(pyo3::intern!(py, "daft.execution.physical_plan"))? .getattr(pyo3::intern!(py, "split"))? .call1(( upstream_iter, @@ -590,7 +585,7 @@ fn physical_plan_to_partition_tasks( *target_num_partitions, ))?; let flattened = py - .import_bound(pyo3::intern!(py, "daft.execution.physical_plan"))? + .import(pyo3::intern!(py, "daft.execution.physical_plan"))? .getattr(pyo3::intern!(py, "flatten_plan"))? .call1((split,))?; Ok(flattened.into()) @@ -598,7 +593,7 @@ fn physical_plan_to_partition_tasks( std::cmp::Ordering::Less => { // Coalesce if fewer outputs than inputs let coalesced = py - .import_bound(pyo3::intern!(py, "daft.execution.physical_plan"))? + .import(pyo3::intern!(py, "daft.execution.physical_plan"))? .getattr(pyo3::intern!(py, "coalesce"))? .call1(( upstream_iter, @@ -628,7 +623,7 @@ fn physical_plan_to_partition_tasks( .map(|expr| PyExpr::from(expr.clone())) .collect(); let py_iter = py - .import_bound(pyo3::intern!(py, "daft.execution.rust_physical_plan_shim"))? 
+ .import(pyo3::intern!(py, "daft.execution.rust_physical_plan_shim"))? .getattr(pyo3::intern!(py, "local_aggregate"))? .call1((upstream_iter, aggs_as_pyexprs, groupbys_as_pyexprs))?; Ok(py_iter.into()) @@ -649,7 +644,7 @@ fn physical_plan_to_partition_tasks( let pivot_column_pyexpr = PyExpr::from(pivot_column.clone()); let value_column_pyexpr = PyExpr::from(value_column.clone()); let py_iter = py - .import_bound(pyo3::intern!(py, "daft.execution.rust_physical_plan_shim"))? + .import(pyo3::intern!(py, "daft.execution.rust_physical_plan_shim"))? .getattr(pyo3::intern!(py, "pivot"))? .call1(( upstream_iter, @@ -666,7 +661,7 @@ fn physical_plan_to_partition_tasks( let upstream_other_iter = physical_plan_to_partition_tasks(other, py, psets, actor_pool_manager)?; let py_iter = py - .import_bound(pyo3::intern!(py, "daft.execution.physical_plan"))? + .import(pyo3::intern!(py, "daft.execution.physical_plan"))? .getattr(pyo3::intern!(py, "concat"))? .call1((upstream_input_iter, upstream_other_iter))?; Ok(py_iter.into()) @@ -693,7 +688,7 @@ fn physical_plan_to_partition_tasks( .map(|expr| PyExpr::from(expr.clone())) .collect(); let py_iter = py - .import_bound(pyo3::intern!(py, "daft.execution.rust_physical_plan_shim"))? + .import(pyo3::intern!(py, "daft.execution.rust_physical_plan_shim"))? .getattr(pyo3::intern!(py, "hash_join"))? .call1(( upstream_left_iter, @@ -728,7 +723,7 @@ fn physical_plan_to_partition_tasks( .collect(); // TODO(Clark): Elide sorting one side of the join if already range-partitioned, where we'd use that side's boundaries to sort the other side. let py_iter = if *needs_presort { - py.import_bound(pyo3::intern!(py, "daft.execution.rust_physical_plan_shim"))? + py.import(pyo3::intern!(py, "daft.execution.rust_physical_plan_shim"))? .getattr(pyo3::intern!(py, "sort_merge_join_aligned_boundaries"))? .call1(( left_iter, @@ -740,7 +735,7 @@ fn physical_plan_to_partition_tasks( *left_is_larger, ))? 
} else { - py.import_bound(pyo3::intern!(py, "daft.execution.rust_physical_plan_shim"))? + py.import(pyo3::intern!(py, "daft.execution.rust_physical_plan_shim"))? .getattr(pyo3::intern!(py, "merge_join_sorted"))? .call1(( left_iter, @@ -775,7 +770,7 @@ fn physical_plan_to_partition_tasks( .map(|expr| PyExpr::from(expr.clone())) .collect(); let py_iter = py - .import_bound(pyo3::intern!(py, "daft.execution.rust_physical_plan_shim"))? + .import(pyo3::intern!(py, "daft.execution.rust_physical_plan_shim"))? .getattr(pyo3::intern!(py, "broadcast_join"))? .call1(( upstream_left_iter, @@ -799,7 +794,7 @@ fn physical_plan_to_partition_tasks( let upstream_right_iter = physical_plan_to_partition_tasks(right, py, psets, actor_pool_manager)?; let py_iter = py - .import_bound(pyo3::intern!(py, "daft.execution.physical_plan"))? + .import(pyo3::intern!(py, "daft.execution.physical_plan"))? .getattr(pyo3::intern!(py, "cross_join"))? .call1((upstream_left_iter, upstream_right_iter, *outer_loop_side))?; Ok(py_iter.into()) diff --git a/src/daft-schema/src/image_format.rs b/src/daft-schema/src/image_format.rs index 0aeb8432de..067e9f04a9 100644 --- a/src/daft-schema/src/image_format.rs +++ b/src/daft-schema/src/image_format.rs @@ -9,7 +9,7 @@ use serde::{Deserialize, Serialize}; /// Supported image formats for Daft's I/O layer. 
#[allow(clippy::upper_case_acronyms)] #[derive(Clone, Copy, Debug, Display, PartialEq, Eq, Serialize, Deserialize, Hash)] -#[cfg_attr(feature = "python", pyclass(module = "daft.daft"))] +#[cfg_attr(feature = "python", pyclass(module = "daft.daft", eq, eq_int))] pub enum ImageFormat { PNG, JPEG, diff --git a/src/daft-schema/src/image_mode.rs b/src/daft-schema/src/image_mode.rs index e75e90bf28..6305834f92 100644 --- a/src/daft-schema/src/image_mode.rs +++ b/src/daft-schema/src/image_mode.rs @@ -31,7 +31,7 @@ use crate::dtype::DataType; #[derive( Clone, Copy, Debug, Display, PartialEq, Eq, Serialize, Deserialize, Hash, FromPrimitive, )] -#[cfg_attr(feature = "python", pyclass(module = "daft.daft"))] +#[cfg_attr(feature = "python", pyclass(module = "daft.daft", eq, eq_int))] pub enum ImageMode { L = 1, LA = 2, diff --git a/src/daft-schema/src/python/datatype.rs b/src/daft-schema/src/python/datatype.rs index 2aad609ad7..9922953f98 100644 --- a/src/daft-schema/src/python/datatype.rs +++ b/src/daft-schema/src/python/datatype.rs @@ -182,6 +182,7 @@ impl PyDataType { } #[staticmethod] + #[pyo3(signature = (timeunit, timezone=None))] pub fn timestamp(timeunit: PyTimeUnit, timezone: Option) -> PyResult { Ok(DataType::Timestamp(timeunit.timeunit, timezone).into()) } @@ -232,6 +233,7 @@ impl PyDataType { } #[staticmethod] + #[pyo3(signature = (name, storage_data_type, metadata=None))] pub fn extension( name: &str, storage_data_type: Self, @@ -263,6 +265,7 @@ impl PyDataType { } #[staticmethod] + #[pyo3(signature = (mode=None, height=None, width=None))] pub fn image( mode: Option, height: Option, @@ -281,6 +284,7 @@ impl PyDataType { } #[staticmethod] + #[pyo3(signature = (dtype, shape=None))] pub fn tensor(dtype: Self, shape: Option>) -> PyResult { // TODO(Clark): Add support for non-numeric (e.g. string) tensor columns. 
if !dtype.dtype.is_numeric() { @@ -297,6 +301,7 @@ impl PyDataType { } #[staticmethod] + #[pyo3(signature = (dtype, shape=None))] pub fn sparse_tensor(dtype: Self, shape: Option>) -> PyResult { if !dtype.dtype.is_numeric() { return Err(PyValueError::new_err(format!( @@ -317,11 +322,11 @@ impl PyDataType { } pub fn to_arrow<'py>(&self, py: Python<'py>) -> PyResult> { - let pyarrow = py.import_bound(pyo3::intern!(py, "pyarrow"))?; + let pyarrow = py.import(pyo3::intern!(py, "pyarrow"))?; match &self.dtype { DataType::FixedShapeTensor(dtype, shape) => { if py - .import_bound(pyo3::intern!(py, "daft.utils"))? + .import(pyo3::intern!(py, "daft.utils"))? .getattr(pyo3::intern!(py, "pyarrow_supports_fixed_shape_tensor"))? .call0()? .extract()? @@ -333,7 +338,7 @@ impl PyDataType { dtype: *dtype.clone(), } .to_arrow(py)?, - pyo3::types::PyTuple::new_bound(py, shape.clone()), + pyo3::types::PyTuple::new(py, shape.clone())?, )) } else { // Fall back to default Daft super extension representation if installed pyarrow doesn't have the diff --git a/src/daft-schema/src/python/schema.rs b/src/daft-schema/src/python/schema.rs index bacc8cc8cf..481b5163d0 100644 --- a/src/daft-schema/src/python/schema.rs +++ b/src/daft-schema/src/python/schema.rs @@ -20,7 +20,7 @@ impl PySchema { } pub fn to_pyarrow_schema<'py>(&self, py: Python<'py>) -> PyResult> { - let pyarrow = py.import_bound(pyo3::intern!(py, "pyarrow"))?; + let pyarrow = py.import(pyo3::intern!(py, "pyarrow"))?; let pyarrow_fields = self .schema .fields diff --git a/src/daft-sql/src/lib.rs b/src/daft-sql/src/lib.rs index 75a819c204..211973f0d3 100644 --- a/src/daft-sql/src/lib.rs +++ b/src/daft-sql/src/lib.rs @@ -18,9 +18,9 @@ use pyo3::prelude::*; #[cfg(feature = "python")] pub fn register_modules(parent: &Bound) -> PyResult<()> { parent.add_class::()?; - parent.add_function(wrap_pyfunction_bound!(python::sql, parent)?)?; - parent.add_function(wrap_pyfunction_bound!(python::sql_expr, parent)?)?; - 
parent.add_function(wrap_pyfunction_bound!(python::list_sql_functions, parent)?)?; + parent.add_function(wrap_pyfunction!(python::sql, parent)?)?; + parent.add_function(wrap_pyfunction!(python::sql_expr, parent)?)?; + parent.add_function(wrap_pyfunction!(python::list_sql_functions, parent)?)?; Ok(()) } diff --git a/src/daft-table/src/python.rs b/src/daft-table/src/python.rs index c2c0cc622a..ad53cb03a6 100644 --- a/src/daft-table/src/python.rs +++ b/src/daft-table/src/python.rs @@ -210,6 +210,7 @@ impl PyTable { py.allow_threads(|| Ok(self.table.head(num)?.into())) } + #[pyo3(signature = (fraction, with_replacement, seed=None))] pub fn sample_by_fraction( &self, py: Python, @@ -235,6 +236,7 @@ impl PyTable { }) } + #[pyo3(signature = (size, with_replacement, seed=None))] pub fn sample_by_size( &self, py: Python, @@ -475,12 +477,13 @@ impl PyTable { pub fn to_arrow_record_batch(&self) -> PyResult { Python::with_gil(|py| { - let pyarrow = py.import_bound(pyo3::intern!(py, "pyarrow"))?; + let pyarrow = py.import(pyo3::intern!(py, "pyarrow"))?; ffi::table_to_record_batch(py, &self.table, pyarrow) }) } #[staticmethod] + #[pyo3(signature = (schema=None))] pub fn empty(schema: Option) -> PyResult { Ok(Table::empty(match schema { Some(s) => Some(s.schema), diff --git a/src/daft-writers/src/lance.rs b/src/daft-writers/src/lance.rs index 03c0b89210..c06023f294 100644 --- a/src/daft-writers/src/lance.rs +++ b/src/daft-writers/src/lance.rs @@ -37,12 +37,12 @@ impl FileWriter for LanceWriter { .expect("MicroPartition should have size_bytes for LanceWriter"); Python::with_gil(|py| { let py_micropartition = py - .import_bound(pyo3::intern!(py, "daft.table"))? + .import(pyo3::intern!(py, "daft.table"))? .getattr(pyo3::intern!(py, "MicroPartition"))? .getattr(pyo3::intern!(py, "_from_pymicropartition"))? .call1((PyMicroPartition::from(data),))?; let written_fragments: PyTable = py - .import_bound(pyo3::intern!(py, "daft.table.table_io"))? 
+ .import(pyo3::intern!(py, "daft.table.table_io"))? .getattr(pyo3::intern!(py, "write_lance"))? .call1(( py_micropartition, @@ -54,7 +54,7 @@ impl FileWriter for LanceWriter { .map(|cfg| daft_io::python::IOConfig { config: cfg.clone(), }), - &self.lance_info.kwargs, + &self.lance_info.kwargs.clone_ref(py), ))? .getattr(pyo3::intern!(py, "to_table"))? .call0()? diff --git a/src/daft-writers/src/pyarrow.rs b/src/daft-writers/src/pyarrow.rs index 9f58ba6f51..d969998a50 100644 --- a/src/daft-writers/src/pyarrow.rs +++ b/src/daft-writers/src/pyarrow.rs @@ -22,10 +22,10 @@ impl PyArrowWriter { partition_values: Option<&Table>, ) -> DaftResult { Python::with_gil(|py| { - let file_writer_module = py.import_bound(pyo3::intern!(py, "daft.io.writer"))?; + let file_writer_module = py.import(pyo3::intern!(py, "daft.io.writer"))?; let file_writer_class = file_writer_module.getattr("ParquetFileWriter")?; let _from_pytable = py - .import_bound(pyo3::intern!(py, "daft.table"))? + .import(pyo3::intern!(py, "daft.table"))? .getattr(pyo3::intern!(py, "Table"))? .getattr(pyo3::intern!(py, "_from_pytable"))?; let partition_values = match partition_values { @@ -60,10 +60,10 @@ impl PyArrowWriter { partition_values: Option<&Table>, ) -> DaftResult { Python::with_gil(|py| { - let file_writer_module = py.import_bound(pyo3::intern!(py, "daft.io.writer"))?; + let file_writer_module = py.import(pyo3::intern!(py, "daft.io.writer"))?; let file_writer_class = file_writer_module.getattr("CSVFileWriter")?; let _from_pytable = py - .import_bound(pyo3::intern!(py, "daft.table"))? + .import(pyo3::intern!(py, "daft.table"))? .getattr(pyo3::intern!(py, "Table"))? 
.getattr(pyo3::intern!(py, "_from_pytable"))?; let partition_values = match partition_values { @@ -99,10 +99,10 @@ impl PyArrowWriter { io_config: &Option, ) -> DaftResult { Python::with_gil(|py| { - let file_writer_module = py.import_bound(pyo3::intern!(py, "daft.io.writer"))?; + let file_writer_module = py.import(pyo3::intern!(py, "daft.io.writer"))?; let file_writer_class = file_writer_module.getattr("IcebergWriter")?; let _from_pytable = py - .import_bound(pyo3::intern!(py, "daft.table"))? + .import(pyo3::intern!(py, "daft.table"))? .getattr(pyo3::intern!(py, "Table"))? .getattr(pyo3::intern!(py, "_from_pytable"))?; let partition_values = match partition_values { @@ -140,10 +140,10 @@ impl PyArrowWriter { io_config: &Option, ) -> DaftResult { Python::with_gil(|py| { - let file_writer_module = py.import_bound(pyo3::intern!(py, "daft.io.writer"))?; + let file_writer_module = py.import(pyo3::intern!(py, "daft.io.writer"))?; let file_writer_class = file_writer_module.getattr("DeltalakeWriter")?; let _from_pytable = py - .import_bound(pyo3::intern!(py, "daft.table"))? + .import(pyo3::intern!(py, "daft.table"))? .getattr(pyo3::intern!(py, "Table"))? .getattr(pyo3::intern!(py, "_from_pytable"))?; let partition_values = match partition_values { @@ -180,7 +180,7 @@ impl FileWriter for PyArrowWriter { assert!(!self.is_closed, "Cannot write to a closed PyArrowWriter"); let bytes_written = Python::with_gil(|py| { let py_micropartition = py - .import_bound(pyo3::intern!(py, "daft.table"))? + .import(pyo3::intern!(py, "daft.table"))? .getattr(pyo3::intern!(py, "MicroPartition"))? .getattr(pyo3::intern!(py, "_from_pymicropartition"))? 
.call1((PyMicroPartition::from(data),))?; diff --git a/src/lib.rs b/src/lib.rs index 0bfb094de4..18ae22892c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -71,7 +71,7 @@ pub mod pylib { #[pyfunction] pub fn refresh_logger(py: Python) -> PyResult<()> { use log::LevelFilter; - let logging = py.import_bound(pyo3::intern!(py, "logging"))?; + let logging = py.import(pyo3::intern!(py, "logging"))?; let python_log_level = logging .getattr(pyo3::intern!(py, "getLogger"))? .call0()? @@ -126,7 +126,7 @@ pub mod pylib { daft_catalog_python_catalog::python::register_modules(&catalog_module)?; // Register testing module - let testing_module = PyModule::new_bound(m.py(), "testing")?; + let testing_module = PyModule::new(m.py(), "testing")?; m.add_submodule(&testing_module)?; daft_scan::python::register_testing_modules(&testing_module)?; From 43bbbebe95a344b2ffe6af778dfc47222d2030e6 Mon Sep 17 00:00:00 2001 From: Raunak Bhagat Date: Wed, 8 Jan 2025 16:56:38 -0800 Subject: [PATCH 3/3] ci: Improve visualization of tpcds + tpch benchmarking outputs (#3654) # Overview This PR adds the ability for the `run-cluster` GitHub Actions workflow to output: 1. a file containing the entire printed-out plan for one given question 2. additional stats for the planning + execution time for one given question ## Implementation These files are written under `/tmp/ray`; the `run-cluster` workflow then downloads that directory and uploads it to the GitHub Actions Summary Page. ## Usage You can run this locally or through CI. 
### Locally ```sh DAFT_RUNNER=ray python benchmarking/tpcds/ray_entrypoint.py --question=1 --scale-factor=1 ``` ### Remotely ```sh uv run tools/tpcds.py --questions='1,2' --scale-factor=1 ``` --- benchmarking/tpcds/ray_entrypoint.py | 35 ++++++++++++++++++++++++---- tools/tpcds.py | 3 ++- tools/tpch.py | 2 +- 3 files changed, 34 insertions(+), 6 deletions(-) diff --git a/benchmarking/tpcds/ray_entrypoint.py b/benchmarking/tpcds/ray_entrypoint.py index 20656b37b9..0d1ced05bc 100644 --- a/benchmarking/tpcds/ray_entrypoint.py +++ b/benchmarking/tpcds/ray_entrypoint.py @@ -1,4 +1,6 @@ import argparse +import json +from datetime import datetime from pathlib import Path import daft @@ -36,7 +38,7 @@ def register_catalog(scale_factor: int) -> SQLCatalog: return SQLCatalog( tables={ table: daft.read_parquet( - f"s3://eventual-dev-benchmarking-fixtures/uncompressed/tpcds-dbgen/{scale_factor}/{table}.parquet" + f"s3://eventual-dev-benchmarking-fixtures/uncompressed/tpcds-dbgen/{scale_factor}/{table}" ) for table in TABLE_NAMES } @@ -51,11 +53,36 @@ def run( catalog = register_catalog(scale_factor) query_file = Path(__file__).parent / "queries" / f"{question:02}.sql" with open(query_file) as f: - query = f.read() + query_string = f.read() - daft.sql(query, catalog=catalog).explain(show_all=True) + info_path = Path("/tmp") / "ray" / "session_latest" / "logs" / "info" + info_path.mkdir(parents=True, exist_ok=True) + query = daft.sql(query_string, catalog=catalog) + + explain_delta = None + with open(info_path / f"plan-{question}.txt", "w") as f: + explain_start = datetime.now() + query.explain(show_all=True, file=f, format="mermaid") + explain_end = datetime.now() + explain_delta = explain_end - explain_start + + execute_delta = None if not dry_run: - daft.sql(query, catalog=catalog).collect() + execute_start = datetime.now() + query.collect() + execute_end = datetime.now() + execute_delta = execute_end - execute_start + + with open(info_path / f"stats-{question}.txt", "w") as 
f: + stats = json.dumps( + { + "question": question, + "scale-factor": scale_factor, + "planning-time": explain_delta, + "execution-time": execute_delta, + } + ) + f.write(stats) if __name__ == "__main__": diff --git a/tools/tpcds.py b/tools/tpcds.py index 554abac914..d6ec36cc21 100644 --- a/tools/tpcds.py +++ b/tools/tpcds.py @@ -15,7 +15,7 @@ def run( - branch_name: str, + branch_name: Optional[str], questions: Optional[str], scale_factor: int, cluster_profile: str, @@ -49,6 +49,7 @@ def run( parser.add_argument( "--scale-factor", choices=[ + 1, 2, 5, 10, diff --git a/tools/tpch.py b/tools/tpch.py index 744c524bf5..a89415c258 100644 --- a/tools/tpch.py +++ b/tools/tpch.py @@ -17,7 +17,7 @@ def run( - branch_name: str, + branch_name: Optional[str], questions: Optional[str], scale_factor: int, num_partitions: int,