diff --git a/.github/workflows/main-cmake.yml b/.github/workflows/main-cmake.yml index 4a783a6819..9fab45b852 100644 --- a/.github/workflows/main-cmake.yml +++ b/.github/workflows/main-cmake.yml @@ -3,7 +3,7 @@ name: NetCDF-C CMake CI - Windows on: [ pull_request, workflow_dispatch] env: - REMOTETESTDOWN: ${{ vars.REMOTETESTDOWN }} + REMOTETESTDOWN: no concurrency: group: ${{ github.workflow }}-${{ github.head_ref }} @@ -53,6 +53,12 @@ jobs: - name: Dump Matrix Context run: echo '${{ toJSON(matrix) }}' + - name: Check Hard Drive Space + shell: bash -el {0} + run: | + df -h + pwd + #- run: echo "CMAKE_PREFIX_PATH=${env.CONDA_PREFIX}/Library" >> $GITHUB_ENV #- run: echo "/c/Users/runneradmin/miniconda3/Library/lib:${GITHUB_PATH}" >> $GITHUB_ENV #- run: echo "" @@ -88,6 +94,12 @@ jobs: ls $CONDA_PREFIX/Library/include/ shell: bash -el {0} + - name: Check Hard Drive Space + shell: bash -el {0} + run: | + df -h + pwd + - name: Perform out-of-directory configuration shell: bash -el {0} run: | @@ -108,6 +120,12 @@ jobs: run: | cd build cat libnetcdf.settings + + - name: Check Hard Drive Space Post Summary + shell: bash -el {0} + run: | + df -h + pwd - name: Perform out-of-directory build - libnetcdf shell: bash -el {0} @@ -163,4 +181,6 @@ jobs: run: | cd build PATH=~/tmp/bin:$PATH ctest . --rerun-failed --output-on-failure -VV + df -h + pwd if: ${{ failure() }} diff --git a/.github/workflows/run_tests_cdash.yml b/.github/workflows/run_tests_cdash.yml index dfb8572560..09be22770b 100644 --- a/.github/workflows/run_tests_cdash.yml +++ b/.github/workflows/run_tests_cdash.yml @@ -7,7 +7,7 @@ name: Run CDash Ubuntu/Linux netCDF Tests on: [workflow_dispatch] env: - REMOTETESTDOWN: ${{ vars.REMOTETESTDOWN }} + REMOTETESTDOWN: no concurrency: group: ${{ github.workflow}}-${{ github.head_ref }} diff --git a/.github/workflows/run_tests_ubuntu.yml b/.github/workflows/run_tests_ubuntu.yml index 5d561807b0..c27cf8666e 100644 --- a/.github/workflows/run_tests_ubuntu.yml +++ b/.github/workflows/run_tests_ubuntu.yml @@ -7,7 +7,7 @@ name: Run Ubuntu/Linux netCDF Tests on: [pull_request,workflow_dispatch] env: - REMOTETESTDOWN: ${{ vars.REMOTETESTDOWN }} + REMOTETESTDOWN: no concurrency: group: ${{ github.workflow}}-${{ github.head_ref }} diff --git a/.github/workflows/run_tests_win_cygwin.yml b/.github/workflows/run_tests_win_cygwin.yml index 54e8d0affb..95098c652a 100644 --- a/.github/workflows/run_tests_win_cygwin.yml +++ b/.github/workflows/run_tests_win_cygwin.yml @@ -10,7 +10,7 @@ env: SHELLOPTS: igncr CHERE_INVOKING: 1 CYGWIN_NOWINPATH: 1 - REMOTETESTDOWN: ${{ vars.REMOTETESTDOWN }} + REMOTETESTDOWN: no jobs: build-and-test-autotools: @@ -26,6 +26,10 @@ jobs: plugin_dir_option: ["", "--without-plugin-dir"] steps: + - name: Set safe directory for Git + shell: pwsh + run: git config --global --add safe.directory /cygdrive/d/a/netcdf-c/netcdf-c + - name: Fix line endings shell: pwsh run: git config --global core.autocrlf input @@ -55,7 +59,7 @@ jobs: /bin/dash ./configure --enable-hdf5 --enable-shared --disable-static --enable-dap --disable-dap-remote-tests --enable-plugins ${{ matrix.plugin_dir_option }} - --disable-nczarr --disable-nczarr-filters + --disable-nczarr #--disable-nczarr-filters --disable-s3 --with-s3-testing=no @@ -103,8 +107,13 @@ jobs: run: shell: C:/cygwin/bin/bash.exe -eo pipefail -o igncr "{0}" - steps: +# shell: C:/cygwin/bin/bash.exe -eo pipefail -o igncr "{0}" + steps: + - name: Set safe directory for Git + shell: pwsh + run: git config --global --add safe.directory /cygdrive/d/a/netcdf-c/netcdf-c + - run: git config --global core.autocrlf input shell: pwsh - uses: actions/checkout@v4 diff --git a/.github/workflows/run_tests_win_mingw.yml b/.github/workflows/run_tests_win_mingw.yml index be3034f9af..018863973b 100644 --- a/.github/workflows/run_tests_win_mingw.yml +++ b/.github/workflows/run_tests_win_mingw.yml @@ -46,7 +46,7 @@ jobs: run: autoreconf -if - name: (Autotools) Configure Build - run: ./configure --enable-hdf5 --enable-dap --disable-dap-remote-tests --disable-static --disable-byterange --disable-dap-remote-tests --disable-logging --enable-plugins --disable-nczarr-filters --disable-s3 + run: ./configure --enable-hdf5 --enable-dap --disable-dap-remote-tests --disable-static --disable-byterange --disable-dap-remote-tests --disable-logging --enable-plugins --disable-s3 #--disable-nczarr-filters if: ${{ success() }} - name: (Autotools) Look at config.log if error diff --git a/CMakeLists.txt b/CMakeLists.txt index fc42e5c696..a44a1cbe13 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -45,7 +45,6 @@ set(PACKAGE "netCDF" CACHE STRING "") include(netcdf_functions_macros) include(deprecated) - # Backport of built-in `PROJECT_IS_TOP_LEVEL` from CMake 3.21 if (NOT DEFINED NETCDF_IS_TOP_LEVEL) set(NETCDF_IS_TOP_LEVEL OFF) @@ -440,18 +439,18 @@ endif() # Format Option checks ################################ -# As a long term goal, and because it is now the case that -# NETCDF_ENABLE_NCZARR => USE_NETCDF4, so make the external options -# NETCDF_ENABLE_NETCDF_4 and NETCDF_ENABLE_NETCDF4 obsolete -# in favor of NETCDF_ENABLE_HDF5. +# As a long term goal, and because it is now the case that --enable-nczarr +# => USE_NETCDF4, make the external options --enable-netcdf-4 and +# --enable-netcdf4 obsolete in favor of --enable-hdf5 # We will do the following for one more release cycle. -# 1. Make NETCDF_ENABLE_NETCDF_4 be an alias for NETCDF_ENABLE_NETCDF4. -# 2. Make NETCDF_ENABLE_NETCDF4 an alias for NETCDF_ENABLE_HDF5. -# 3. Internally, convert most (but not all) uses of USE_NETCDF_4 and USE_NETCDF4 to USE_HDF5. +# 1. Make --enable-netcdf-4 be an alias for --enable-netcdf4. +# 2. Make --enable-netcdf4 an alias for --enable-hdf5. +# 3. Internally, convert most uses of USE_NETCDF_4 ad USE_NETCDF4 to USE_HDF5 -# Collect the values of NETCDF_ENABLE_NETCDF_4, NETCDF_ENABLE_NETCDF4, and NETCDF_ENABLE_HDF5. +# Collect the values of -DNETCDF_ENABLE_NETCDF-4, -DNETCDF_ENABLE_NETCDF4, and -DNETCDF_ENABLE_HDF5. +# Also determine which have been explicitly set on the command line. -# Figure out which options are defined and process options +# Figure out which options are defined if(DEFINED NETCDF_ENABLE_NETCDF_4) set(UNDEF_NETCDF_4 OFF CACHE BOOL "") option(NETCDF_ENABLE_NETCDF_4 "" ON) @@ -493,8 +492,11 @@ if(UNDEF_HDF5) set(NETCDF_ENABLE_HDF5 ON CACHE BOOL "" FORCE) endif() -# Turn off NETCDF_ENABLE_NETCDF4 because it will be used -# as a shorthand for NETCDF_ENABLE_HDF5|NETCDF_ENABLE_HDF4|NETCDF_ENABLE_NCZARR +# Turn off enable_netcdf_4 because it is no longer needed +set(NETCDF_ENABLE_NETCDF_4 OFF CACHE BOOL "" FORCE) + +# Turn off enable_netcdf4 because it will be used +# as a shorthand for ENABLE_HDF5|ENABLE_HDF4|ENABLE_NCZARR set(NETCDF_ENABLE_NETCDF4 OFF CACHE BOOL "" FORCE) option(NETCDF_ENABLE_DAP "Enable DAP2 and DAP4 Client." ON) option(NETCDF_ENABLE_NCZARR "Enable NCZarr Client." ON) @@ -507,10 +509,28 @@ if(NETCDF_ENABLE_HDF4) set(USE_HDF4 ON) endif() +# Decide default Zarr Format for creation +OPTION(NETCDF_ENABLE_DEFAULT_ZARR_FORMAT_V3 "Specify the default Zarr format is V3" OFF) +if(NETCDF_ENABLE_DEFAULT_ZARR_FORMAT_V3) +SET(DFALTZARRFORMAT 3 CACHE STRING "") +ELSE() +SET(DFALTZARRFORMAT 2 CACHE STRING "") +ENDIF() + +IF(NETCDF_ENABLE_NCZARR) + SET(NETCDF_ENABLE_NCZARR_V3 ON CACHE BOOL "Enable Zarr V3" FORCE) + # V3 is enabled if NCzarr is enabled +ELSE() + SET(NETCDF_ENABLE_NCZARR_V3 OFF CACHE BOOL "Enable Zarr V3" FORCE) +ENDIF() + # Netcdf-4 support (i.e. libsrc4) is required by more than just HDF5 (e.g. NCZarr) # So depending on what above formats are enabled, enable netcdf-4 +# as a short-hand for this boolean expression if(NETCDF_ENABLE_HDF5 OR NETCDF_ENABLE_HDF4 OR NETCDF_ENABLE_NCZARR) set(NETCDF_ENABLE_NETCDF4 ON CACHE BOOL "Enable netCDF-4 API" FORCE) + # For convenience, define USE_NETCDF4 + set(USE_NETCDF4 ON CACHE BOOL "Enable netCDF-4 API" FORCE) endif() # enable|disable all forms of network access @@ -534,10 +554,7 @@ endif() # Did the user specify a default minimum blocksize for posixio? set(NCIO_MINBLOCKSIZE 256 CACHE STRING "Minimum I/O Blocksize for netCDF classic and 64-bit offset format files.") -if(NETCDF_ENABLE_NETCDF4) - set(USE_NETCDF4 ON CACHE BOOL "") - set(NETCDF_ENABLE_NETCDF4 ON CACHE BOOL "") -else() +if(NOT USE_NETCDF4) set(USE_HDF4_FILE_TESTS OFF) set(USE_HDF4 OFF) set(NETCDF_ENABLE_HDF4_FILE_TESTS OFF) @@ -993,7 +1010,7 @@ endif(NETCDF_ENABLE_ERANGE_FILL) # Now NETCDF_ENABLE_ERANGE_FILL is either AUTO, ON, or OFF # More relaxed coordinate check is now mandatory for all builds. -set(ENABLE_ZERO_LENGTH_COORD_BOUND ON) +set(NETCDF_ENABLE_ZERO_LENGTH_COORD_BOUND ON) # check and conform with PnetCDF settings on ERANGE_FILL and RELAX_COORD_BOUND if(USE_PNETCDF) @@ -1016,7 +1033,7 @@ if(NETCDF_ENABLE_ERANGE_FILL) set(M4FLAGS "-DERANGE_FILL" CACHE STRING "") endif() -if(ENABLE_ZERO_LENGTH_COORD_BOUND) +if(NETCDF_ENABLE_ZERO_LENGTH_COORD_BOUND) message(STATUS "Enabling a more relaxed check for NC_EINVALCOORDS") target_compile_definitions(netcdf PRIVATE RELAX_COORD_BOUND) endif() @@ -1540,6 +1557,12 @@ if(NETCDF_ENABLE_TESTS) if(NETCDF_ENABLE_NCZARR) include_directories(nczarr_test) add_subdirectory(nczarr_test) + if(NOT ISCMAKE) + # Cannot create target because another target with + # the same name already exists. + # See documentation for policy CMP0002 for more details. + add_subdirectory(v3_nczarr_test) + endif() endif() endif() @@ -1673,10 +1696,10 @@ is_disabled(BUILD_SHARED_LIBS enable_static) is_enabled(BUILD_SHARED_LIBS enable_shared) is_enabled(NETCDF_ENABLE_V2_API HAS_NC2) -is_enabled(NETCDF_ENABLE_NETCDF4 HAS_NC4) is_enabled(NETCDF_ENABLE_HDF4 HAS_HDF4) is_enabled(USE_HDF4 HAS_HDF4) is_enabled(USE_HDF5 HAS_HDF5) +is_enabled(USE_NETCDF4 HAS_NC4) is_enabled(OFF HAS_BENCHMARKS) is_enabled(STATUS_PNETCDF HAS_PNETCDF) is_enabled(STATUS_PARALLEL HAS_PARALLEL) @@ -1735,6 +1758,10 @@ endif() # WARNING: this must match the value in configure.ac set(S3TESTBUCKET "unidata-zarr-test-data" CACHE STRING "S3 test bucket") +# Additional S3 Test Endpoing +set(S3ENDPOINT "s3.us-east-1.amazonaws.com" CACHE STRING "S3 endpoint") + + # The working S3 path tree within the Unidata bucket. # WARNING: this must match the value in configure.ac set(S3TESTSUBTREE "netcdf-c" CACHE STRING "Working S3 path.") @@ -1849,7 +1876,7 @@ configure_file(${CMAKE_CURRENT_SOURCE_DIR}/s3cleanup.in ${CMAKE_CURRENT_BINARY_D configure_file(${CMAKE_CURRENT_SOURCE_DIR}/s3gc.in ${CMAKE_CURRENT_BINARY_DIR}/s3gc.sh @ONLY NEWLINE_STYLE LF) ##### -# Build and copy nc_test4/findplugin.sh to various places +# Transfer files from a single source to directories that use it. ##### foreach(CC nc_test4 nczarr_test v3_nczarr_test plugins h5_test examples/C) configure_file(${CMAKE_CURRENT_SOURCE_DIR}/plugins/findplugin.in ${CMAKE_CURRENT_BINARY_DIR}/${CC}/findplugin.sh @ONLY NEWLINE_STYLE LF) diff --git a/Makefile.am b/Makefile.am index 8ccfd6a4d2..6e02782330 100644 --- a/Makefile.am +++ b/Makefile.am @@ -110,8 +110,11 @@ endif # Build Cloud Storage if desired. if NETCDF_ENABLE_NCZARR -ZARR_TEST_DIRS = nczarr_test ZARR = libnczarr +ZARR_TEST_DIRS = nczarr_test +if NETCDF_ENABLE_NCZARR_V3 +ZARR_TEST_DIRS += v3_nczarr_test +endif endif # Optionally build test plugins @@ -218,6 +221,9 @@ if NETCDF_ENABLE_S3_TESTALL mv ${abs_top_builddir}/tmp_@PLATFORMUID@.uids ${abs_top_builddir}/s3cleanup_@PLATFORMUID@.uids endif +clean-local: + rm -fr alltests_* + if NETCDF_ENABLE_S3_TESTALL distclean-local: rm -f ${abs_top_builddir}/s3cleanup_@PLATFORMUID@.uids diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md index 5759b335c1..b2220bf4bc 100644 --- a/RELEASE_NOTES.md +++ b/RELEASE_NOTES.md @@ -5,6 +5,9 @@ Release Notes {#RELEASE_NOTES} This file contains a high-level description of this package's evolution. Releases are in reverse chronological order (most recent first). Note that, as of netcdf 4.2, the `netcdf-c++` and `netcdf-fortran` libraries have been separated into their own libraries. +## 4.9.4 - TBD +* Add experimental support for the Zarr Version 3 storage format. This code willl change as the Zarr Version 3 Specification evolves. See [Github #3068](https://github.com/Unidata/netcdf-c/pull/3068). + ## 4.9.3 - TBD * Extend the netcdf API to support programmatic changes to the plugin search path. See [Github #3034](https://github.com/Unidata/netcdf-c/pull/3034) for more information. @@ -26,8 +29,6 @@ This file contains a high-level description of this package's evolution. Release * Convert NCZarr V2 to store all netcdf-4 specific info as attributes. This improves interoperability with other Zarr implementations by no longer using non-standard keys. The price to be paid is that lazy attribute reading cannot be supported. See [Github #2836](https://github.com/Unidata/netcdf-c/pull/2936) for more information. * Cleanup the option code for NETCDF_ENABLE_SET_LOG_LEVEL\[_FUNC\] See [Github #2931](https://github.com/Unidata/netcdf-c/pull/2931) for more information. -### Release Candidate 1 - July 26, 2024 - * Convert NCZarr V2 to store all netcdf-4 specific info as attributes. This improves interoperability with other Zarr implementations by no longer using non-standard keys. The price to be paid is that lazy attribute reading cannot be supported. See [Github #2836](https://github.com/Unidata/netcdf-c/issues/2936) for more information. * Cleanup the option code for NETCDF_ENABLE_SET_LOG_LEVEL\[_FUNC\] See [Github #2931](https://github.com/Unidata/netcdf-c/issues/2931) for more information. * Fix duplicate definition when using aws-sdk-cpp. See [Github #2928](https://github.com/Unidata/netcdf-c/issues/2928) for more information. diff --git a/cmake/netcdf_functions_macros.cmake b/cmake/netcdf_functions_macros.cmake index ea9d8ce819..7ae7b95b04 100644 --- a/cmake/netcdf_functions_macros.cmake +++ b/cmake/netcdf_functions_macros.cmake @@ -221,7 +221,7 @@ macro(print_conf_summary) message("Configuration Summary:") message("") message(STATUS "Building Shared Libraries: ${BUILD_SHARED_LIBS}") - message(STATUS "Building netCDF-4: ${NETCDF_ENABLE_NETCDF_4}") + message(STATUS "Building netCDF-4: ${NETCDF_ENABLE_NETCDF4}") message(STATUS "Building DAP2 Support: ${NETCDF_ENABLE_DAP2}") message(STATUS "Building DAP4 Support: ${NETCDF_ENABLE_DAP4}") message(STATUS "Building Byte-range Support: ${NETCDF_ENABLE_BYTERANGE}") @@ -277,6 +277,39 @@ macro(getdpkg_arch arch) execute_process(COMMAND "${NC_DPKG}" "--print-architecture" OUTPUT_VARIABLE "${arch}" OUTPUT_STRIP_TRAILING_WHITESPACE) endmacro(getdpkg_arch) +macro(NCZARR_SH_TEST basename src) + file(READ ${CMAKE_CURRENT_SOURCE_DIR}/../${src}/tst_${basename}.sh SHSOURCE) + # Make sure the order of prepended lines is correct + string(PREPEND SHSOURCE "TESTNCZARR=1\n") + string(PREPEND SHSOURCE "#!/bin/bash\n") + # Replace with FILE(CONFIGURE) when cmake 3.18 is in common use + file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/run_${basename}.1 "${SHSOURCE}") + configure_file(${CMAKE_CURRENT_BINARY_DIR}/run_${basename}.1 ${CMAKE_CURRENT_BINARY_DIR}/run_${basename}.sh FILE_PERMISSIONS OWNER_WRITE OWNER_READ OWNER_EXECUTE @ONLY NEWLINE_STYLE LF) + file(REMOVE ${CMAKE_CURRENT_BINARY_DIR}/run_${basename}.1) +endmacro(NCZARR_SH_TEST) + +macro(NCZARR_C_TEST basename newname src) + file(READ ${CMAKE_CURRENT_SOURCE_DIR}/../${src}/${basename}.c CSOURCE) + string(PREPEND CSOURCE "#define TESTNCZARR\n") + file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/${newname}.c "${CSOURCE}") +endmacro(NCZARR_C_TEST) + +macro(build_bin_test_with_util_lib F UTIL_LIB) + build_bin_test(${F}) + if(ENABLE_DLL) + target_compile_definitions(${F} PUBLIC -DDLL_NETCDF) + endif(ENABLE_DLL) + target_link_libraries(${F} ${UTIL_LIB} ${ALL_TLL_LIBS}) +endmacro() + +macro(add_bin_test_with_util_lib PREFIX F UTIL_LIB) + add_bin_test(${PREFIX} ${F}) + if(ENABLE_DLL) + target_compile_definitions(${PREFIX}_${F} PUBLIC -DDLL_NETCDF) + endif(ENABLE_DLL) + target_link_libraries(${PREFIX}_${F} ${UTIL_LIB} ${ALL_TLL_LIBS}) +endmacro() + ################################ # Functions ################################ @@ -336,4 +369,3 @@ function(getlastdir s ret_val) list(GET list -1 last) set(${ret_val} "${last}" PARENT_SCOPE) endfunction() - diff --git a/cmake/v3_setup.cmake b/cmake/v3_setup.cmake new file mode 100644 index 0000000000..6cd99f43e8 --- /dev/null +++ b/cmake/v3_setup.cmake @@ -0,0 +1,29 @@ +# Sources that are specific to Zarr V3 testing +set(V3FILES run_nan.sh run_mud.sh) +set(V3DATA ref_nczarr2zarr.cdl ref_purezarr.cdl ref_xarray.cdl ref_misc2.cdl ref_jsonconvention.cdl ref_jsonconvention.zmap ref_nulls_zarr.baseline ref_string_zarr.baseline ref_string_nczarr.baseline ref_zarr_test_data_2d.cdl.gz ref_groups_regular.cdl ref_filtered.cdl ref_any.cdl ref_multi.cdl ref_tst_nans.dmp ref_bzip2.cdl ref_tst_mud4-bc.cdl ref_tst_mud4.cdl ref_tst_mud4_chars.cdl) + +# Shell scripts that are copies of same files from nczarr_test +SET(TESTFILES_NCZARR_SH test_nczarr.sh run_chunkcases.sh run_corrupt.sh run_external.sh run_fillonlyz.sh run_filter.sh run_filterinstall.sh run_filter_misc.sh run_filter_vlen.sh run_interop.sh run_jsonconvention.sh run_misc.sh run_nccopy5.sh run_nccopyz.sh run_ncgen4.sh run_nczarr_fill.sh run_nczfilter.sh run_newformat.sh run_notzarr.sh run_nulls.sh run_perf_chunks1.sh run_purezarr.sh run_quantize.sh run_scalar.sh run_specific_filters.sh run_strings.sh run_unknown.sh run_unlim_io.sh run_ut_map.sh run_ut_mapapi.sh run_ut_misc.sh) + +# Program files +set(TESTFILES_NCZARR_C test_chunking.c test_filter_vlen.c test_h5_endians.c test_put_vars_two_unlim_dim.c test_quantize.c test_unlim_vars.c tst_pure_awssdk.cpp) + +# Data files +set(TESTDATA_NCZARR ref_nulls_nczarr.baseline ref_zarr_test_data.cdl.gz ref_avail1.cdl ref_byte.cdl ref_byte_fill_value_null.cdl ref_fillonly.cdl ref_misc1.cdl ref_ndims.cdl ref_newformatpure.cdl ref_nulls.cdl ref_oldformat.cdl ref_perdimspecs.cdl ref_power_901_constants.cdl ref_purezarr_base.cdl ref_quotes.cdl ref_rem.cdl ref_scalar.cdl ref_skip.cdl ref_skipw.cdl ref_string.cdl ref_t_meta_dim1.cdl ref_t_meta_var1.cdl ref_ut_mapapi_create.cdl ref_ut_mapapi_data.cdl ref_ut_mapapi_meta.cdl ref_ut_map_create.cdl ref_ut_map_writedata.cdl ref_ut_map_writemeta.cdl ref_ut_map_writemeta2.cdl ref_ut_testmap_create.cdl ref_whole.cdl ref_avail1.dmp ref_misc1.dmp ref_ndims.dmp ref_rem.dmp ref_noshape.file.zip ref_groups.h5 ref_notzarr.tar.gz ref_avail1.txt ref_skip.txt ref_ut_json_build.txt ref_ut_json_parse.txt ref_ut_mapapi_search.txt ref_ut_map_readmeta.txt ref_ut_map_readmeta2.txt ref_ut_map_search.txt ref_ut_proj.txt ref_whole.txt ref_byte.zarr.zip ref_byte_fill_value_null.zarr.zip ref_oldformat.zip ref_power_901_constants_orig.zip ref_quotes_orig.zip) + +macro(v3_setup) +# Add command to update the test source from nczarr_test + foreach(u ${TESTFILES_NCZARR_C} ${TESTDATA_NCZARR}) + set(SRCPATH "${CMAKE_SOURCE_DIR}/nczarr_test/${u}") + set(DSTPATH "${CMAKE_BINARY_DIR}/v3_nczarr_test/${u}") + file(REMOVE ${DSTPATH}) + file(COPY ${SRCPATH} DESTINATION ${DSTPATH}) + endforeach() + + foreach(u ${TESTFILES_NCZARR_SH}) + set(SRCPATH "${CMAKE_SOURCE_DIR}/nczarr_test/${u}") + set(DSTPATH "${CMAKE_BINARY_DIR}/v3_nczarr_test/${u}") + file(REMOVE ${DSTPATH}) + file(COPY ${SRCPATH} DESTINATION ${DSTPATH}) + endforeach() +endmacro() diff --git a/config.h.cmake.in b/config.h.cmake.in index 26a5b4773b..66e27db64f 100644 --- a/config.h.cmake.in +++ b/config.h.cmake.in @@ -106,6 +106,9 @@ are set when opening a binary file on Windows. */ /* default chunk size in bytes */ #cmakedefine DEFAULT_CHUNK_SIZE ${DEFAULT_CHUNK_SIZE} +/* default zarr format*/ +#cmakedefine DFALTZARRFORMAT ${DFALTZARRFORMAT} + /* set this only when building a DLL under MinGW */ #cmakedefine DLL_EXPORT 1 @@ -154,12 +157,18 @@ are set when opening a binary file on Windows. */ /* if true, enable NCZARR */ #cmakedefine NETCDF_ENABLE_NCZARR 1 +/* if true, enable NCZARR Version 3*/ +#cmakedefine NETCDF_ENABLE_NCZARR_V3 1 + /* if true, enable nczarr filter support */ #cmakedefine NETCDF_ENABLE_NCZARR_FILTERS 1 /* if true, enable nczarr zip support */ #cmakedefine NETCDF_ENABLE_NCZARR_ZIP 1 +/* if true, enable nczarr V3 support */ +#cmakedefine NETCDF_ENABLE_NCZARR_V3 1 + /* if true, Allow dynamically loaded plugins */ #cmakedefine NETCDF_ENABLE_PLUGINS 1 @@ -187,6 +196,11 @@ are set when opening a binary file on Windows. */ /* S3 Working subtree path prefix*/ #define S3TESTSUBTREE "${S3TESTSUBTREE}" +/* S3 Test endpoint */ +#define S3ENDPOINT "${S3ENDPOINT}" + +/* S3 Test Bucket */ + /* if true, run extra tests which may not work yet */ #cmakedefine EXTRA_TESTS 1 @@ -517,7 +531,7 @@ with zip */ #cmakedefine VALGRIND_TESTS 1 #cmakedefine NETCDF_ENABLE_CDMREMOTE 1 #cmakedefine USE_HDF5 1 -#cmakedefine ENABLE_FILEINFO 1 +#cmakedefine NETCDF_ENABLE_FILEINFO 1 #cmakedefine TEST_PARALLEL ${TEST_PARALLEL} #cmakedefine BUILD_RPC 1 #cmakedefine USE_X_GETOPT 1 diff --git a/configure.ac b/configure.ac index be202762b3..3f23d38229 100644 --- a/configure.ac +++ b/configure.ac @@ -150,7 +150,6 @@ AC_MSG_NOTICE([checking supported formats]) # Collect the values of --enable-netcdf-4, --enable-netcdf4, and --enable-hdf5. # Also determine which have been explicitly set on the command line. - AC_ARG_ENABLE([netcdf-4], [AS_HELP_STRING([--enable-netcdf-4], [(Deprecated) Synonym for --enable-hdf5; default yes])]) AC_ARG_ENABLE([netcdf4], [AS_HELP_STRING([--enable-netcdf4], @@ -166,11 +165,11 @@ if test "x$enable_netcdf4" != x ; then fi # --enable-netcdf-4 overrides --enable-netcdf4 if latter not defined -if test "x$enable_netcdf_4" != x && test "x$enable_netcdf4" == x ; then +if test "x$enable_netcdf_4" != x && test "x$enable_netcdf4" = x ; then enable_netcdf4="$enable_netcdf_4" fi # --enable-netcdf4 overrides --enable-hdf5 if latter not defined -if test "x$enable_netcdf4" != x && test "x$enable_hdf5" == x ; then +if test "x$enable_netcdf4" != x && test "x$enable_hdf5" = x ; then enable_hdf5="$enable_netcdf4" fi # Otherwise, use --enable-hdf5 @@ -227,8 +226,6 @@ AC_MSG_RESULT($enable_dap) if test "x$enable_remote_functionality" = xno ; then AC_MSG_WARN([All network access is disabled => DAP support disabled.]) enable_dap=no - AC_MSG_WARN([All network access is disabled => NCZARR support disabled.]) - enable_nczarr=no fi AC_MSG_CHECKING([whether netcdf zarr storage format should be disabled]) @@ -238,11 +235,40 @@ AC_ARG_ENABLE([nczarr], test "x$enable_nczarr" = xno || enable_nczarr=yes AC_MSG_RESULT($enable_nczarr) +AC_MSG_CHECKING([whether netcdf zarr storage format version 3 should be enabled]) +AC_ARG_ENABLE([nczarr-v3], + [AS_HELP_STRING([--enable-nczarr-v3], + [enable netcdf zarr version 3 storage support])]) dnl +test "x$enable_nczarr_v3" = xyes || enable_nczarr_v3=no +AC_MSG_RESULT($enable_nczarr_v3) +if test "x$enable_nczarr_v3" = xyes ; then +AC_DEFINE([NETCDF_ENABLE_NCZARR_V3], [1], [if true, include NCZarr V3 support]) +fi +AM_CONDITIONAL([NETCDF_ENABLE_NCZARR_V3],[test "x$enable_nczarr_v3" = xyes]) + # HDF5 | HDF4 | NCZarr => netcdf-4 if test "x$enable_hdf5" = xyes || test "x$enable_hdf4" = xyes || test "x$enable_nczarr" = xyes ; then enable_netcdf4=yes fi +# Choose the default Zarr format +AC_MSG_CHECKING([whether Zarr format 3 is the default]) +AC_ARG_ENABLE([default-zarr-format_v3], + [AS_HELP_STRING([--enable-default-zarr-format_v3], + [Specify the default Zarr format.])]) +test "x$enable_default_zarr_format_v3" = xyes | enable_default_zarr_format_v3=no # V2 is the default by default +if test "x$enable_nczarr_v3" = xno ; then + enable_default_zarr_format_v3=no +fi +AC_MSG_RESULT([$enable_default_zarr_format_v3}]) +if test "x$enable_default_zarr_format_v3" = xyes; then + DFALTZARRFORMAT=3 +else + DFALTZARRFORMAT=2 +fi +AC_DEFINE_UNQUOTED([DFALTZARRFORMAT], [$DFALTZARRFORMAT], [Default Zarr format]) +AC_SUBST([DFALTZARRFORMAT],[$DFALTZARRFORMAT]) + AC_MSG_NOTICE([checking user options]) # Did the user specify a default minimum blocksize (NCIO_MINBLOCKSIZE) for posixio? @@ -257,7 +283,7 @@ AC_DEFINE_UNQUOTED([NCIO_MINBLOCKSIZE], [$NCIO_MINBLOCKSIZE], [min blocksize for # Find valgrind, if available, and add targets for it. AX_VALGRIND_DFLT([sgcheck], [off]) AX_VALGRIND_CHECK -AM_CONDITIONAL(ENABLE_VALGRIND, [test "x$VALGRIND_ENABLED" = xyes]) +AM_CONDITIONAL(NETCDF_ENABLE_VALGRIND, [test "x$VALGRIND_ENABLED" = xyes]) ### # Doxygen and doxygen-related options. @@ -703,6 +729,13 @@ test "x$enable_dap_remote_tests" = xno || enable_dap_remote_tests=yes if test "x$enable_dap" = "xno" ; then enable_dap_remote_tests=no fi + +# Provide a global control for remotetest. +if test "xREMOTETESTDOWN" = xyes ; then + AC_MSG_WARN("ENV(REMOTETESTDOWN) => netcdf_enable_dap_remote_tests == no") + enable_dap_remote_tests=no +fi + AC_MSG_RESULT($enable_dap_remote_tests) # Provide a global control for remotetest. @@ -938,7 +971,6 @@ AC_MSG_RESULT([${have_sz}]) if test "x$enable_nczarr" = xno ; then enable_nczarr_zip=no else - # See if we have libzip for NCZarr AC_SEARCH_LIBS([zip_open],[zip zip.dll cygzip.dll],[have_zip=yes],[have_zip=no]) AC_MSG_CHECKING([whether libzip library is available]) @@ -946,10 +978,13 @@ else enable_nczarr_zip=${have_zip} # alias AC_MSG_CHECKING([whether nczarr zip support is enabled]) + AC_ARG_ENABLE([nczarr-zip], + [AS_HELP_STRING([--enable-nczarr-zip], + [enable netcdf zip support (default off)])]) + test "x$have_zip" = xyes -a "x$enable_nczarr_zip" = xyes || enable_nczarr_zip=no AC_MSG_RESULT([${enable_nczarr_zip}]) - if test "x$enable_nczarr_zip" = xyes ; then - AC_DEFINE([NETCDF_ENABLE_NCZARR_ZIP], [1], [If true, then libzip found]) + AC_DEFINE([NETCDF_ENABLE_NCZARR_ZIP], [1], [If true, then libzip found and ZIP for nczarr enabled.]) fi # Check for enabling of S3 support @@ -1061,6 +1096,26 @@ else fi fi +# For convenience, define USE_NETCDF4 +if test "x${enable_hdf5}" = xyes || test "x${enable_nczarr}" = xyes || test "x${enable_dap4}" = xyes ; then +enable_netcdf4=yes +AC_DEFINE([USE_NETCDF4], [1], [convenience to avoid having to specify the above test]) +else +enable_netcdf4=no +fi +AM_CONDITIONAL([USE_NETCDF4], [test x$enable_netcdf4 = xyes]) + +# Does the user want to turn off nc_set_log_level() function? (It will +# always be defined if --enable-logging is used.) +AC_MSG_CHECKING([whether nc_set_log_level() function is included (will do nothing unless enable-logging is also used)]) +AC_ARG_ENABLE([set_log_level_func], [AS_HELP_STRING([--disable-set-log-level-func], + [disable the nc_set_log_level function])]) +test "x$enable_set_log_level_func" = xno -a "x$enable_logging" = xno || enable_set_log_level_func=yes +AC_MSG_RESULT($enable_set_log_level_func) +if test "x$enable_set_log_level_func" = xyes -a "x$enable_netcdf4" = xyes; then + AC_DEFINE([NETCDF_ENABLE_SET_LOG_LEVEL], 1, [If true, define nc_set_log_level.]) +fi + # Check whether we want to enable strict null byte header padding. # See https://github.com/Unidata/netcdf-c/issues/657 for more information. AC_MSG_CHECKING([whether to enable strict null-byte header padding when reading (default off)]) @@ -1911,7 +1966,7 @@ AC_ARG_ENABLE([nczarr-filters], [AS_HELP_STRING([--disable-nczarr-filters], test "x$enable_nczarr_filters" = xno || enable_nczarr_filters=yes AC_MSG_RESULT([$enable_nczarr_filters]) -# Control filter test/example +# Control filter testing AC_MSG_CHECKING([whether filter testing should be run]) AC_ARG_ENABLE([filter-testing], [AS_HELP_STRING([--disable-filter-testing], @@ -2076,6 +2131,7 @@ AC_SUBST(HAS_NC4,[$enable_netcdf4]) AC_SUBST(HAS_CDF5,[$enable_cdf5]) AC_SUBST(HAS_HDF4,[$enable_hdf4]) AC_SUBST(HAS_BENCHMARKS,[$enable_benchmarks]) +AC_SUBST(HAS_NC4,[$enable_netcdf4]) AC_SUBST(HAS_HDF5,[$enable_hdf5]) AC_SUBST(HAS_PNETCDF,[$enable_pnetcdf]) AC_SUBST(HAS_LOGGING, [$enable_logging]) @@ -2092,8 +2148,10 @@ AC_SUBST(HAS_S3_AWS,[$enable_s3_aws]) AC_SUBST(HAS_S3_INTERNAL,[$enable_s3_internal]) AC_SUBST(HAS_HDF5_ROS3,[$has_hdf5_ros3]) AC_SUBST(HAS_NCZARR,[$enable_nczarr]) +AC_SUBST(HAS_NCZARR_V3,[$enable_nczarr_v3]) AC_SUBST(NETCDF_ENABLE_S3_TESTING,[$with_s3_testing]) AC_SUBST(HAS_NCZARR_ZIP,[$enable_nczarr_zip]) +AC_SUBST(NCZARR_DEFAULT_FORMAT,[$DFALTZARRFORMAT]) AC_SUBST(HAS_PLUGINS, [$enable_plugins]) AC_SUBST(HAS_QUANTIZE,[$enable_quantize]) AC_SUBST(HAS_LOGGING,[$enable_logging]) @@ -2125,6 +2183,11 @@ AC_SUBST([S3TESTBUCKET],["unidata-zarr-test-data"]) AC_DEFINE([S3TESTSUBTREE], ["netcdf-c"], [S3 test path prefix]) AC_SUBST([S3TESTSUBTREE],[netcdf-c]) +# Additional S3 Test Endpoint +# WARNING: this must match the value in CMakeLists.txt +AC_DEFINE([S3ENDPOINT], ["s3.us-east-1.amazonaws.com"], [S3 test endpoint]) +AC_SUBST([S3ENDPOINT],["s3.us-east-1.amazonaws.com"]) + # Build a small unique id to avoid interference on same platform PLATFORMUID="$RANDOM" # Make sure uid > 0 @@ -2290,6 +2353,7 @@ AX_SET_META([NC_HAS_S3_AWS],[$enable_s3_aws],[yes]) AX_SET_META([NC_HAS_S3_INTERNAL],[$enable_s3_internal],[yes]) AX_SET_META([NC_HAS_HDF5_ROS3],[$has_hdf5_ros3],[yes]) AX_SET_META([NC_HAS_NCZARR],[$enable_nczarr],[yes]) +AX_SET_META([NC_HAS_NCZARR_V3],[$enable_nczarr_v3],[yes]) AX_SET_META([NC_HAS_LOGGING],[$enable_logging],[yes]) AX_SET_META([NC_HAS_QUANTIZE],[$enable_quantize],[yes]) AX_SET_META([NC_HAS_SZIP],[$enable_hdf5_szip],[yes]) @@ -2322,7 +2386,7 @@ AC_MSG_NOTICE([generating header files and makefiles]) AC_CONFIG_FILES(test_common.sh:test_common.in) AC_CONFIG_FILES(s3cleanup.sh:s3cleanup.in, [chmod ugo+x s3cleanup.sh]) AC_CONFIG_FILES(s3gc.sh:s3gc.in, [chmod ugo+x s3gc.sh]) -for FP in plugins nc_test4 nczarr_test h5_test examples/C ; do +for FP in plugins nc_test4 nczarr_test v3_nczarr_test h5_test examples/C ; do AC_CONFIG_FILES(${FP}/findplugin.sh:plugins/findplugin.in, [chmod ugo+x ${FP}/findplugin.sh]) done AC_CONFIG_FILES(ncdap_test/findtestserver.c:ncdap_test/findtestserver.c.in, [chmod ugo+x ncdap_test/findtestserver.c]) @@ -2343,6 +2407,7 @@ AC_CONFIG_FILES(nczarr_test/test_filter_repeat.c:nc_test4/test_filter_repeat.c) AC_CONFIG_FILES(nczarr_test/test_filter_order.c:nc_test4/test_filter_order.c) AC_CONFIG_FILES([examples/C/run_par_test.sh], [chmod ugo+x examples/C/run_par_test.sh]) AC_CONFIG_FILES([nc-config], [chmod 755 nc-config]) + AC_CONFIG_FILES([Makefile netcdf.pc libnetcdf.settings @@ -2387,6 +2452,7 @@ AC_CONFIG_FILES([Makefile dap4_test/Makefile plugins/Makefile nczarr_test/Makefile + v3_nczarr_test/Makefile ]) AC_OUTPUT() diff --git a/dap4_test/CMakeLists.txt b/dap4_test/CMakeLists.txt index cd9be513c3..9a0f3b5ec0 100644 --- a/dap4_test/CMakeLists.txt +++ b/dap4_test/CMakeLists.txt @@ -52,8 +52,9 @@ ENDIF() IF(NETCDF_ENABLE_DAP_REMOTE_TESTS) add_sh_test(dap4_test test_remote) - add_sh_test(dap4_test test_hyrax) add_sh_test(dap4_test test_dap4url) +# Hyrax is acting flakey, so temporarily disable +# add_sh_test(dap4_test test_hyrax) IF(RUN_MANUAL_TESTS) # The following test can only be run by hand. # It tests earthdata authorization. diff --git a/dap4_test/Makefile.am b/dap4_test/Makefile.am index a6b1c9f566..ec5c81ec79 100644 --- a/dap4_test/Makefile.am +++ b/dap4_test/Makefile.am @@ -47,9 +47,11 @@ if NETCDF_ENABLE_DAP_REMOTE_TESTS TESTS += test_remote.sh TESTS += test_constraints.sh -TESTS += test_hyrax.sh TESTS += test_dap4url.sh +# Hyrax is acting flakey, so temporarily disable +#TESTS += test_hyrax.sh + # The following test can only be run by hand. # It tests earthdata authorization. # Before running it, one needs to do the following: diff --git a/docs/CMakeLists.txt b/docs/CMakeLists.txt index 36f6456e90..694a00c1b4 100644 --- a/docs/CMakeLists.txt +++ b/docs/CMakeLists.txt @@ -90,7 +90,7 @@ ENDIF(NETCDF_ENABLE_DOXYGEN) SET(CUR_EXTRA_DIST ${CUR_EXTRA_DIST} netcdf.m4 DoxygenLayout.xml Doxyfile.in footer.html mainpage.dox tutorial.dox guide.dox types.dox -architecture.dox internal.dox windows-binaries.md +architecture.dox internal.dox windows-binaries.md dispatch.md building-with-cmake.md CMakeLists.txt groups.dox install.md notes.md install-fortran.md credits.md auth.md obsolete/fan_utils.html bestpractices.md filters.md indexing.md diff --git a/docs/cloud.md b/docs/cloud.md index 31eb25e43d..75eb2fc260 100644 --- a/docs/cloud.md +++ b/docs/cloud.md @@ -12,6 +12,8 @@ Cloud Storage Access Using The NetCDF-C Library The NetCDF-C library supports limited access to cloud storage. Currently, that access is restricted to the Amazon S3 cloud storage, so this document is S3-centric. +Limited support is also provided for the Google cloud storage. +Google provides an S3 compatible REST API (See the _quickstart_paths.md document). It is expected that over time, access to additional cloud stores will be added, and this document will be expanded to cover those additional cases. diff --git a/docs/dispatchers.md b/docs/dispatchers.md new file mode 100644 index 0000000000..e69a0c7767 --- /dev/null +++ b/docs/dispatchers.md @@ -0,0 +1,1323 @@ +Internal Dispatch Table Architecture +============================ + + +# Internal Dispatcher Architectures + + + +[TOC] + +# Introduction {#dispatch_intro} + +The netcdf-c library uses an internal dispatch mechanism +as the means for wrapping the netcdf-c API around a wide variety +of underlying storage and stream data formats. + +This document attempts to give an overview of the internal +architectures of the major dispatcher modules. As such, this document is +long an will grow longer over time. + +As a first step, a description is provided on how dispatch modules +are chosen. After that, the internal architecture for specific modules +is described. The + +As of last check, the following formats are supported and each has its +own dispatch table. But beware: some of the listed function signatures +may be out of date and the specific code should be consulted to see +the actual parameters. + +As specific dispatch module architectures are described, that is noted +in the last column of the table of known dispatchers. + + +
FormatDirectory/File (optional) NC_FORMATX NameDescribed +
Dispatch Detectorlibdispatch/dinfermodel.cN.A.yes +
NetCDF-classiclibsrcNC_FORMATX_NC3no +
NetCDF-enhancedlibhdf5NC_FORMATX_NC_HDF5no +
HDF4libhdf4NC_FORMATX_NC_HDF4no +
PNetCDFlibsrcpNC_FORMATX_PNETCDFno +
DAP2libdap2NC_FORMATX_DAP2no +
DAP4libdap4NC_FORMATX_DAP4no +
UDF0N.A.NC_FORMATX_UDF0no +
UDF1N.A.NC_FORMATX_UDF1no +
NCZarrlibnczarrNC_FORMATX_NCZARRno +
+ +## Dispatch Detector {#dispatch_detector} + +The idea for the dispatch detector is that when a user opens or +creates a netcdf file, a specific dispatch table is chosen. A +dispatch table is a struct containing an entry for (almost) every +function in the netcdf-c API. During execution, netcdf API calls are +channeled through that dispatch table to the appropriate function for +implementing that API call. The functions in the dispatch table are +not quite the same as those defined in *netcdf.h*. For simplicity and +compactness, some netcdf.h API calls are mapped to the same dispatch +table function. In addition to the functions, the first entry in the +table defines the model that this dispatch table implements. It will +be one of the NC_FORMATX_XXX values. The second entry in the table is +the version of the dispatch table. The rule is that previous entries +may not be removed, but new entries may be added, and adding new +entries increases the version number. + +The dispatch table represents a distillation of the netcdf API down to +a minimal set of internal operations. The format of the dispatch table +is defined in the file *libdispatch/ncdispatch.h*. Every new dispatch +table must define this minimal set of operations. + +### Adding a New Dispatch Table +In order to make this process concrete, let us assume we plan to add +an in-memory implementation of netcdf-3. + +#### Defining configure.ac flags + +Define a *–-enable* flag option for *configure.ac*. For our +example, we assume the option "--enable-ncm" and the +internal corresponding flag "enable_ncm". If you examine the existing +*configure.ac* and see how, for example, *--enable_dap2* is +defined, then it should be clear how to do it for your code. + +#### Defining a "name space" + +Choose some prefix of characters to identify the new dispatch +system. In effect we are defining a name-space. For our in-memory +system, we will choose "NCM" and "ncm". NCM is used for non-static +procedures to be entered into the dispatch table and ncm for all other +non-static procedures. Note that the chosen prefix should probably start +with "nc" or "NC" in order to avoid name conflicts outside the netcdf-c library. + +#### Extend include/netcdf.h + +Modify the file *include/netcdf.h* to add an NC_FORMATX_XXX flag +by adding a flag for this dispatch format at the appropriate places. +```` + #define NC_FORMATX_NCM 7 +```` + +Add any format specific new error codes. +```` +###define NC_ENCM (?) +```` + +#### Extend include/ncdispatch.h + +Modify the file *include/ncdispatch.h* to +add format specific data and initialization functions; +note the use of our NCM namespace. +```` + #ifdef ENABLE_NCM + extern NC_Dispatch* NCM_dispatch_table; + extern int NCM_initialize(void); + #endif +```` + +#### Define the dispatch table functions + +Define the functions necessary to fill in the dispatch table. As a +rule, we assume that a new directory is defined, *libsrcm*, say. Within +this directory, we need to define *Makefile.am* and *CMakeLists.txt*. +We also need to define the source files +containing the dispatch table and the functions to be placed in the +dispatch table -– call them *ncmdispatch.c* and *ncmdispatch.h*. Look at +*libsrc/nc3dispatch.[ch]* or *libnczarr/zdispatch.[ch]* for examples. + +Similarly, it is best to take existing *Makefile.am* and *CMakeLists.txt* +files (from *libsrcp* for example) and modify them. + +#### Adding the dispatch code to libnetcdf + +Provide for the inclusion of this library in the final libnetcdf +library. This is accomplished by modifying *liblib/Makefile.am* by +adding something like the following. +```` + if ENABLE_NCM + libnetcdf_la_LIBADD += $(top_builddir)/libsrcm/libnetcdfm.la + endif +```` + +#### Extend library initialization + +Modify the *NC_initialize* function in *liblib/nc_initialize.c* by adding +appropriate references to the NCM dispatch function. +```` + #ifdef ENABLE_NCM + extern int NCM_initialize(void); + #endif + ... + int NC_initialize(void) + { + ... + #ifdef ENABLE_NCM + if((stat = NCM_initialize())) return stat; + #endif + ... + } +```` + +Finalization is handled in an analogous fashion. + +#### Testing the new dispatch table + +Typically, tests for a new dispatcher are kept in a separate directory +with a related name. For our running example, it might be *ncm_test*. +The file *ncm_test/Makefile.am* +will look something like this. +```` + # These files are created by the tests. + CLEANFILES = ... + # These are the tests which are always run. + TESTPROGRAMS = test1 test2 ... + test1_SOURCES = test1.c ... + ... + # Set up the tests. + check_PROGRAMS = $(TESTPROGRAMS) + TESTS = $(TESTPROGRAMS) + # Any extra files required by the tests + EXTRA_DIST = ... +```` + +### Top-Level build of the dispatch code + +Provide for *libnetcdfm* to be constructed by adding the following to +the top-level *Makefile.am*. + +```` + if ENABLE_NCM + NCM=libsrcm + NCMTESTDIR=ncm_test + endif + ... + SUBDIRS = ... $(DISPATCHDIR) $(NCM) ... $(NCMTESTDIR) +```` + +### Choosing a Dispatch Table + +The dispatch table is ultimately chosen by the function +NC_infermodel() in libdispatch/dinfermodel.c. This function is +invoked by the NC_create and the NC_open procedures. This can +be, unfortunately, a complex process. The detailed operation of +NC_infermodel() is defined in the companion document in docs/dinternal.md. + +In any case, the choice of dispatch table is currently based on the following +pieces of information. + +1. The mode argument – this can be used to detect, for example, what kind +of file to create: netcdf-3, netcdf-4, 64-bit netcdf-3, etc. +Using a mode flag is the most common mechanism, in which case +*netcdf.h* needs to be modified to define the relevant mode flag. + +2. The file path – this can be used to detect, for example, a DAP url +versus a normal file system file. If the path looks like a URL, then +the fragment part of the URL is examined to determine the specific +dispatch function. + +3. The file contents - when the contents of a real file are available, +the contents of the file can be used to determine the dispatch table. +As a rule, this is likely to be useful only for *nc_open*. + +4. If the file is being opened vs being created. + +5. Is parallel IO available? + +The *NC_infermodel* function returns two values. + +1. model - this is used by nc_open and nc_create to choose the dispatch table. +2. newpath - in some case, usually URLS, the path may be rewritten to include extra information for use by the dispatch functions. + +### Special Dispatch Table Signatures. + +The entries in the dispatch table do not necessarily correspond +to the external API. In many cases, multiple related API functions +are merged into a single dispatch table entry. + +#### Create/Open + +The create table entry and the open table entry in the dispatch table +have the following signatures respectively. +```` + int (*create)(const char *path, int cmode, + size_t initialsz, int basepe, size_t *chunksizehintp, + int useparallel, void* parameters, + struct NC_Dispatch* table, NC* ncp); + + int (*open)(const char *path, int mode, + int basepe, size_t *chunksizehintp, + int use_parallel, void* parameters, + struct NC_Dispatch* table, NC* ncp); +```` + +The key difference is that these are the union of all the possible +create/open signatures from the include/netcdfXXX.h files. Note especially the last +three parameters. The parameters argument is a pointer to arbitrary data +to provide extra info to the dispatcher. +The table argument is included in case the create +function (e.g. *NCM_create_) needs to invoke other dispatch +functions. The very last argument, ncp, is a pointer to an NC +instance. The raw NC instance will have been created by *libdispatch/dfile.c* +and is passed to e.g. open with the expectation that it will be filled in +by the dispatch open function. + +#### Accessing Data with put_vara() and get_vara() + +```` + int (*put_vara)(int ncid, int varid, const size_t *start, const size_t *count, + const void *value, nc_type memtype); +```` + +```` + int (*get_vara)(int ncid, int varid, const size_t *start, const size_t *count, + void *value, nc_type memtype); +```` + +Most of the parameters are similar to the netcdf API parameters. The +last parameter, however, is the type of the data in +memory. Additionally, instead of using an "int islong" parameter, the +memtype will be either ::NC_INT or ::NC_INT64, depending on the value +of sizeof(long). This means that even netcdf-3 code must be prepared +to encounter the ::NC_INT64 type. + +#### Accessing Attributes with put_attr() and get_attr() + +```` + int (*get_att)(int ncid, int varid, const char *name, + void *value, nc_type memtype); +```` + +```` + int (*put_att)(int ncid, int varid, const char *name, nc_type datatype, size_t len, + const void *value, nc_type memtype); +```` + +Again, the key difference is the memtype parameter. As with +put/get_vara, it used ::NC_INT64 to encode the long case. + +#### Pre-defined Dispatch Functions + +It is sometimes not necessary to implement all the functions in the +dispatch table. Some pre-defined functions are available which may be +used in many cases. + +#### Inquiry Functions + +Many of The netCDF inquiry functions operate from an in-memory model of +metadata. Once a file is opened, or a file is created, this +in-memory metadata model is kept up to date. Consequenty the inquiry +functions do not depend on the dispatch layer code. These functions +can be used by all dispatch layers which use the internal netCDF +enhanced data model. + +- NC4_inq +- NC4_inq_type +- NC4_inq_dimid +- NC4_inq_dim +- NC4_inq_unlimdim +- NC4_inq_att +- NC4_inq_attid +- NC4_inq_attname +- NC4_get_att +- NC4_inq_varid +- NC4_inq_var_all +- NC4_show_metadata +- NC4_inq_unlimdims +- NC4_inq_ncid +- NC4_inq_grps +- NC4_inq_grpname +- NC4_inq_grpname_full +- NC4_inq_grp_parent +- NC4_inq_grp_full_ncid +- NC4_inq_varids +- NC4_inq_dimids +- NC4_inq_typeids +- NC4_inq_type_equal +- NC4_inq_user_type +- NC4_inq_typeid + +#### NCDEFAULT get/put Functions + +The mapped (varm) get/put functions have been +implemented in terms of the array (vara) functions. So dispatch layers +need only implement the vara functions, and can use the following +functions to get the and varm functions: + +- NCDEFAULT_get_varm +- NCDEFAULT_put_varm + +For the netcdf-3 format, the strided functions (nc_get/put_vars) +are similarly implemented in terms of the vara functions. So the following +convenience functions are available. + +- NCDEFAULT_get_vars +- NCDEFAULT_put_vars + +For the netcdf-4 format, the vars functions actually exist, so +the default vars functions are not used. + +#### Read-Only Functions + +Some dispatch layers are read-only (ex. HDF4). Any function which +writes to a file, including nc_create(), needs to return error code +::NC_EPERM. The following read-only functions are available so that +these don't have to be re-implemented in each read-only dispatch layer: + +- NC_RO_create +- NC_RO_redef +- NC_RO__enddef +- NC_RO_sync +- NC_RO_set_fill +- NC_RO_def_dim +- NC_RO_rename_dim +- NC_RO_rename_att +- NC_RO_del_att +- NC_RO_put_att +- NC_RO_def_var +- NC_RO_rename_var +- NC_RO_put_vara +- NC_RO_def_var_fill + +#### Classic NetCDF Only Functions + +There are two functions that are only used in the classic code. All +other dispatch layers (except PnetCDF) return error ::NC_ENOTNC3 for +these functions. The following functions are provided for this +purpose: + +- NOTNC3_inq_base_pe +- NOTNC3_set_base_pe + +#### HDF4 Dispatch Layer as a Simple Example + +The HDF4 dispatch layer is about the simplest possible dispatch +layer. It is read-only, classic model. It will serve as a nice, simple +example of a dispatch layer. + +Note that the HDF4 layer is optional in the netCDF build. Not all +users will have HDF4 installed, and those users will not build with +the HDF4 dispatch layer enabled. For this reason HDF4 code is guarded +as follows. +```` +###ifdef USE_HDF4 +... +###endif /*USE_HDF4*/ +```` +Code in libhdf4 is only compiled if HDF4 is +turned on in the build. + +#### Header File Changes + +Adding the HDF4 dispatch table will first require changes to +a number of header files. + +##### The netcdf.h File + +In the main netcdf.h file, we add the following +to the list of NC_FORMATX_XXX definitions +```` +###define NC_FORMATX_NC_HDF4 (3) +```` + +##### The ncdispatch.h File + +In ncdispatch.h we add the following: + +```` +###ifdef USE_HDF4 +extern NC_Dispatch* HDF4_dispatch_table; +extern int HDF4_initialize(void); +extern int HDF4_finalize(void); +###endif +```` + +##### The netcdf_meta.h File + +The netcdf_meta.h file allows for easy determination of what features +are in use. For HDF4, the following is added -- as set by *./configure*: +```` +###define NC_HAS_HDF4 0 /*!< HDF4 support. */ +```` + +##### The hdf4dispatch.h File + +The file *hdf4dispatch.h* contains prototypes and +macro definitions used within the HDF4 code in libhdf4. This include +file should not be used anywhere except in libhdf4. It can be kept +in either the *include* directory or (preferably) the *libhdf4* directory. + +##### Initialization Code Changes in liblib Directory + +The file *nc_initialize.c* is modified to include the following: +```` +###ifdef USE_HDF4 +extern int HDF4_initialize(void); +extern int HDF4_finalize(void); +###endif +```` + +##### Changes to libdispatch/dfile.c + +In order for a dispatch layer to be used, it must be correctly +determined in functions *NC_open()* or *NC_create()* in *libdispatch/dfile.c*. +HDF4 has a magic number that is detected in +*NC_interpret_magic_number()*, which allows *NC_open* to automatically +detect an HDF4 file. + +Once HDF4 is detected, the *model* variable is set to *NC_FORMATX_NC_HDF4*, +and later this is used in a case statement: +```` + case NC_FORMATX_NC_HDF4: + dispatcher = HDF4_dispatch_table; + break; +```` + +This sets the dispatcher to the HDF4 dispatcher, which is defined in +the libhdf4 directory. + +##### Dispatch Table in libhdf4/hdf4dispatch.c + +The file *hdf4dispatch.c* contains the definition of the HDF4 dispatch +table. It looks like this: +```` +/* This is the dispatch object that holds pointers to all the + * functions that make up the HDF4 dispatch interface. */ +static NC_Dispatch HDF4_dispatcher = { +NC_FORMATX_NC_HDF4, /* The model identifier */ +NC_DISPATCH_VERSION, /* The version of this dispatch table */ +NC_RO_create, +NC_HDF4_open, +NC_RO_redef, +NC_RO__enddef, +NC_RO_sync, +... +NC_NOTNC4_set_var_chunk_cache, +NC_NOTNC4_get_var_chunk_cache, +... +}; +```` +Note that most functions use some of the predefined dispatch +functions. Functions that start with NC_RO* are read-only, they return +::NC_EPERM. Functions that start with NOTNC4* return ::NC_ENOTNC4. + +Only the functions that start with NC_HDF4* need to be implemented for +the HDF4 dispatch layer. There are 6 such functions: + +- NC_HDF4_open +- NC_HDF4_abort +- NC_HDF4_close +- NC_HDF4_inq_format +- NC_HDF4_inq_format_extended +- NC_HDF4_get_vara + +##### HDF4 Reading Code + +The code in *hdf4file.c* opens the HDF4 SD dataset, and reads the +metadata. This metadata is stored in the netCDF internal metadata +model, allowing the inq functions to work. + +The code in *hdf4var.c* does an *nc_get_vara()* on the HDF4 SD +dataset. This is all that is needed for all the nc_get_* functions to +work. + +### Appendix A. Changing NC_DISPATCH_VERSION + +When new entries are added to the *struct NC_Dispatch* type `located in include/netcdf_dispatch.h.in` it is necessary to do two things. + +1. Bump the NC_DISPATCH_VERSION number +2. Modify the existing dispatch tables to include the new entries. +It if often the case that the new entries do not mean anything for +a given dispatch table. In that case, the new entries may be set to +some variant of *NC_RO_XXX* or *NC_NOTNC4_XXX* *NC_NOTNC3_XXX*. + +Modifying the dispatch version requires two steps: +1. Modify the version number in *netcdf-c/configure.ac*, and +2. Modify the version number in *netcdf-c/CMakeLists.txt*. + +The two should agree in value. + +#### NC_DISPATCH_VERSION Incompatibility + +When dynamically adding a dispatch table +-- in nc_def_user_format (see libdispatch/dfile.c) -- +the version of the new table is compared with that of the built-in +NC_DISPATCH_VERSION; if they differ, then an error is returned from +that function. + +### Appendix B. Inferring the Dispatch Table + +As mentioned above, the dispatch table is inferred using the following +information: +1. The mode argument +2. The file path/URL +3. The file contents (when available) + +The primary function for doing this inference is in the file +*libdispatch/dinfermodel.c* via the API in *include/ncmodel.h*. +The term *model* is used here to include (at least) the following +information (see the structure type *NCmodel* in *include/ncmodel.h*). + +1. impl -- this is an NC_FORMATX_XXX value defining, in effect, the + dispatch table to use. +2. format -- this is an NC_FORMAT_XXX value defining the API to support: netcdf classic or netcdf enhanced. + +The construction of the model is primarily carried out by the function +*NC*infermodel()* (in *libdispatch/dinfermodel.c*). +It is given the following parameters: +1. path -- (IN) absolute file path or URL +2. modep -- (IN/OUT) the set of mode flags given to *NC_open* or *NC_create*. +3. iscreate -- (IN) distinguish open from create. +4. useparallel -- (IN) indicate if parallel IO can be used. +5. params -- (IN/OUT) arbitrary data dependent on the mode and path. +6. model -- (IN/OUT) place to store inferred model. +7. newpathp -- (OUT) the canonical rewrite of the path argument. + +As a rule, these values are used in the this order to infer the model. +1. file contents -- highest precedence +2. url (if it is one) -- using the "mode=" key in the fragment (see below). +3. mode flags +4. default format -- lowest precedence + +If the path appears to be a URL, then it is parsed. +Information is extracted from the URL, and specifically, +the fragment key "mode=" is the critical element. +The URL will be rewritten to a canonical form with the following +changes. +1. The fragment part ("#..." at the end) is parsed and the "mode=" key + is extracted and its value is converted to a list of tags. +2. If the leading protocol is not http/https, then the protocol is added + to the mode list. That protocol is then replaced with either http or https. +3. Certain singleton values in the fragment are extracted and removed + and added to the mode list. Consider, for example, "http://....#dap4". + The "dap4" singleton is removed and added to the mode list. +4. For backward compatibility, the values of "proto=" and "protocol=" + are removed from the fragment and their value is added to the mode list. +5. The final mode list is converted to a comma separated string + and re-inserted into the fragment. +6. The final mode list is modified to remove duplicates. + +The final result is the canonical form of the URL and is returned in the +newpathp argument described above. + +The mode list then is used as part of the inference process to choose +a dispatch table. + +# Point of Contact {#dispatch_poc} + +*Author*: Dennis Heimbigner
+*Email*: dennis.heimbigner@gmail.com
+*Initial Version*: 12/22/2021
+*Last Revised*: 7/7/2024 + diff --git a/docs/filters.md b/docs/filters.md index 8a510ee3db..de7103ea93 100644 --- a/docs/filters.md +++ b/docs/filters.md @@ -700,10 +700,8 @@ one less than the number of significant bunary figures: artifacts in multipoint statistics introduced by BitGroom (see https://doi.org/10.5194/gmd-14-377-2021). - # Debugging {#filters_debug} - Depending on the debugger one uses, debugging plugins can be very difficult. It may be necessary to use the old printf approach for debugging the filter itself. @@ -993,11 +991,11 @@ typedef struct NCZ_codec_t { Currently always NCZ_CODEC_HDF5 */ const char* codecid; /* The name/id of the codec */ unsigned int hdf5id; /* corresponding hdf5 id */ - void (*NCZ_codec_initialize)(void); - void (*NCZ_codec_finalize)(void); - int (*NCZ_codec_to_hdf5)(const char* codec, int* nparamsp, unsigned** paramsp); - int (*NCZ_hdf5_to_codec)(size_t nparams, const unsigned* params, char** codecp); - int (*NCZ_modify_parameters)(int ncid, int varid, size_t* vnparamsp, unsigned** vparamsp, size_t* nparamsp, unsigned** paramsp); + void (*NCZ_codec_initialize)(NCproplist* env); + void (*NCZ_codec_finalize)(NCproplist* env); + int (*NCZ_codec_to_hdf5)(NCproplist* env, const char* codec, int* h5idp, int* nparamsp, unsigned** paramsp); + int (*NCZ_hdf5_to_codec)(NCproplist* env, int h5id, size_t nparams, const unsigned* params, char** codecp); + int (*NCZ_modify_parameters)(NCproplist* env, int* h5idp, size_t* vnparamsp, unsigned** vparamsp, size_t* nparamsp, unsigned** paramsp); } NCZ_codec_t; ```` @@ -1015,12 +1013,14 @@ visible parameters. ##### Signature ```` - int NCZ_codec_to_hdf(const char* codec, int* nparamsp, unsigned** paramsp); + int NCZ_codec_to_hdf(NCproplist* env, const char* codec, int* h5idp, int* nparamsp, unsigned** paramsp); ```` ##### Arguments -1. codec — (in) ptr to JSON string representing the codec. -2. nparamsp — (out) store the length of the converted HDF5 unsigned vector -3. paramsp — (out) store a pointer to the converted HDF5 unsigned vector; caller must free the returned vector. Note the double indirection. +1. env $mdash; (in) ptr to a property list of key+value pairs. +2. codec — (in) ptr to JSON string representing the codec. +3. h5idp — (in/out) the hdf5 filter id. +4. nparamsp — (out) store the length of the converted HDF5 unsigned vector +5. paramsp — (out) store a pointer to the converted HDF5 unsigned vector; caller must free the returned vector. Note the double indirection. Return Value: a netcdf-c error code. @@ -1031,12 +1031,12 @@ return a corresponding JSON codec representation of those visible parameters. ##### Signature ```` - int NCZ_hdf5_to_codec)(int ncid, int varid, size_t nparams, const unsigned* params, char** codecp); + int NCZ_hdf5_to_codec)(NCproplist* env, int id, size_t nparams, const unsigned* params, char** codecp); ```` ##### Arguments -1. ncid — the variables' containing group -2. varid — the containing variable +1. env — property list of key+value pairs. +2. id — the hdf5 id. 3. nparams — (in) the length of the HDF5 visible parameters vector 4. params — (in) pointer to the HDF5 visible parameters vector. 5. codecp — (out) store the string representation of the codec; caller must free. @@ -1050,12 +1050,12 @@ to a set of working parameters; also provide option to modify visible parameters ##### Signature ```` - int NCZ_modify_parameters(int ncid, int varid, size_t* vnparamsp, unsigned** vparamsp, size_t* wnparamsp, unsigned** wparamsp); + int NCZ_modify_parameters(NCproplist* env, int* idp, size_t* vnparamsp, unsigned** vparamsp, size_t* wnparamsp, unsigned** wparamsp); ```` ##### Arguments -1. ncid — (in) group id containing the variable. -2. varid — (in) the id of the variable to which this filter is being attached. +1. env — (in) property list of key+value pairs. +2. idp — (in/out) the hdf5 id. 3. vnparamsp — (in/out) the count of visible parameters 4. vparamsp — (in/out) the set of visible parameters 5. wnparamsp — (out) the count of working parameters @@ -1070,8 +1070,12 @@ This function is called as soon as a shared library is loaded and matched with a ##### Signature ```` - int NCZ_codec_initialize)(void); + int NCZ_codec_initialize)(NCproplist* env); ```` +##### Arguments + +1. env — (in) property list of key+value pairs. + Return Value: a netcdf-c error code. #### NCZ\_codec\_finalize @@ -1082,8 +1086,12 @@ If the client code does not invoke *nc\_finalize* then memory checkers may compl ##### Signature ```` - int NCZ_codec_finalize)(void); + int NCZ_codec_finalize)(NCproplist* env); ```` +##### Arguments + +1. env — (in) property list of key+value pairs. + Return Value: a netcdf-c error code. ### Multi-Codec API @@ -1106,7 +1114,26 @@ The list of returned items are used to try to provide defaults for any HDF5 filters that have no corresponding Codec. This is for internal use only. -## Appendix F. Standard Filters {#filters_appendixf} +## Appendix F. Default HDF5 Filter Codecs {#filters_appendixf} + +It is recognized that it will be a while (if ever) until +HDF5 filters also specify the necessary codec information. +In order to provide some support for filters that do not have +corresponding codec support, a "_hdf5raw_" codec manager is provided. + +This hdf5raw codec manager encodes the parameters of the HDF5 filter +into one of these two codec forms: + +* Zarr Version 2 + ````{"id": "_hdf5raw_", "hdf5id": "<hdf5-id>, "nparams": <uint>, "0": <uint>...,"<N>": <uint>}```` +* Zarr Version 3 + ````{"name": "_hdf5raw_", "configuration": {"hdf5id": <uint>, "nparams": <uint>, "0": <uint>...,"<N>": <uint>}}```` + +There are couple things to note about hdf5raw: +1. this cannot be used if a modify_parameters function is required. +2. this representation will not be usable by other Zarr implementations, unless of course they choose to implement it. + +## Appendix G. Standard Filters {#filters_appendixg} Support for a select set of standard filters is built into the NetCDF API. Generally, they are accessed using the following generic API, where XXXX is @@ -1136,10 +1163,10 @@ Consider the zstandard compressor, which is one of the supported standard filter When installing the netcdf library, the following other libraries must be installed. 1. *libzstd.so* | *zstd.dll* | *libzstd.dylib* -- The actual zstandard compressor library; typically installed by using your platform specific package manager. -2. The HDF5 wrapper for *libzstd.so* -- There are several options for obtaining this (see [Appendix G](#filters_appendixg).) +2. The HDF5 wrapper for *libzstd.so* -- There are several options for obtaining this (see [Appendix H](#filters_appendixh).) 3. (Optional) The Zarr wrapper for *libzstd.so* -- you need this if you intend to read/write Zarr datasets that were compressed using zstandard; again see [Appendix G](#filters_appendixg). -## Appendix G. Finding Filter Implementations {#filters_appendixg} +## Appendix H. Finding Filter Implementations {#filters_appendixh} A major problem for filter users is finding an implementation of an HDF5 filter wrapper and (optionally) its corresponding NCZarr wrapper. There are several ways to do this. @@ -1160,7 +1187,7 @@ You can install this library to get access to these supported filters. It does not currently include the required NCZarr Codec API, so they are only usable with netcdf-4. This will change in the future. -## Appendix H. Auto-Install of Filter Wrappers {#filters_appendixh} +## Appendix I. Auto-Install of Filter Wrappers {#filters_appendixi} As part of the overall build process, a number of filter wrappers are built as shared libraries in the "plugins" directory. These wrappers can be installed as part of the overall netcdf-c installation process. @@ -1185,7 +1212,7 @@ provided by the *lib__nczh5filters.so* shared library. Note also that if you disable HDF5 support, but leave NCZarr support enabled, then all of the above filters should continue to work. -## Appendix I. A Warning on Backward Compatibility {#filters_appendixi} +## Appendix J. A Warning on Backward Compatibility {#filters_appendixj} The API defined in this document should accurately reflect the current state of filters in the netCDF-c library. Be aware that @@ -1212,4 +1239,4 @@ For additional information, see [Appendix B](#filters_appendixb). *Author*: Dennis Heimbigner
*Email*: dennis.heimbigner@gmail.com
*Initial Version*: 1/10/2018
-*Last Revised*: 5/18/2022 +*Last Revised*: 10/18/2023 diff --git a/docs/internal.md b/docs/internal.md index 02ec903fc3..f3ce837571 100644 --- a/docs/internal.md +++ b/docs/internal.md @@ -13,6 +13,7 @@ It covers the following issues. * [Inferring File Types](#intern_infer) * [Adding a Standard Filter](#intern_filters) * [Test Interference](#intern_isolation) +* [Managing NCZarr Tests](#intern_nczarr_tests) # 1. Including C++ Code in the netcdf-c Library {#intern_cpp} @@ -246,7 +247,7 @@ use this information to speed up the handling of fixed size types. # 3. Inferring File Types {#intern_infer} -As described in the companion document -- docs/dispatch.md -- +As described in the companion document -- docs/dispatchers.md -- when nc\_create() or nc\_open() is called, it must figure out what kind of file is being created or opened. Once it has figured out the file kind, the appropriate "dispatch table" can be used @@ -655,31 +656,34 @@ It soon became apparent that there were resources shared between tests and that execution sometimes caused interference between tests. In order to fix the inter-test interference, several approaches were used. -1. Renaming resources (primarily files) so that tests would create difference test files. +1. Renaming resources (primarily files) so that tests would create different test files. 2. Telling the test system that there were explicit dependencies between tests so that they would not be run in parallel. 3. Isolating test resources by creating independent directories for each test. ## Test Isolation -The isolation mechanism is currently used mostly in nczarr_tests. +The isolation mechanism is currently used mostly in nczarr_tests/v3_nczarr_tests. It requires that tests are all executed inside a shell script. When the script starts, it invokes a shell function called "isolate". -This function looks in current directory for a directory called "testset_\". -If "testset_\ is not found then it creates it. -This directory is then used to isolate all test output. - -After calling "isolate", the script enters the "testset_\" -directory. Then each actual test creates a directory in which to +This function looks in current directory for a directory called "alltests_\/\", +where "\" is the name of a test directory such as "nczarr_tests", or "nc_test4", etc. +If "alltests_\/\" is not found then it creates it. +This directory is then used to isolate all test output for the specified test directory. +After calling "isolate", the script enters the "alltests_\/\" +directory. + +Within the test directory, each actual test creates a directory in which to store any file resources that it creates during execution. -Suppose, for example, that the shell script is called "run_XXXX.sh". -The isolate function creates a directory with the general name "testset_\". -Then the run_XXX.sh script creates a directory "testset_\/testdir_XXX", +Suppose, for example, that the shell script is called "run_XXXX.sh", and is in the "ncdump" test directory. +The isolate function creates a directory with the general name "alltests_\/ncdump". +Then the run_XXX.sh script creates a directory "alltests_\/ncdump/testdir_XXX", enters it and runs the test. -During cleanup, specifically "make clean", all the testset_\ directories are deleted. +During cleanup, specifically "make clean", the directory alltests_\ is deleted, which of course +deletes all the subsidiary test directories. The "\" is a unique identifier created using the "date +%s" command. It returns an integer representing the number of seconds since the start of the so-called "epoch" basically "00:00:00 UTC, 1 January 1970". Using a date makes it easier to detect and reclaim obsolete -testset directories. +"alltests" directories. ## Cloud Test Isolation @@ -693,10 +697,12 @@ interfere with local testing by individual users. This problem is difficult to solve, but a mostly complete solution has been implemented possible with cmake, but not (as yet) possible with automake. -In any case, there is a shell function called s3isolate in nczarr_test/test_nczarr.sh that operates on cloud resources in a way that is similar to the isolate function. +In any case, there is a shell function called s3isolate in +nczarr_test/test_nczarr.sh that operates on cloud resources in a way +that is similar to the isolate function. The s3isolate does several things: 1. It invokes isolate to ensure local isolation. -2. It creates a path prefix relative to the Unidata S3 bucket that has the name "testset_\", where this name +2. It creates a path prefix relative to the Unidata S3 bucket that has the name "alltests_\/\", where this name is the same as the one created by the isolate function. 3. It appends the uid to a file called s3cleanup_\.uids. This file may accumulate several uids indicating the keys that need to be cleaned up. The pid is a separate small unique id to avoid s3cleanup interference. @@ -704,7 +710,7 @@ The s3isolate does several things: The test script then ensures that any cloud resources are created as extensions of the path prefix. Cleanup of S3 resources is complex. -In configure.ac or the top-level CMakeList.txt files, the path "netcdf-c/testset_\" +In configure.ac or the top-level CMakeList.txt files, the path "netcdf-c/alltests_\>" is created and via configuration commands, is propagated to various Makefile.am and specific script files. @@ -717,9 +723,8 @@ In cmake, the CTestCustom.cmake mechanism is used and contains the following com ENDIF() ```` -In automake, the "check-local" extension mechanism is used -because it is invoked after all tests are run in the nczarr_test -directory. So nczarr_test/Makefile.am contains the following +In automake, the "clean-local" extension mechanism is used. +So nczarr_test/Makefile.am contains the following equivalent code: ```` if NETCDF_ENABLE_S3_TESTALL @@ -763,9 +768,40 @@ This file is called "s3cleanup_\.json". 5. Use the "aws delete-objects" command to delete the keys. 6. Repeat steps 4 and 5 for each set of 500 keys. +# 6. Managing NCZarr Tests {#intern_nczarr_tests} + +When testing NCZarr, it is necessary to run tests for (NC)Zarr version 2 (aka V2) and for (NC)Zarr version 3 (aka V3). +In support of this, there are two test directories: *nczarr_test* and *v3_nczarr_test*. +When the tests in *nczarr_test* are executed, they default to using Zarr version 2 (=> NCZarr version 2). +Similarly, when the tests in *v3_nczarr_test* are executed, they default to using Zarr version 3 (=> NCZarr version 3). + +It turns out that almost all of the V2 tests can be reused for testing V3. +So, the tests in *v3_nczarr_test* are copies of the tests in *nczarr_test*. +It turns out that automake is not easily capable of copying those tests on the fly. +This is principally because the automake *make distcheck* command does not allow +modifications to the source directory, but only to the build directory. This means +that the tests must be heavily modified to handle the two cases where scripts, programs, +and test data are in _\$\$\{srcdir\}_ versus when they are in _\$\$\{builddir\}_. + +## AutoMake Testing +For AutoMake testing, selected V2 tests are copied to the *v3_nczarr_test* +directory using the *BUILT_SOURCES* mechanism. +If a new test is added to the *nczarr_test* directory, then its manifestation +in the *v3_nczarr_test* must be decided. + +If the test can be used directly, then the file v3_nczarr_test must be modified +and added to the TESTFILES_NCZARR variable. If the data files +referenced by the test can be used directly, then they should be added to the +TESTDATA_NCZARR variable. + +If the test must be modified for V3 use, then the modified +test should be github-add'ed to the v3_nczarr_test directory +and inserted into the Makefile.am. Similarly, any V3 specific +data files must be manually added to the Makefile.am and github-add'ed. + # Point of Contact {#intern_poc} *Author*: Dennis Heimbigner
*Email*: dmh at ucar dot edu
*Initial Version*: 12/22/2021
-*Last Revised*: 9/16/2023 +*Last Revised*: 4/10/2024 diff --git a/docs/nczarr.md b/docs/nczarr.md index e3db2016ea..e7f23f4822 100644 --- a/docs/nczarr.md +++ b/docs/nczarr.md @@ -10,7 +10,7 @@ The NetCDF NCZarr Implementation Beginning with netCDF version 4.8.0, the Unidata NetCDF group has extended the netcdf-c library to support data stored using the Zarr data model and storage format [4,6]. As part of this support, netCDF adds support for accessing data stored using cloud storage (e.g. Amazon S3 [1] ). -The goal of this project, then, is to provide maximum interoperability between the netCDF Enhanced (netcdf-4) data model and the Zarr version 2 [4] data model. This is embodied in the netcdf-c library so that it is possible to use the netcdf API to read and write Zarr formatted datasets. +The goal of this project is to provide maximum interoperability between the netCDF Enhanced (netcdf-4) data model and the Zarr version 2 [4] or Version 3 [13] data model. This is embodied in the netcdf-c library so that it is possible to use the netcdf API to read and write Zarr formatted datasets. In order to better support the netcdf-4 data model, the netcdf-c library implements a limited set of extensions to the *Zarr* data model. This extended model is referred to as *NCZarr*. @@ -31,22 +31,22 @@ Notes on terminology in this document. # The NCZarr Data Model {#nczarr_data_model} -NCZarr uses a data model that, by design, extends the Zarr Version 2 Specification . +NCZarr uses a data model that, by design, extends the Zarr Version 2 Specification or Version 3 Specification. __Note Carefully__: a legal _NCZarr_ dataset is expected to also be a legal _Zarr_ dataset. -The inverse is true also. A legal _Zarr_ dataset is expected to also be a legal _NCZarr_ dataset, where "legal" means it conforms to the Zarr specification(s). -In addition, certain non-Zarr features are allowed and used. -Specifically the XArray [7] ''\_ARRAY\_DIMENSIONS'' attribute is one such. +The inverse is true also. A legal _Zarr_ dataset is expected to also be a legal _NCZarr_ dataset, where "legal" means it conforms to the Zarr version 2 or 3 specification. +In addition, certain extra-Zarr features are allowed and used, namely: +1. the XArray [7] ''\_ARRAY\_DIMENSIONS'' attribute. +2. the .zmetadata conventions where all the JSON metadata is held in a single object. -There are two other, secondary assumption: +There are two other, secondary assumptions: 1. The actual storage format in which the dataset is stored -- a zip file, for example -- can be read by the _Zarr_ implementation. -2. The compressors (aka filters) used by the dataset can be encoded/decoded by the implementation. NCZarr uses HDF5-style filters, so ensuring access to such filters is somewhat complicated. See [the companion document on +2. The compressors (aka filters) used by the dataset can be encoded/decoded by the implementation. NCZarr uses HDF5-style filters, so ensuring access to such filters is somewhat complicated. See the [companion document on filters](./md_filters.html "filters") for details. Briefly, the data model supported by NCZarr is netcdf-4 minus -the user-defined types and full String type support. -However, a restricted form of String type +the user-defined types. However, a restricted form of String type is supported (see Appendix D). As with netcdf-4, chunking is supported. Filters and compression are also [supported](./md_filters.html "filters"). @@ -108,9 +108,14 @@ See the document "quickstart_paths" for details about using URLs. There are, however, some details that are important. -- Protocol: this should be _https_ or _s3_,or _file_. - The _s3_ scheme is equivalent to "https" plus setting "mode=s3". - Specifying "file" is mostly used for testing, but also for directory tree or zipfile format storage. +Several URL protocols are semantically meaningfull for the NCZarr implementation. +* _http_ or _https_ -- this just signals that we have a URL; the actual storage type is inferred from the _mode_ flag or by probing the object to which the URL refers. +* _file_ -- The _file_ scheme is equivalent to "mode=...,file,..." +* _s3_ -- The _s3_ scheme is equivalent to "https" plus setting "mode=...,s3,..." plus using an elided host. +* _gs3_ -- The _gs3_ scheme is equivalent to "https" plus setting "mode=...,gs3,...", and using the google-specific host. +* _zoh_ -- The _zoh_ scheme is equivalent to "http" plus setting "mode=...,zoh,...", plus using a host that leads to a server supporting the ZoH REST API. + +Note that currently there is no "zip:" protocol so it must be inferred or specified by a _mode_ tag. ## Client Parameters @@ -119,11 +124,21 @@ The fragment part of a URL is used to specify information that is interpreted to For reading, _key=value_ pairs are provided for specifying the storage format. - mode=nczarr|zarr +The _zarr_ mode implies restricting the format to the pure Zarr V2 or V3 format. +The _nczarr_ mode implies using the netcdf Zarr extensions. + Additional pairs are provided to specify the Zarr version. -- mode=v2 +- mode=v2|v3 + +Obviously, _v2_ implies using the Zarr Version 2 format; similarly for _v3_. -Additional pairs are provided to specify the storage medium: Amazon S3 vs File tree vs Zip file. -- mode=file|zip|s3 +Additional pairs are provided to specify the storage medium: Amazon S3 vs File vs, etc. +- mode=file|zip|s3|gs3|zoh + +The modes imply use of a specific driver: +* The _s3_ driver stores data using Amazon S3 or some equivalent. +* The _file_ driver stores data in a directory tree. +* The _zip_ driver stores data in a local zip file. Note that when reading, an attempt will be made to infer the format and Zarr version and storage medium format by probing the @@ -131,35 +146,24 @@ file. If inferencing fails, then it is reported. In this case, the client may need to add specific mode flags to avoid inferencing. -Typically one will specify three mode flags: one to indicate what format -to use and one to specify the way the dataset is to be stored. -For example, a common one is "mode=zarr,file" - - -Obviously, when creating a file, inferring the type of file to create -is not possible so the mode flags must be set specifically. -This means that both the storage medium and the exact storage -format must be specified. -Using _mode=nczarr_ causes the URL to be interpreted as a -reference to a dataset that is stored in NCZarr format. -The _zarr_ mode tells the library to use NCZarr, but to restrict its operation to operate on pure Zarr. - - -The modes _s3_, _file_, and _zip_ tell the library what storage medium -driver to use. -* The _s3_ driver stores data using Amazon S3 or some equivalent. -* The _file_ driver stores data in a directory tree. -* The _zip_ driver stores data in a local zip file. +Obviously, when creating a file, inferencing is not +possible so the mode flags must be set specifically. +In the most general case, one will specify three mode flags: one to indicate what format +to use, one to specify the way the dataset is to be stored, +and one to specifiy the Zarr format version. +For example, a common one is "mode=zarr,file,v2" +If not specified, the version will be the default specified when +the netcdf-c library was built. As an aside, it should be the case that zipping a _file_ format directory tree will produce a file readable by the -_zip_ storage format, and vice-versa. +_zip_ storage format, and vice-versa. This may change depending +on the outcome of current deliberations by the Zarr committee. By default, the XArray convention is supported for Zarr Version 2 -and used for both NCZarr files and pure Zarr files. - +and used for both NCZarr files and pure Zarr files. It is not +needed for Version 3, which has an equivalent array metadata key +called "dimension_names" This means that every variable in the root group whose named dimensions are also in the root group will have an attribute called *\_ARRAY\_DIMENSIONS* that stores those dimension names. @@ -196,7 +200,8 @@ An important restriction is placed on the structure of the tree, namely that keys are only defined for content-bearing objects. Further, all the leaves of the tree are these content-bearing objects. This means that the key for one content-bearing object should not -be a prefix of any other key. +be a prefix of any other key. For example and given the key "/x/y/zarr.json", +there should not exist any other key with the same prefix, "/x/y/zarr.json/z" for example. There several other concepts of note. 1. __Dataset__ - a dataset is the complete tree contained by the key defining @@ -207,23 +212,23 @@ and "contains" data in the form of an arbitrary sequence of 8-bit bytes. The zmap API defined here isolates the key-value pair mapping code from the Zarr-based implementation of NetCDF-4. - It wraps an internal C dispatch table manager for implementing an +It wraps an internal C dispatch table manager for implementing an abstract data structure implementing the zmap key/object model. Of special note is the "search" function of the API. __Search__: The search function has two purposes: 1. Support reading of pure zarr datasets (because they do not explicitly track their contents). -2. Debugging to allow raw examination of the storage. See zdump for example. +2. Debugging to allow raw examination of the storage. See _zdump_ for example. The search function takes a prefix path which has a key syntax (see above). -The set of legal keys is the set of keys such that the key references a content-bearing object -- e.g. /x/y/.zarray or /.zgroup. -Essentially this is the set of keys pointing to the leaf objects of the tree of keys constituting a dataset. -This set potentially limits the set of keys that need to be examined during search. - The search function returns a limited set of names, where the set of names are immediate suffixes of a given prefix path. -That is, if _\_ is the prefix path, then search returnsnall _\_ such that _\/\_ is itself a prefix of a "legal" key. +That is, if _\_ is the prefix path, then search returns all _\_ such that _\/\_ is itself a prefix of a "legal" key. This can be used to implement glob style searches such as "/x/y/*" or "/x/y/**" +The term "legal keys" is the set of keys such that the key references a content-bearing object -- e.g. /x/y/.zarray or /.zgroup. +Essentially this is the set of keys pointing to the leaf objects of the tree of keys constituting a dataset. +This set potentially limits the set of keys that need to be examined during search. + This semantics was chosen because it appears to be the minimum required to implement all other kinds of search using recursion. It was also chosen to limit the number of names returned from the search. Specifically @@ -250,7 +255,7 @@ so they are not included in the zmap data structure. __A Note on Error Codes:__ -The zmap API returns some distinguished error code: +The zmap API returns some distinguished error codes: 1. NC_NOERR if a operation succeeded 2. NC_EEMPTY is returned when accessing a key that has no content. 3. NC_EOBJECT is returned when an object is found which should not exist @@ -263,12 +268,11 @@ But this does not propagate outside the zmap_file implementation. ## Zmap Implementatons -The primary zmap implementation is _s3_ (i.e. _mode=nczarr,s3_) and indicates that the Amazon S3 cloud storage -- or some related applicance -- is to be used. -Another storage format uses a file system tree of directories and files (_mode=nczarr,file_). -A third storage format uses a zip file (_mode=nczarr,zip_). -The latter two are used mostly for debugging and testing. -However, the _file_ and _zip_ formats are important because they are intended to match corresponding storage formats used by the Python Zarr implementation. -Hence it should serve to provide interoperability between NCZarr and the Python Zarr, although this interoperability has had only limited testing. +The primary zmap implementation is _s3_ (i.e. _mode=zarr,s3_) and indicates that the Amazon S3 cloud storage -- or some related applicance -- is to be used. +Another storage format uses a file system tree of directories and files (_mode=zarr,file_). +A third storage format uses a zip file (_mode=zarr,zip_). +The _file_ and _zip_ formats are important because they are intended to match corresponding storage formats used by the Python Zarr implementation. +Hence they should serve to provide interoperability between NCZarr and the Python Zarr, although this interoperability has had only limited testing. Examples of the typical URL form for _file_ and _zip_ are as follows. ```` @@ -297,9 +301,9 @@ This requirement imposed some constraints on the reading of Zarr datasets using 1. Zarr allows some primitive types not recognized by NCZarr. Over time, the set of unrecognized types is expected to diminish. Examples of currently unsupported types are as follows: - * "c" -- complex floating point - * "m" -- timedelta - * "M" -- datetime + * "c" -- complex floating point + * "m" -- timedelta + * "M" -- datetime 2. The Zarr dataset may reference filters and compressors unrecognized by NCZarr. 3. The Zarr dataset may store data in column-major order instead of row-major order. The effect of encountering such a dataset is to output the data in the wrong order. @@ -316,7 +320,7 @@ A good value of _n_ is 9. # Zip File Support {#nczarr_zip} In order to use the _zip_ storage format, the libzip [3] library must be installed. -Note that this is different from zlib. +Note that this is different from zlib (aka "deflate"). ## Addressing Style @@ -326,14 +330,14 @@ The notion of "addressing style" may need some expansion. Amazon S3 accepts two For example: ``` -https://.s2.<region>.amazonaws.com/ +https://.s2..amazonaws.com/ ``` 2. Path -- the path addressing style places the bucket in at the front of the path part of a URL. For example: ``` -https://s3.<region>.amazonaws.com// +https://s3..amazonaws.com// ``` The NCZarr code will accept either form, although internally, it is standardized on path style. @@ -346,17 +350,17 @@ The reason for this is that the bucket name forms the initial segment in the key The NCZarr storage format is almost identical to that of the the standard Zarr format. The data model differs as follows. -1. Zarr only supports anonymous dimensions -- NCZarr supports only shared (named) dimensions. -2. Zarr attributes are untyped -- or perhaps more correctly characterized as of type string. -3. Zarr does not explicitly support unlimited dimensions -- NCZarr does support them. +1. Zarr only supports anonymous dimensions (plus a limited set of names via _\_ARRAY_ATTRIBUTES_) -- NCZarr supports only shared (named) dimensions, but can read anonymous dimensions by assigning special names to the anonymous dimensions. +2. Zarr attributes are untyped -- or perhaps more correctly characterized as of type string (in "JSON" format). NCZarr supports typing of attributes. +3. Zarr might not explicitly support unlimited dimensions (the documentation is unclear) -- NCZarr does support them. ## Storage Medium Consider both NCZarr and Zarr, and assume S3 notions of bucket and object. -In both systems, Groups and Variables (Array in Zarr) map to S3 objects. +In both systems, Groups and Variables (aka Arrays in Zarr) map to S3 objects. Containment is modeled using the fact that the dataset's key is a prefix of the variable's key. -So for example, if variable _v1_ is contained in top level group g1 -- _/g1 -- then the key for _v1_ is _/g1/v_. -Additional meta-data information is stored in special objects whose name start with ".z". +So for example, if variable _v1_ is contained in top level group _g1_ (i.e. _/g1_) -- then the key for _v1_ is _/g1/v1_. +Additional meta-data information is stored in special objects whose name start with ".z" (V2) or "zarr.json" (V3). In Zarr Version 2, the following special objects exist. 1. Information about a group is kept in a special object named _.zgroup_; @@ -366,6 +370,7 @@ so for example the object _/g1/v1/.zarray_. 3. Group-level attributes and variable-level attributes are stored in a special object named _.zattr_; so for example the objects _/g1/.zattr_ and _/g1/v1/.zattr_. 4. Chunk data is stored in objects named "\.\...,\" where the ni are positive integers representing the chunk index for the ith dimension. +Note that the character '/' can substiture for the '.' character in the chunk name. The first three contain meta-data objects in the form of a string representing a JSON-formatted dictionary. The NCZarr format uses the same objects as Zarr, but inserts NCZarr @@ -373,17 +378,17 @@ specific attributes in the *.zattr* object to hold NCZarr specific information The value of each of these attributes is a JSON dictionary containing a variety of NCZarr specific information. -These NCZarr-specific attributes are as follows: +These attributes are as follows: -_\_nczarr_superblock\__ -- this is in the top level group's *.zattr* object. +_\_nczarr_superblock\__ -- this attribute key is in the top level group's *.zattr* object. It is in effect the "superblock" for the dataset and contains any netcdf specific dataset level information. It is also used to verify that a given key is the root of a dataset. -Currently it contains keys that are ignored and exist only to ensure that +Currently it contains one key that is ignored and is only to ensure that older netcdf library versions do not crash. * "version" -- the NCZarr version defining the format of the dataset (deprecated). -_\_nczarr_group\__ -- this key appears in every group's _.zattr_ object. +_\_nczarr_group\__ -- this attribute key appears in every group's _.zattr_ object. It contains any netcdf specific group information. Specifically it contains the following keys: * "dimensions" -- the name and size of shared dimensions defined in this group, as well an optional flag indictating if the dimension is UNLIMITED. @@ -391,60 +396,97 @@ Specifically it contains the following keys: * "groups" -- the name of sub-groups defined in this group. These lists allow walking the NCZarr dataset without having to use the potentially costly search operation. -_\_nczarr_array\__ -- this key appears in the *.zattr* object associated +_\_nczarr_array\__ -- this attribute key appears in the *.zattr* object associated with a _.zarray_ object. It contains netcdf specific array information. Specifically it contains the following keys: -* dimension_references -- the fully qualified names of the shared dimensions referenced by the variable. -* storage -- indicates if the variable is chunked vs contiguous in the netcdf sense. Also signals if a variable is scalar. +* dimension_references -- the names of the shared dimensions referenced by the variable. +* storage -- indicates if the variable is chunked vs contiguous in the netcdf sense. -_\_nczarr_attr\__ -- this attribute appears in every _.zattr_ object. +_\_nczarr_attr\__ -- this attribute key appears in every _.zattr_ object. Specifically it contains the following keys: -* types -- the types of all attributes in the _.zattr_ object. +* types -- the types of all of the attributes in the _.zattr_ object. ## Translation {#nczarr_translation} -With some loss of netcdf-4 information, it is possible for an nczarr library to read the pure Zarr format and for other zarr libraries to read the nczarr format. +With some constraints, it is possible for an nczarr library to read the pure Zarr format and for other zarr libraries to read the nczarr format. -The latter case, zarr reading nczarr, is trival because all of the nczarr metadata is stored as ordinary, String valued (but JSON syntax), attributes. +The latter case should require no special decoding by the non-nczarr library +because all nczarr specific extensions are encoded to appear as ordinary +zarr attributes. -The former case, nczarr reading zarr is possible assuming the nczarr code can simulate or infer the contents of the missing _\_nczarr\_xxx_ attributes. +The former case -- nczarr reading zarr -- is possible if the nczarr code can simulate or infer the contents of the missing _\_nczarr\_xxx_ attributes. As a rule this can be done as follows. -1. _\_nczarr_group\__ -- The list of contained variables and sub-groups can be computed using the search API to list the keys "contained" in the key for a group. -The search looks for occurrences of _.zgroup_, _.zattr_, _.zarray_ to infer the keys for the contained groups, attribute sets, and arrays (variables). -Constructing the set of "shared dimensions" is carried out +1. _\_nczarr_group\__ -- The list of contained variables and sub-groups can be computed using the search API to list the keys "contained" in the key for a group. Alternatively, the root group may contain a _.zmetadata_ object that can be used to determine the lists of variables and subgroups. + + For V2, the search looks for occurrences of _.zmetadata_, _.zgroup_, _.zattr_, _.zarray_ to infer the keys for the contained groups, attribute sets, and arrays (variables). +For V3, the search looks for occurrences of _zarr.json_. + + Constructing the set of "shared dimensions" is carried out by walking all the variables in the whole dataset and collecting the set of unique integer shapes for the variables. -For each such dimension length, a top level dimension is created +For each such dimension length, a dimension is created in the root group named "_Anonymous_Dimension_" where len is the integer length. -2. _\_nczarr_array\__ -- The dimension referencess are inferred by using the shape in _.zarray_ and creating references to the simulated shared dimensions. -netcdf specific information. +2. _\_nczarr_array\__ -- The dimension references are inferred by using the shape +in _.zarray_ (or _zarr.json_) and creating references to the simulated shared dimension. 3. _\_nczarr_attr\__ -- The type of each attribute is inferred by trying to parse the first attribute value string. # Compatibility {#nczarr_compatibility} In order to accomodate existing implementations, certain mode tags are provided to tell the NCZarr code to look for information used by specific implementations. -## XArray +## The "xarray" Mode -The Xarray [7] Zarr implementation uses its own mechanism for specifying shared dimensions. +The xarray [XArray Zarr Encoding Specification](http://xarray.pydata.org/en/latest/internals.html#zarr-encoding-specification) Zarr implementation uses its own mechanism for specifying an approximation to shared dimensions. It uses a special attribute named ''_ARRAY_DIMENSIONS''. The value of this attribute is a list of dimension names (strings). An example might be ````["time", "lon", "lat"]````. -It is almost equivalent to the ````_nczarr_array "dimension_references" list````, except that the latter uses fully qualified names so the referenced dimensions can be anywhere in the dataset. The Xarray dimension list differs from the netcdf-4 shared dimensions in two ways. -1. Specifying Xarray in a non-root group has no meaning in the current Xarray specification. -2. A given name can be associated with different lengths, even within a single array. This is considered an error in NCZarr. +It is essentially equivalent to the ````_nczarr_array "dimension_references" list````, except that the latter uses fully qualified names so the referenced dimensions can be anywhere in the dataset. -The Xarray ''_ARRAY_DIMENSIONS'' attribute is supported for both NCZarr and pure Zarr. +The xarray ''_ARRAY_DIMENSIONS'' attribute is supported for both NCZarr and pure Zarr. If possible, this attribute will be read/written by default, but can be suppressed if the mode value "noxarray" is specified. If detected, then these dimension names are used to define shared dimensions. -The following conditions will cause ''_ARRAY_DIMENSIONS'' to not be written. +Any of the following conditions will cause ''_ARRAY_DIMENSIONS'' not to be written. * The variable is not in the root group, * Any dimension referenced by the variable is not in the root group. +* ''_ARRAY_DIMENSIONS'' assigns conflicting sizes to a dimension name. Note that this attribute is not needed for Zarr Version 3, and is ignored. +## The ".zmetdata" Mode +The NCzarr implementation of Version 2 also support the ".zmetadata" convention. +This convention adds an extra, root-level object called ".zmetadata". +This object is a JSON dictionary with this form: +```` +{"metadata": + { + "": , + "": , + ... + "": + }, +"zarr_consolidated_format":1 +} +```` +Each <key> refers to a content-bearing object and the <contents> is the JSON content of that object. +An example might look as follows: +```` +{ + "metadata": + { + ".zgroup": {"zarr_format": 2}, + ".zattr": {"globalfloat": 1}, + "v/.zarray": {"zarr_format": 2, "shape": [1], "dtype": ".amazonaws.com/datasetbucket/rootkey\#mode=nczarr&awsprofile=unidata" ```` + +# References {#nczarr_bib} + +[1] [Amazon Simple Storage Service Documentation](https://docs.aws.amazon.com/s3/index.html)
+[2] [Amazon Simple Storage Service Library](https://github.com/aws/aws-sdk-cpp)
+[3] [The LibZip Library](https://libzip.org/)
+[4] [NetCDF ZARR Data Model Specification](https://www.unidata.ucar.edu/blogs/developer/en/entry/netcdf-zarr-data-model-specification)
+[5] [Python Documentation: 8.3. +collections — High-performance dataset datatypes](https://docs.python.org/2/library/collections.html)
+[6] [Zarr Version 2 Specification](https://zarr.readthedocs.io/en/stable/spec/v2.html)
+[7] [XArray Zarr Encoding Specification](http://xarray.pydata.org/en/latest/internals.html#zarr-encoding-specification)
+[8] [Dynamic Filter Loading](https://support.hdfgroup.org/HDF5/doc/Advanced/DynamicallyLoadedFilters/HDF5DynamicallyLoadedFilters.pdf)
+[9] [Officially Registered Custom HDF5 Filters](https://portal.hdfgroup.org/display/support/Registered+Filter+Plugins)
+[10] [C-Blosc Compressor Implementation](https://github.com/Blosc/c-blosc)
+[11] [Conda-forge packages / aws-sdk-cpp](https://anaconda.org/conda-forge/aws-sdk-cpp)
+[12] [GDAL Zarr](https://gdal.org/drivers/raster/zarr.html)
+[13] [NetCDF ZARR Data Model Specification Version 3](https://zarr-specs.readthedocs.io/en/latest/specs.html) + # Appendix A. Building NCZarr Support {#nczarr_build} Currently the following build cases are known to work. @@ -501,6 +561,7 @@ The relevant ./configure options are as follows. The relevant CMake flags are as follows. 1. *-DNETCDF_ENABLE_NCZARR=off* -- equivalent to the Automake *--disable-nczarr* option. + ## Testing NCZarr S3 Support {#nczarr_testing_S3_support} The relevant tests for S3 support are in the _nczarr_test_ directory. @@ -519,6 +580,10 @@ also test S3 support with this option. ```` --with-s3-testing=yes ```` +Otherwise, read-only testing is possible by specifying +```` +--with-s3-testing=public +```` ### NetCDF CMake Build @@ -551,7 +616,7 @@ This affects the depth to which groups can be nested because the key encodes the # Appendix C. JSON Attribute Convention. {#nczarr_json} -The Zarr V2 specification is somewhat vague on what is a legal +The Zarr V2 (and V3) specification is somewhat vague on what is a legal value for an attribute. The examples all show one of two cases: 1. A simple JSON scalar atomic values (e.g. int, float, char, etc), or 2. A JSON array of such values. @@ -623,7 +688,7 @@ and then store it as the equivalent netcdf vector. * If the dtype is not defined, then infer the dtype based on the first JSON value in the array, and then store it as the equivalent netcdf vector. -3. The attribute is any other JSON structure. +3. All other JSON-expressions. * Un-parse the expression to an equivalent sequence of characters, and then store it as of type NC_CHAR. ## Notes @@ -678,22 +743,23 @@ For writing variables and NCZarr attributes, the type mapping is as follows: Admittedly, this encoding is a bit of a hack. So when reading data with a pure zarr implementaion -the above types should always appear as strings, +attributes with the above types should always appear as strings, and the type that signals NC_CHAR (in NCZarr) would be handled by Zarr as a string of length 1. - - -# References {#nczarr_bib} - -[1] [Amazon Simple Storage Service Documentation](https://docs.aws.amazon.com/s3/index.html)
-[2] [Amazon Simple Storage Service Library](https://github.com/aws/aws-sdk-cpp)
-[3] [The LibZip Library](https://libzip.org/)
-[4] [NetCDF ZARR Data Model Specification](https://www.unidata.ucar.edu/blogs/developer/en/entry/netcdf-zarr-data-model-specification)
-[5] [Python Documentation: 8.3. -collections — High-performance dataset datatypes](https://docs.python.org/2/library/collections.html)
-[6] [Zarr Version 2 Specification](https://zarr.readthedocs.io/en/stable/spec/v2.html)
-[7] [XArray Zarr Encoding Specification](http://xarray.pydata.org/en/latest/internals.html#zarr-encoding-specification)
-[8] [Dynamic Filter Loading](https://support.hdfgroup.org/HDF5/doc/Advanced/DynamicallyLoadedFilters/HDF5DynamicallyLoadedFilters.pdf)
-[9] [Officially Registered Custom HDF5 Filters](https://portal.hdfgroup.org/display/support/Registered+Filter+Plugins)
-[10] [C-Blosc Compressor Implementation](https://github.com/Blosc/c-blosc)
-[11] [Conda-forge packages / aws-sdk-cpp](https://anaconda.org/conda-forge/aws-sdk-cpp)
-[12] [GDAL Zarr](https://gdal.org/drivers/raster/zarr.html)
- # Change Log {#nczarr_changelog} [Note: minor text changes are not included.] @@ -866,6 +911,9 @@ intended to be a detailed chronology. Rather, it provides highlights that will be of interest to NCZarr users. In order to see exact changes, It is necessary to use the 'git diff' command. +## 01/16/2025 +1. Document the addition of .zmetadata support. + ## 03/31/2024 1. Document the change to V2 to using attributes to hold NCZarr metadata. @@ -891,4 +939,4 @@ include arbitrary JSON expressions; see Appendix D for more details. __Author__: Dennis Heimbigner
__Email__: dmh at ucar dot edu
__Initial Version__: 4/10/2020
-__Last Revised__: 4/02/2024 +__Last Revised__: 1/16/2025 diff --git a/docs/quickstart_paths.md b/docs/quickstart_paths.md index 4481f756b0..4a7e8cc6e5 100644 --- a/docs/quickstart_paths.md +++ b/docs/quickstart_paths.md @@ -62,20 +62,7 @@ The query and fragment are optional. ### Examples of URL Paths for NetCDF-C * https://thredds.ucar.edu/catalog -## Addendum A. Amazon S3 Specific URLS {#nc_paths_s3_urls} -A URL path is required for accessing datasets on the Amazon S3 storage cloud. -Unfortunately S3 URLs are complicated. -It has the following features: -* Protocol: _https_ or _s3_. The _s3_ scheme is equivalent to "https" plus setting various tags in the query and/or fragment part of the URL. -* Host: Amazon S3 defines three forms: _Virtual_, _Path_, and _S3_ - + _Virtual_: the host includes the bucket name as in __bucket.s3.<region>.amazonaws.com__ or __bucket.s3.amazonaws.com__ - + _Path_: the host does not include the bucket name, but rather the bucket name is the first segment of the path. For example __s3.<region>.amazonaws.com/bucket__ or __s3.amazonaws.com/bucket__ - + _S3_: the protocol is "s3:" and if the host is a single name, then it is interpreted as the bucket. The region is determined using an algorithm defined in the nczarr documentation. - + _Other_: It is possible to use other non-Amazon cloud storage, but that is cloud library dependent. -* Query: currently not used. -* Fragment: the fragment is of the form _key=value&key=value&..._. Depending on the key, the _value_ part may be left out and some default value will be used. The exact set of possible keys is defined in the nczarr documentation. - -## Addendum B. Known Fragment Keys {#nc_paths_frag_keys} +## Addendum A. Known Fragment Keys {#nc_paths_frag_keys} The fragment part of a URL is used to pass information deep into the netcdf-c library to control its actions. @@ -84,8 +71,8 @@ This appendix list known keys, although it may be somewhat out-of-date. The current set of keys used in the netcdf-c library is as follows. * _mode_ -- A special key that is used to provide single values for controlling the netcdf-c library. It consists of a comma separated sequence of values primarily used to control the file format. -The *mode* key supports the following values - - _dap2_ -- Specifies that the URL accesses a resource using the DAP2 protocol +The *mode* key currently supports the following values + - _dap2_ -- Specifies that the URL accesses a resource using the DAP2 protocol (default if no mode is specified) - _dap4_ -- Specifies that the URL accesses a resource using the DAP4 protocol - _netcdf-3_ -- Specifies that a file is a netcdf-classic file - _classic_ -- Alias for _netcdf-3_ @@ -101,6 +88,7 @@ The *mode* key supports the following values - _file_ --Specifies that the file is an NCZarr/Zarr file stored as a file tree - _zip_ --Specifies that the file is an NCZarr/Zarr file stored as a zip file - _bytes_ -- Specifies that the file is remote and is to be read using byte-range support + - _zoh_ --Specifies that the file is remote and supports the [GWDG ZoH](https://pad.gwdg.de/DtHGRP38Sw2YQDAAjPuP2Q) (Zarr-over-HTTP) protocol in NCZarr format * _dap2_ -- equivalent to "mode=dap2" * _dap4_ -- equivalent to "mode=dap4" @@ -108,3 +96,51 @@ The *mode* key supports the following values * _log_ -- turn on logging for the duration of the data request * _show=fetch_ -- log curl fetch commands +## Addendum B. Amazon S3 Specific URLS {#nc_paths_s3_urls} +A URL path is required for accessing datasets on the Amazon S3 storage cloud. +Unfortunately S3 URLs are complicated. +They can have the following forms: +* _Virtual_: the protocol is "http:" or "https:", the mode specifies "s3", and the host starts with the bucket name; e.g. __bucket.s3.<region>.amazonaws.com__ or __bucket.s3.amazonaws.com__ +* _Path_: the protocol is "http:" or "https:", the mode specifies "s3", and the host does not include the bucket name, but rather the bucket name is the first segment of the path. For example __s3.<region>.amazonaws.com/bucket__ or __s3.amazonaws.com/bucket__ +* _Protocol_: the protocol is "s3:" and if the host is a single name, then it is interpreted as the bucket. The region is determined using an algorithm defined in the nczarr documentation. + +For all of the above URL forms, there are two additional pieces. +* Query: currently not used. +* Fragment: the fragment is of the form _key=value&key=value&..._. Depending on the key, the _value_ part may be left out and some default value will be used. The exact set of possible keys is defined in the nczarr documentation. + +## Addendum C. Google Storage Specific URLS {#nc_paths_google_urls} +Google provides an interface to its storage that is compatible with the Amazon S3 REST API. +A URL path is required for accessing datasets on the Google storage cloud. +Note that the Google host is always "storage.googleapis.com" and has no concept of region. +It has the following forms. +* _Path_: the bucket name is the first segment of the path. +For example __storage.googleapis.com/bucket__. +* _Protocol_: the protocol is "gs3:" and if the host is a single name, then it is interpreted as the bucket. The _gs3_ scheme is equivalent to "https" plus setting various tags in the query and/or fragment part of the URL. +For example __gs3://bucket/__. + +For all of the above URL forms, there are two additional pieces. +* Query: currently not used. +* Fragment: the fragment is of the form _key=value&key=value&..._. Depending on the key, the _value_ part may be left out and some default value will be used. The exact set of possible keys is defined in the nczarr documentation. + +## Addendum D. Zarr-Over-HTTP (ZoH) Protocol Specific URLS {#nc_paths_zoh_urls} +The [GWDG ZoH](https://pad.gwdg.de/DtHGRP38Sw2YQDAAjPuP2Q) (Zarr-over-HTTP) protocol provides an interface to any server supporting the ZoH REST API. +The URLs for this API are very similar to the S3 or Google URLs. +Note the _virtual_ URL format is not currently supported. +A ZoH URL has one of the following forms. +* _Path_: the protocol is "http:" or "https:", the host is any standard host (including an optional port number), and the bucket name is the first segment of the path. +For example __http://zoh.gwdg.de/<bucket>/<key>__. +* _Protocol_: the protocol is "zoh:" and a complete host must be specified. +The URL path part is the key to be interpreted by the ZoH server +as it wishes. + +For all of the above URL forms, there are two additional pieces. +* Query: currently not used. +* Fragment: the fragment is of the form _key=value&key=value&..._. Depending on the key, the _value_ part may be left out and some default value will be used. The exact set of possible keys is defined in the nczarr documentation. + +## Point of Contact {#nc_paths_poc} + +__Author__: Dennis Heimbigner
+__Email__: dmh at ucar dot edu
+__Initial Version__: 4/10/2020
+__Last Revised__: 1/16/2025 + diff --git a/include/CMakeLists.txt b/include/CMakeLists.txt index d58d7f7aa7..96d2d7cbc1 100644 --- a/include/CMakeLists.txt +++ b/include/CMakeLists.txt @@ -4,6 +4,8 @@ # University Corporation for Atmospheric Research/Unidata. # See netcdf-c/COPYRIGHT file for more info. + + ##### # Installation of various netCDF headers. ##### @@ -56,6 +58,10 @@ ADD_EXTRA_DIST("${CUR_EXTRA_DIST}") # One time read ncextern.h file(READ ncexternl.h NCEXTH0) +##### +# Built Sources +##### + # Built source: netcdf_json.h file(READ ncjson.h JSONH0) STRING(REPLACE "NCJSON_H" "NETCDF_JSON_H" JSONH1 "${JSONH0}") diff --git a/include/Makefile.am b/include/Makefile.am index f47bdf4dd2..07e663779d 100644 --- a/include/Makefile.am +++ b/include/Makefile.am @@ -10,7 +10,7 @@ include_HEADERS = netcdf.h netcdf_meta.h netcdf_mem.h netcdf_aux.h \ netcdf_filter.h netcdf_filter_build.h netcdf_filter_hdf5_build.h \ -netcdf_dispatch.h +netcdf_dispatch.h netcdf_vutils.h include_HEADERS += netcdf_json.h netcdf_proplist.h @@ -48,21 +48,26 @@ BUILT_SOURCES = netcdf_json.h netcdf_proplist.h # marked with a macro (OPTSTATIC) that marks the entry point as # static inside netcdf_json.h. This is an ugly hack to avoid # having to reference libnetcdf in the nczarr code wrappers. -# Note also that we incorporate the core of ncexternl.h into the netcdf_json.h file. +# Note that the file is built in builddir in case the build +# is out of source. # Give the recipe for building netcdf_json.h netcdf_json.h: ${top_srcdir}/libdispatch/ncjson.c ${top_srcdir}/include/ncjson.h ${top_srcdir}/include/ncexternl.h rm -fr ${builddir}/netcdf_json.h - cat ${srcdir}/ncjson.h | sed -e 's/NCJSON_H/NETCDF_JSON_H/' >> ${builddir}/netcdf_json.h - echo '#ifdef NETCDF_JSON_H' >> ${builddir}/netcdf_json.h - sed -e '/ncjson.h/d' < ${srcdir}/../libdispatch/ncjson.c >> ${builddir}/netcdf_json.h + cat ${srcdir}/ncjson.h \ + | sed -e '/!NCJSON_H/d' \ + | sed -e 's/NCJSON_H/NETCDF_JSON_H/' \ + >> ${builddir}/netcdf_json.h + cat ${srcdir}/../libdispatch/ncjson.c | sed -e '/ncjson.h/d' >> ${builddir}/netcdf_json.h echo '#endif /*NETCDF_JSON_H*/' >> ${builddir}/netcdf_json.h # netcdf_proplist is analogous to netcdf_json but, of course, using libdispatch/ncproplist.c and include/ncproplist.h # Give the recipe for building netcdf_proplist.h. Basically same as for netcdf_json.h netcdf_proplist.h: ${top_srcdir}/libdispatch/ncproplist.c ${top_srcdir}/include/ncproplist.h ${top_srcdir}/include/ncexternl.h rm -fr ${builddir}/netcdf_proplist.h - cat ${srcdir}/ncproplist.h | sed -e 's/NCPROPLIST_H/NETCDF_PROPLIST_H/' >> ${builddir}/netcdf_proplist.h - echo '#ifdef NETCDF_PROPLIST_H' >> ${builddir}/netcdf_proplist.h - sed -e '/ncproplist.h/d' < ${srcdir}/../libdispatch/ncproplist.c >> ${builddir}/netcdf_proplist.h + cat ${srcdir}/ncproplist.h \ + | sed -e '/!NCPROPLIST_H/d' \ + | sed -e 's/NCPROPLIST_H/NETCDF_PROPLIST_H/' \ + >> ${builddir}/netcdf_proplist.h + cat ${srcdir}/../libdispatch/ncproplist.c | sed -e '/ncproplist.h/d' >> ${builddir}/netcdf_proplist.h echo '#endif /*NETCDF_PROPLIST_H*/' >> ${builddir}/netcdf_proplist.h diff --git a/include/nc4internal.h b/include/nc4internal.h index 21157e9f9a..7e64dd3ef3 100644 --- a/include/nc4internal.h +++ b/include/nc4internal.h @@ -43,7 +43,7 @@ /* typedef enum {GET, PUT} NC_PG_T; */ /** These are the different objects that can be in our hash-lists. */ -typedef enum {NCNAT, NCVAR, NCDIM, NCATT, NCTYP, NCFLD, NCGRP, NCFIL} NC_SORT; +typedef enum {NCNAT, NCVAR, NCDIM, NCATT, NCTYP, NCFLD, NCGRP, NCFILE} NC_SORT; /** The netCDF V2 error code. */ #define NC_V2_ERR (-1) @@ -100,6 +100,8 @@ typedef enum {NCNAT, NCVAR, NCDIM, NCATT, NCTYP, NCFLD, NCGRP, NCFIL} NC_SORT; # define VIRTUALFLAG 8 /** Per-variable attribute, as opposed to global */ # define VARFLAG 16 + /** If written via NCZarr, then is a complex json attribute */ +# define COMPLEXJSON 32 /** Boolean type, to make the code easier to read. */ typedef enum {NC_FALSE = 0, NC_TRUE = 1} nc_bool_t; @@ -460,24 +462,39 @@ extern int nc_get_alignment(int* thresholdp, int* alignmentp); /**************************************************/ /* Begin to collect global state info in one place (more to do) */ +#ifdef WATCH +extern NClist* pluginpaths; +extern NClist* zpluginpaths; +#define PLUGINPATHS pluginpaths +#define ZPLUGINPATHS zpluginpaths +#else +#define PLUGINPATHS gs->pluginpaths +#define ZPLUGINPATHS gs->zarr.pluginpaths +#endif + typedef struct NCglobalstate { int initialized; char* tempdir; /* track a usable temp dir */ char* home; /* track $HOME */ char* cwd; /* track getcwd */ struct NCRCinfo* rcinfo; /* Currently only one rc file per session */ +#ifndef WATCH NClist* pluginpaths; /* Global Plugin State */ +#endif struct GlobalZarr { /* Zarr specific parameters */ char dimension_separator; int default_zarrformat; +#ifndef WATCH NClist* pluginpaths; /* NCZarr mirror of plugin paths */ - NClist* codec_defaults; - NClist* default_libs; - /* All possible HDF5 filter plugins */ - /* Consider onverting to linked list or hash table or +#endif + NClist* codec_defaults; /* NClist */ + NClist* default_libs; /* NClist */ + /* All possible HDF5 filter plugins (except hdf5raw */ + /* Consider converting to linked list or hash table or equivalent since very sparse */ struct NCZ_Plugin** loaded_plugins; //[H5Z_FILTER_MAX+1]; size_t loaded_plugins_max; /* plugin filter id index. 0 #endif +#ifdef HAVE_STRING_H +#include +#endif +#ifdef HAVE_UNISTD_H +#include +#endif +#ifdef __APPLE__ /* GCC strikes again */ +#ifndef uint +typedef unsigned int uint; +#endif +#ifndef ushort +typedef unsigned short ushort; +#endif +#endif /*__APPLE__*/ + +#include +#include + +#ifdef _WIN32 +#include +#include +#endif /* This is included in bottom @@ -49,7 +71,7 @@ typedef int mode_t; #define F_OK 00 #endif -#endif +#endif /*_WIN32*/ /*Warning: Cygwin with -ansi does not define these functions in its headers.*/ @@ -134,6 +156,9 @@ unsigned long long int strtoull(const char*, char**, int); #endif /*_WIN32*/ #ifndef nulldup +#ifndef _WIN32 +#pragma GCC diagnostic ignored "-Wnonnull" +#endif #define nulldup(s) ((s)==NULL?NULL:strdup(s)) #endif @@ -170,7 +195,6 @@ typedef unsigned long long uint64; typedef unsigned long long uint64_t; #endif -#ifndef _WIN32 #ifndef HAVE_UINTPTR_T #ifndef uintptr_t #if SIZEOF_VOIDP == 8 @@ -180,7 +204,6 @@ typedef unsigned long long uint64_t; #endif #endif #endif -#endif #ifndef HAVE_SIZE64_T typedef unsigned long long size64_t; diff --git a/include/ncjson.h b/include/ncjson.h index 1de7ca0cbf..7d2f58e660 100644 --- a/include/ncjson.h +++ b/include/ncjson.h @@ -3,7 +3,7 @@ */ #ifndef NCJSON_H -#define NCJSON_H +#define NCJSON_H 1 #ifndef OPTEXPORT #ifdef NETCDF_JSON_H @@ -11,13 +11,18 @@ #else /*!NETCDF_JSON_H*/ #ifdef _WIN32 #define OPTEXPORT __declspec(dllexport) -#else +#else /*!WIN32*/ #define OPTEXPORT extern -#endif +#endif /*WIN32*/ #endif /*NETCDF_JSON_H*/ #endif /*OPTEXPORT*/ /**************************************************/ + +/* Return codes */ +#define NCJ_OK 0 /* must equal NC_NOERR in netcdf.h */ +#define NCJ_ERR (-1) /* must equal NC_ERROR in netcdf.h */ + /* Json object sorts (note use of term sort rather than e.g. type or discriminant) */ #define NCJ_UNDEF 0 #define NCJ_STRING 1 @@ -30,6 +35,10 @@ #define NCJ_NSORTS 8 +/* Dump/text/unparse flags */ +#define NCJFLAG_NONE 0 +#define NCJFLAG_INDENTED 1 + /* Define a struct to store primitive values as unquoted strings. The sort will provide more info. Do not bother with a union since the amount of saved space is minimal. @@ -39,8 +48,9 @@ typedef struct NCjson { int sort; /* of this object */ char* string; /* sort != DICT|ARRAY */ struct NCjlist { - size_t len; - struct NCjson** contents; + size_t alloc; + size_t len; + struct NCjson** contents; } list; /* sort == DICT|ARRAY */ } NCjson; @@ -48,16 +58,15 @@ typedef struct NCjson { don't use union so we can know when to reclaim sval */ struct NCJconst {int bval; long long ival; double dval; char* sval;}; -#define NCJconst_empty {0,0,0.0,NULL} /**************************************************/ /* Extended API */ -/* Return 0 if ok else -1 */ +/* Return NCJ_OK if ok else NCJ_ERR */ #if defined(__cplusplus) extern "C" { -#endif +#endif /*__cplusplus*/ /* Parse a string to NCjson*/ OPTEXPORT int NCJparse(const char* text, unsigned flags, NCjson** jsonp); @@ -78,26 +87,37 @@ OPTEXPORT int NCJnewstring(int sort, const char* value, NCjson** jsonp); OPTEXPORT int NCJnewstringn(int sort, size_t len, const char* value, NCjson** jsonp); /* Get dict key value by name */ -OPTEXPORT int NCJdictget(const NCjson* dict, const char* key, const NCjson** valuep); +OPTEXPORT int NCJdictget(const NCjson* dict, const char* key, NCjson** valuep); + +/* Functional version of NCJdictget */ +OPTEXPORT NCjson* NCJdictlookup(const NCjson* dict, const char* key); /* Convert one json sort to value of another type; don't use union so we can know when to reclaim sval */ OPTEXPORT int NCJcvt(const NCjson* value, int outsort, struct NCJconst* output); -/* Insert an atomic value to an array or dict object. */ +/* Append an atomic value to an array or dict object. */ OPTEXPORT int NCJaddstring(NCjson* json, int sort, const char* s); /* Append value to an array or dict object. */ OPTEXPORT int NCJappend(NCjson* object, NCjson* value); -/* Insert key-value pair into a dict object. key will be copied */ -OPTEXPORT int NCJinsert(NCjson* object, const char* key, NCjson* value); +/* Append string value to an array or dict object. */ +OPTEXPORT int NCJappendstring(NCjson* object, int sort, const char* s); -/* Insert key-value pair as strings into a dict object. - key and value will be copied */ +/* Append int value to an array or dict object. */ +OPTEXPORT int NCJappendint(NCjson* object, long long n); + +/* Insert (string)key-(NCjson*)value pair into a dict object. key will be copied; jvalue will not */ +OPTEXPORT int NCJinsert(NCjson* object, const char* key, NCjson* jvalue); + +/* Insert key-value pair into a dict object. key and value will be copied */ OPTEXPORT int NCJinsertstring(NCjson* object, const char* key, const char* value); -/* Insert key-value pair where value is an int */ -OPTEXPORT int NCJinsertint(NCjson* object, const char* key, long long ivalue); +/* Overwrite key-value pair in a dict object. Act like NCJinsert if key not found */ +OPTEXPORT int NCJoverwrite(NCjson* object, const char* key, NCjson* value); + +/* Insert key-value pair into a dict object. key and value will be copied */ +OPTEXPORT int NCJinsertint(NCjson* object, const char* key, long long n); /* Unparser to convert NCjson object to text in buffer */ OPTEXPORT int NCJunparse(const NCjson* json, unsigned flags, char** textp); @@ -106,37 +126,57 @@ OPTEXPORT int NCJunparse(const NCjson* json, unsigned flags, char** textp); OPTEXPORT int NCJclone(const NCjson* json, NCjson** clonep); #ifndef NETCDF_JSON_H + /* dump NCjson* object to output file */ OPTEXPORT void NCJdump(const NCjson* json, unsigned flags, FILE*); + /* convert NCjson* object to output string */ -OPTEXPORT const char* NCJtotext(const NCjson* json); +OPTEXPORT const char* NCJtotext(const NCjson* json, unsigned flags); + +/* Sort a dictionary by key */ +OPTEXPORT void NCJdictsort(NCjson* jdict); + #endif /*NETCDF_JSON_H*/ #if defined(__cplusplus) } -#endif +#endif /*__cplusplus*/ /* Getters */ #define NCJsort(x) ((x)->sort) #define NCJstring(x) ((x)->string) -#define NCJlength(x) ((x)==NULL ? 0 : (x)->list.len) -#define NCJdictlength(x) ((x)==NULL ? 0 : (x)->list.len/2) +#define NCJarraylength(x) ((x)==NULL ? 0 : (x)->list.len) +#define NCJdictlength(x) ((x)==NULL ? 0 : ((x)->list.len) / 2) #define NCJcontents(x) ((x)->list.contents) #define NCJith(x,i) ((x)->list.contents[i]) -#define NCJdictith(x,i) ((x)->list.contents[2*i]) +#define NCJdictkey(x,i) ((x)->list.contents[(i)*2]) +#define NCJdictvalue(x,i) ((x)->list.contents[((i)*2)+1]) /* Setters */ #define NCJsetsort(x,s) (x)->sort=(s) #define NCJsetstring(x,y) (x)->string=(y) #define NCJsetcontents(x,c) (x)->list.contents=(c) -#define NCJsetlength(x,l) (x)->list.len=(l) +#define NCJsetarraylength(x,l) (x)->list.len=(l) +#define NCJsetdictlength(x,l) (x)->list.len=((l)*2) /* Misc */ #define NCJisatomic(j) ((j)->sort != NCJ_ARRAY && (j)->sort != NCJ_DICT && (j)->sort != NCJ_NULL && (j)->sort != NCJ_UNDEF) /**************************************************/ +/* Error detection helper */ +#undef NCJDEBUG +#ifdef NCJDEBUG +static int +NCJBREAKPOINT(int err) +{ + (void)NCJBREAKPOINT; + return err; +} +#else +#define NCJBREAKPOINT(err) (err) +#endif /*NCJDEBUG*/ +#define NCJcheck(expr) do{if((expr) < 0) {stat = NCJBREAKPOINT(NCJ_ERR); goto done;}}while(0) -#endif /*NCJSON_H*/ - - +/**************************************************/ +#endif /*!NCJSON_H*/ /* Leave the ! as a tag for sed */ diff --git a/include/nclog.h b/include/nclog.h index aea5fb52e1..e848ee3219 100644 --- a/include/nclog.h +++ b/include/nclog.h @@ -11,7 +11,7 @@ #include #include "ncexternl.h" -#undef NCCATCH +#define NCCATCH #define NCENVLOGGING "NCLOGGING" #define NCENVTRACING "NCTRACING" @@ -25,7 +25,7 @@ #define NCLOGDEBUG (4) /* Everything */ /* Support ptr valued arguments that are used to store results */ -#define PTRVAL(t,p,d) ((t)((p) == NULL ? (d) : *(p))) +#define PTRVAL(t,p,d) (((p) == NULL ? (t)(d) : (t)*(p))) #if defined(_CPLUSPLUS_) || defined(__CPLUSPLUS__) extern "C" { diff --git a/include/ncproplist.h b/include/ncproplist.h index 9561789a6c..044f6ca255 100644 --- a/include/ncproplist.h +++ b/include/ncproplist.h @@ -22,12 +22,16 @@ /**************************************************/ /* This is used to store a property list mapping a small number of -fixed-sized key strings to an arbitrary uintptr_t value. The -uintptr_t type is used to ensure that the value can be a pointer or a -small string upto sizeof(uintptr_t) - 1 (for trailing nul). The big -problem is reclaiming the value if it a pointer. The fact that the -number of keys is small makes it feasible to use linear search. -This is currently only used for plugins, but may be extended to other uses. +keys to objects. The uintptr_t type is used to ensure that the value can be a pointer or a +small string upto sizeof(uintptr_t) - 1 (for trailing nul) or an integer constant. + +There are two operations that may be defined for the property: +1. reclaiming the value when proplist is free'd and property value points to allocated data of arbitrary complexity. +2. coping the value (for cloning) if it points to allocated data of arbitrary complexity. + +The fact that the number of keys is small makes it feasible to use +linear search. This is currently only used for plugins, but may be +extended to other uses. */ /*! Proplist-related structs. @@ -38,23 +42,40 @@ This is currently only used for plugins, but may be extended to other uses. 1. It is critical that |uintptr_t| == |void*| */ -#define NCPROPSMAXKEY 31 /* characters assert (NCPROPSMAXKEY+1)/8 == 0*/ +#define NCPROPSMAXKEY 31 /* characters; assert (NCPROPSMAXKEY+1)/8 == 0*/ -/* Returns 0 => error; 1 => success */ -typedef int (*NCPreclaimfcn)(uintptr_t userdata, const char* key, void* value, uintptr_t size); +/* Opaque forward */ +struct NCPpair; -/* The property list proper is a sequence of these objects */ -typedef struct NCProperty { +/* This function performs all of the following operations on a complex type */ +typedef enum NCPtypeop {NCP_RECLAIM=1,NCP_COPY=2} NCPtypeop; + +/* There are three possible types for a property value */ +typedef enum NCPtype { + NCP_CONST=0, /* Value is a simple uintptr_t constant */ + NCP_BYTES=2, /* Value points to a counted sequence of bytes; If a string, + then it includes the nul term character */ + NCP_COMPLEX=3 /* Value points to an arbitraryily complex structure */ +} NCPtype; + +/* (Returns < 0 => error) (>= 0 => success) */ +typedef int (*NCPtypefcn)(NCPtypeop op, struct NCPpair* input, struct NCPpair* output); + +/* Expose this prefix of NCProperty; used in clone and lookup */ +/* Hold just the key+value pair */ +typedef struct NCPpair { char key[NCPROPSMAXKEY+1]; /* copy of the key string; +1 for trailing nul */ - uintptr_t flags; -# define NCPF_SIMPLE (1<<0) /* non-reclaimable */ -# define NCPF_BYTES (1<<1) /* reclaimable bytes */ -# define NCPF_COMPLEX (1<<2) /* extended case */ + NCPtype sort; uintptr_t value; uintptr_t size; /* size = |value| as ptr to memory, if string, then include trailing nul */ - uintptr_t userdata; /* extra data for following functions */ - NCPreclaimfcn reclaim; -} NCProperty; +} NCPpair; + +/* The property list proper is a sequence of these objects */ +typedef struct NCPproperty { + NCPpair pair; /* Allowed by C language standard */ + uintptr_t userdata; /* extra data for the type function */ + NCPtypefcn typefcn; /* Process type operations */ +} NCPproperty; /* The property list object. @@ -62,7 +83,7 @@ The property list object. typedef struct NCproplist { size_t alloc; /* allocated space to hold properties */ size_t count; /* # of defined properties */ - NCProperty* properties; + NCPproperty* properties; } NCproplist; /**************************************************/ @@ -72,19 +93,24 @@ typedef struct NCproplist { extern "C" { #endif +/* All int valued functions return < 0 if error; >= 0 otherwise */ + + /* Create, free, etc. */ OPTEXPORT NCproplist* ncproplistnew(void); OPTEXPORT int ncproplistfree(NCproplist*); -/* Locate a proplist entry */ -OPTEXPORT int ncproplistadd(NCproplist* plist,const char* key, uintptr_t value); /* use when reclaim not needed */ - /* Insert properties */ OPTEXPORT int ncproplistadd(NCproplist* plist,const char* key, uintptr_t value); /* use when reclaim not needed */ OPTEXPORT int ncproplistaddstring(NCproplist* plist, const char* key, const char* str); /* use when value is simple string (char*) */ -OPTEXPORT int ncproplistaddbytes(NCproplist* plist, const char* key, void* value, uintptr_t size); /* use when value is simple ptr and reclaim is simple free function */ -OPTEXPORT int ncproplistaddx(NCproplist* plist, const char* key, void* value, uintptr_t size, uintptr_t userdata, NCPreclaimfcn); /* fully extended case */ +/* Insert an instance of type NCP_BYTES */ +OPTEXPORT int ncproplistaddbytes(NCproplist* plist, const char* key, void* value, uintptr_t size); + +/* Add instance of a complex type */ +OPTEXPORT int ncproplistaddx(NCproplist* plist, const char* key, void* value, uintptr_t size, uintptr_t userdata, NCPtypefcn typefcn); + +/* clone; keys are copies and values are copied using the NCPtypefcn */ OPTEXPORT int ncproplistclone(const NCproplist* src, NCproplist* clone); /* @@ -105,4 +131,4 @@ OPTEXPORT int ncproplistith(const NCproplist*, size_t i, char* const * keyp, uin } #endif -#endif /*NCPROPLIST_H*/ +#endif /*!NCPROPLIST_H*/ /* WARNING: Do not remove the !; used in building netcdf_proplist.h */ diff --git a/include/ncrc.h b/include/ncrc.h index 5705f723ac..a49b06d065 100644 --- a/include/ncrc.h +++ b/include/ncrc.h @@ -46,6 +46,7 @@ typedef struct NCRCinfo { /* Opaque structures */ struct NCS3INFO; +enum NCS3SVC; #if defined(__cplusplus) extern "C" { @@ -83,6 +84,9 @@ EXTERNL int NC_addmodetag(NCURI* uri, const char* tag); EXTERNL int NC_split_delim(const char* path, char delim, NClist* segments); EXTERNL int NC_join(struct NClist* segments, char** pathp); EXTERNL int NC_joinwith(NClist* segments, const char* sep, const char* prefix, const char* suffix, char** pathp); +EXTERNL void NC_sortenvv(size_t n, char** envv); +EXTERNL void NC_sortlist(NClist* l); +EXTERNL void NC_freeenvv(size_t n, char** envv); #if defined(__cplusplus) } diff --git a/include/ncs3sdk.h b/include/ncs3sdk.h index adc7e456be..f81f670efc 100644 --- a/include/ncs3sdk.h +++ b/include/ncs3sdk.h @@ -14,10 +14,16 @@ /* Track the server type, if known */ typedef enum NCS3SVC {NCS3UNK=0, /* unknown */ - NCS3=1, /* s3.amazon.aws */ - NCS3GS=2 /* storage.googleapis.com */ + NCS3=1, /* s3.amazon.aws */ + NCS3GS=2, /* storage.googleapis.com */ +#ifdef NETCDF_ENABLE_ZOH + NCS3ZOH=4, /* ZoH Server */ +#endif } NCS3SVC; +/* Opaque Handles */ +struct NClist; + typedef struct NCS3INFO { char* host; /* non-null if other*/ char* region; /* region */ @@ -55,9 +61,10 @@ EXTERNL int NC_s3sdkbucketdelete(void* s3client, NCS3INFO* info, char** errmsgp) EXTERNL int NC_s3sdkinfo(void* client0, const char* bucket, const char* pathkey, unsigned long long* lenp, char** errmsgp); EXTERNL int NC_s3sdkread(void* client0, const char* bucket, const char* pathkey, unsigned long long start, unsigned long long count, void* content, char** errmsgp); EXTERNL int NC_s3sdkwriteobject(void* client0, const char* bucket, const char* pathkey, unsigned long long count, const void* content, char** errmsgp); -EXTERNL int NC_s3sdkclose(void* s3client0, NCS3INFO* info, int deleteit, char** errmsgp); -EXTERNL int NC_s3sdkgetkeys(void* s3client0, const char* bucket, const char* prefix, size_t* nkeysp, char*** keysp, char** errmsgp); -EXTERNL int NC_s3sdksearch(void* s3client0, const char* bucket, const char* prefixkey0, size_t* nkeysp, char*** keysp, char** errmsgp); +EXTERNL int NC_s3sdkclose(void* s3client0, char** errmsgp); +EXTERNL int NC_s3sdktruncate(void* s3client0, const char* bucket, const char* prefix, char** errmsgp); +EXTERNL int NC_s3sdklist(void* s3client0, const char* bucket, const char* prefix, size_t* nkeysp, char*** keysp, char** errmsgp); +EXTERNL int NC_s3sdklistall(void* s3client0, const char* bucket, const char* prefixkey0, size_t* nkeysp, char*** keysp, char** errmsgp); EXTERNL int NC_s3sdkdeletekey(void* client0, const char* bucket, const char* pathkey, char** errmsgp); /* From ds3util.c */ diff --git a/include/netcdf.h b/include/netcdf.h index 934bdd998d..61d1bcabde 100644 --- a/include/netcdf.h +++ b/include/netcdf.h @@ -342,14 +342,18 @@ there. */ #define NC_SZIP_NN 32 /**< SZIP NN option mask. */ #define NC_SZIP_EC 4 /**< SZIP EC option mask. */ +/* If this is extended, then you need to modify nc4internal.c */ #define NC_NOQUANTIZE 0 /**< No quantization in use. */ #define NC_QUANTIZE_BITGROOM 1 /**< Use BitGroom quantization. */ #define NC_QUANTIZE_GRANULARBR 2 /**< Use Granular BitRound quantization. */ #define NC_QUANTIZE_BITROUND 3 /**< Use BitRound quantization. */ +#define NC_QUANTIZE_MAX NC_QUANTIZE_BITROUND /**@{*/ /** When quantization is used for a variable, an attribute of the - * appropriate name is added. */ + * appropriate name is added. + * If this set is extended, then propogate to NC_quantize_atts in nc4internal.c + */ #define NC_QUANTIZE_BITGROOM_ATT_NAME "_QuantizeBitGroomNumberOfSignificantDigits" #define NC_QUANTIZE_GRANULARBR_ATT_NAME "_QuantizeGranularBitRoundNumberOfSignificantDigits" #define NC_QUANTIZE_BITROUND_ATT_NAME "_QuantizeBitRoundNumberOfSignificantBits" @@ -376,7 +380,12 @@ there. */ #define NC_ISSYSERR(err) ((err) > 0) #define NC_NOERR 0 /**< No Error */ +#if 0 +/* This is unused, so re-purpose it to generic error */ #define NC2_ERR (-1) /**< Returned for all errors in the v2 API. */ +#else +#define NC_ERROR (-1) /**< Returned for generic errors */ +#endif /** Not a netcdf id. @@ -531,8 +540,10 @@ by the desired type. */ #define NC_EOBJECT (-140) /**< Some object exists when it should not */ #define NC_ENOOBJECT (-141) /**< Some object not found */ #define NC_EPLUGIN (-142) /**< Unclassified failure in accessing a dynamically loaded plugin> */ +#define NC_ENOTZARR (-143) /**< Malformed (NC)Zarr file */ +#define NC_EZARRMETA (-144) /**< Malformed (NC)Zarr file consolidated metadata */ -#define NC4_LAST_ERROR (-142) /**< @internal All netCDF errors > this. */ +#define NC4_LAST_ERROR (-144) /**< @internal All netCDF errors > this. */ /* * Don't forget to update docs/all-error-codes.md if adding new error codes here! diff --git a/include/netcdf_filter.h b/include/netcdf_filter.h index 70feb719ad..b5202f5d43 100644 --- a/include/netcdf_filter.h +++ b/include/netcdf_filter.h @@ -65,6 +65,10 @@ BLOSC_BITSHUFFLE=2 /* bit-wise shuffle */ enum BLOSC_SUBCOMPRESSORS {BLOSC_LZ=0, BLOSC_LZ4=1, BLOSC_LZ4HC=2, BLOSC_SNAPPY=3, BLOSC_ZLIB=4, BLOSC_ZSTD=5}; #endif +/* Codecs for hdf5 filters that do not have a codec */ +#define H5Z_FILTER_RAW ((unsigned int)((int)-1)) /* Fake filter id */ +#define H5Z_CODEC_RAW "_hdf5raw_" + #if defined(__cplusplus) extern "C" { #endif diff --git a/include/netcdf_filter_build.h b/include/netcdf_filter_build.h index 11c80f8095..c0b55508b3 100644 --- a/include/netcdf_filter_build.h +++ b/include/netcdf_filter_build.h @@ -22,6 +22,16 @@ #include "netcdf_filter_hdf5_build.h" +/* Avoid including netcdf_json.h and ncjson.h */ +#ifndef NCJSON_H +#include "netcdf_json.h" +#endif /*NCJSON_H*/ + +/* Ditto */ +#ifndef NCPROPLIST_H +#include "netcdf_proplist.h" +#endif + /**************************************************/ /* Build To a NumCodecs-style C-API for Filters */ @@ -84,19 +94,21 @@ The function pointers defined in NCZ_codec_t manipulate HDF5 parameters and NumC * Initialize use of the filter. This is invoked when a filter is loaded. -void (*NCZ_codec_initialize)(void); +void (*NCZ_codec_initialize)(struct NCproplist*); * Finalize use of the filter. Since HDF5 does not provide this functionality, the codec may need to do it. See H5Zblosc.c for an example. This function is invoked when a filter is unloaded. -void (*NCZ_codec_finalize)(void); +void (*NCZ_codec_finalize)(struct NCproplist*); * Convert a JSON representation to an HDF5 representation. Invoked when a NumCodec JSON Codec is extracted from Zarr metadata. -int (*NCZ_codec_to_hdf5)(const char* codec, int* nparamsp, unsigned** paramsp); +int (*NCZ_codec_to_hdf5)(struct NCproplist* env, const char* codec, unsigned int* idp, size_t* nparamsp, unsigned** paramsp); +@param env -- (in) extra environmental information @param codec -- (in) ptr to JSON string representing the codec. +@param idp -- the hdf5 filter id number; @param nparamsp -- (out) store the length of the converted HDF5 unsigned vector @param paramsp -- (out) store a pointer to the converted HDF5 unsigned vector; caller frees. Note the double indirection. @@ -105,8 +117,10 @@ int (*NCZ_codec_to_hdf5)(const char* codec, int* nparamsp, unsigned** paramsp); * Convert an HDF5 vector of visible parameters to a JSON representation. -int (*NCZ_hdf5_to_codec)(size_t nparams, const unsigned* params, char** codecp); +int (*NCZ_hdf5_to_codec)(struct NCproplist* env, unsigned id, size_t nparams, const unsigned* params, char** codecp); +@param env -- (in) extra environmental information +@param id -- the hdf5 filter id number; @param nparams -- (in) the length of the HDF5 unsigned vector @param params -- (in) pointer to the HDF5 unsigned vector. @param codecp -- (out) store the string representation of the codec; caller must free. @@ -115,10 +129,10 @@ int (*NCZ_hdf5_to_codec)(size_t nparams, const unsigned* params, char** codecp); * Convert a set of visible parameters to a set of working parameters using extra environmental information. Also allows for changes to the visible parameters. Invoked before filter is actually used. -int (*NCZ_modify_parameters)(int ncid, int varid, size_t* vnparamsp, unsigned** vparamsp, size_t* wnparamsp, unsigned** wparamsp); +int (*NCZ_modify_parameters)(const struct NCproplist* env, unsigned* idp, size_t* vnparamsp, unsigned** vparamsp, size_t* wnparamsp, unsigned** wparamsp); -@param ncid -- (in) ncid of the variable's group -@param varid -- (in) varid of the variable +@param env -- (in) properties, including file ncid and the variable varid +@param idp -- (in/out) the hdf5 filter id number; @params vnparamsp -- (in/out) number of visible parameters @params vparamsp -- (in/out) vector of visible parameters @params wnparamsp -- (out) number of working parameters @@ -127,8 +141,10 @@ int (*NCZ_modify_parameters)(int ncid, int varid, size_t* vnparamsp, unsigned** * Convert an HDF5 vector of visible parameters to a JSON representation. -int (*NCZ_hdf5_to_codec)(size_t nparams, const unsigned* params, char** codecp); +-int (*NCZ_hdf5_to_codec)(const struct NCproplist* env, unsigned id, size_t nparams, const unsigned* params, char** codecp); +@param env -- (in) extra environmental information +@param id -- (in) the hdf5 filter id number; @param nparams -- (in) the length of the HDF5 unsigned vector @param params -- (in) pointer to the HDF5 unsigned vector. @param codecp -- (out) store the string representation of the codec; caller must free. @@ -136,6 +152,22 @@ int (*NCZ_hdf5_to_codec)(size_t nparams, const unsigned* params, char** codecp); */ +/* Opaque */ +struct NCproplist; +struct NCjson; + +/* Test if JSON dict is in raw format. +@param jraw to test +@return NCJ_OK if in raw format; NCJ_ERR/NC_ERROR otherwise. +*/ +#ifndef NCraw_test +#define NC_RAWTAG "hdf5raw" +#define NC_RAWVERSION "1" +#define NCraw_test(jraw) (jraw == NULL || NCJsort(jraw) != NCJ_DICT \ + ? NCJ_ERR \ + : (strcmp(NCJstring(NCJdictlookup(jraw,NC_RAWTAG)),NC_RAWVERSION)!=0 ? NCJ_ERR : NCJ_OK)) +#endif /*NCraw_test*/ + /* The struct that provides the necessary filter info. The combination of version + sort uniquely determines @@ -146,12 +178,12 @@ typedef struct NCZ_codec_t { int sort; /* Format of remainder of the struct; Currently always NCZ_CODEC_HDF5 */ const char* codecid; /* The name/id of the codec */ - unsigned int hdf5id; /* corresponding hdf5 id */ - void (*NCZ_codec_initialize)(void); - void (*NCZ_codec_finalize)(void); - int (*NCZ_codec_to_hdf5)(const char* codec, size_t* nparamsp, unsigned** paramsp); - int (*NCZ_hdf5_to_codec)(size_t nparams, const unsigned* params, char** codecp); - int (*NCZ_modify_parameters)(int ncid, int varid, size_t* vnparamsp, unsigned** vparamsp, size_t* wnparamsp, unsigned** wparamsp); + unsigned hdf5id; /* corresponding hdf5 id */ + void (*NCZ_codec_initialize)(const struct NCproplist* env); + void (*NCZ_codec_finalize)(const struct NCproplist* env); + int (*NCZ_codec_to_hdf5)(const struct NCproplist* env, const char* codec, unsigned* idp, size_t* nparamsp, unsigned** paramsp); + int (*NCZ_hdf5_to_codec)(const struct NCproplist* env, unsigned id, size_t nparams, const unsigned* params, char** codecp); + int (*NCZ_modify_parameters)(const struct NCproplist* env, unsigned* idp, size_t* vnparamsp, unsigned** vparamsp, size_t* wnparamsp, unsigned** wparamsp); } NCZ_codec_t; #ifndef NC_UNUSED diff --git a/include/netcdf_filter_hdf5_build.h b/include/netcdf_filter_hdf5_build.h index 63d2857eef..34de979ad0 100644 --- a/include/netcdf_filter_hdf5_build.h +++ b/include/netcdf_filter_hdf5_build.h @@ -39,16 +39,14 @@ #include /* Older versions of the hdf library may define H5PL_type_t here */ #include - #else /*!USE_HDF5*/ /* Provide replacement definitions */ - /* WARNING: In order make NCZARR independent of HDF5, while still using HDF5-style filters, some HDF5 declarations need to be duplicated here with different names. Watch out for changes in the underlying HDF5 declarations. - See the file H5Zpublic.h for more detailed descriptions. + See the file H5Zpublic.h or H5Zdevelop.h for more detailed descriptions. Note that these declarations are always enabled because HDF5-style filters may have been created with these definitions @@ -61,8 +59,9 @@ /* H5Z_FILTER_RESERVED => H5Z_FILTER_RESERVED */ #define H5Z_FILTER_RESERVED 256 /*filter ids below this value are reserved for library use */ -/* H5Z_FILTER_MAX => H5Z_FILTER_MAX */ +#ifndef H5Z_FILTER_MAX #define H5Z_FILTER_MAX 65535 /*maximum filter id */ +#endif /* Only a limited set of definition and invocation flags are allowed */ #define H5Z_FLAG_MANDATORY 0x0000 /*filter is mandatory */ diff --git a/libdispatch/ncutil.h b/include/netcdf_vutils.h similarity index 89% rename from libdispatch/ncutil.h rename to include/netcdf_vutils.h index 44ab508b36..0324bade45 100644 --- a/libdispatch/ncutil.h +++ b/include/netcdf_vutils.h @@ -1,8 +1,8 @@ -/* Copyright 2018, UCAR/Unidata and OPeNDAP, Inc. +/* Copyright 2018, UCAR/Unidata See the COPYRIGHT file for more information. */ -#ifndef UTILS_H -#define UTILS_H 1 +#ifndef NCVUTILS_H +#define NCVUTILS_H 1 /* Define a header-only simple version of a dynamically expandable list and byte buffer */ /* To be used in code that should be independent of libnetcdf */ @@ -15,8 +15,8 @@ typedef struct VList { typedef struct VString { int nonextendible; /* 1 => fail if an attempt is made to extend this string*/ - unsigned int alloc; - unsigned int length; + unsigned alloc; + unsigned length; char* content; } VString; @@ -55,7 +55,7 @@ static void vlistexpand(VList* l) { void** newcontent = NULL; - size_t newsz; + unsigned newsz; if(l == NULL) return; newsz = (l->length * 2) + 1; /* basically double allocated space */ @@ -132,11 +132,11 @@ static void vsexpand(VString* vs) { char* newcontent = NULL; - size_t newsz; + unsigned newsz; if(vs == NULL) return; assert(vs->nonextendible == 0); - newsz = (vs->alloc + VSTRALLOC); /* basically double allocated space */ + newsz = (vs->alloc + VSTRALLOC); /* increase allocated space */ if(vs->alloc >= newsz) return; /* space already allocated */ newcontent=(char*)calloc(1,newsz+1);/* always room for nul term */ assert(newcontent != NULL); @@ -154,7 +154,7 @@ vsappendn(VString* vs, const char* elem, unsigned n) { size_t need; assert(vs != NULL && elem != NULL); - if(n == 0) {n = strlen(elem);} + if(n == 0) {n = (unsigned)strlen(elem);} need = vs->length + n; if(vs->nonextendible) { /* Space must already be available */ @@ -166,7 +166,7 @@ vsappendn(VString* vs, const char* elem, unsigned n) memcpy(&vs->content[vs->length],elem,n); vs->length += n; if(!vs->nonextendible) - vs->content[vs->length] = '\0'; + vs->content[vs->length] = '\0'; /* guarantee nul term */ } static void @@ -196,7 +196,12 @@ static char* vsextract(VString* vs) { char* x = NULL; - if(vs == NULL || vs->content == NULL) return NULL; + if(vs == NULL) return NULL; + if(vs->content == NULL) { + /* guarantee content existence and nul terminated */ + if((vs->content = calloc(1,sizeof(char)))==NULL) return NULL; + vs->length = 0; + } x = vs->content; vs->content = NULL; vs->length = 0; @@ -229,14 +234,14 @@ util_initialize(void) /* Following are always "in-lined"*/ #define vlistcontents(l) ((l)==NULL?NULL:(l)->content) -#define vlistlength(l) ((l)==NULL?0:(int)(l)->length) +#define vlistlength(l) ((l)==NULL?0:(l)->length) #define vlistclear(l) vlistsetlength(l,0) #define vlistsetlength(l,len) do{if((l)!=NULL) (l)->length=len;} while(0) #define vscontents(vs) ((vs)==NULL?NULL:(vs)->content) -#define vslength(vs) ((vs)==NULL?0:(int)(vs)->length) +#define vslength(vs) ((vs)==NULL?0:(vs)->length) #define vscat(vs,s) vsappendn(vs,s,0) #define vsclear(vs) vssetlength(vs,0) #define vssetlength(vs,len) do{if((vs)!=NULL) (vs)->length=len;} while(0) -#endif /*UTILS_H*/ +#endif /*NCVUTIL_H*/ diff --git a/libdap2/ncd2dispatch.c b/libdap2/ncd2dispatch.c index 87b4c2eb82..23544d667d 100644 --- a/libdap2/ncd2dispatch.c +++ b/libdap2/ncd2dispatch.c @@ -806,7 +806,7 @@ fprintf(stderr,"\n"); NCattribute* att = (NCattribute*)nclistget(var->attributes,j); char* val = NULL; /* Check for _FillValue/Variable mismatch */ - if(strcmp(att->name,"_FillValue")==0) { + if(strcmp(att->name,NC_FillValue)==0) { /* Special case var is byte, fillvalue is int16 and unsignedattr == 0; This exception is needed because DAP2 byte type diff --git a/libdap4/d4http.c b/libdap4/d4http.c index eb144c93ba..2006cd3da7 100644 --- a/libdap4/d4http.c +++ b/libdap4/d4http.c @@ -110,8 +110,11 @@ WriteMemoryCallback(void *ptr, size_t size, size_t nmemb, void *data) nclog(NCLOGWARN,"WriteMemoryCallback: zero sized chunk"); /* Optimize for reading potentially large dods datasets */ while(!ncbytesavail(buf,realsize)) { - /* double the size of the packet */ - ncbytessetalloc(buf,2*ncbytesalloc(buf)); + /* double the size of the packet (unless the buf is empty) */ + if(ncbytesalloc(buf) == 0) + ncbytessetalloc(buf,1024); + else + ncbytessetalloc(buf,2*ncbytesalloc(buf)); } ncbytesappendn(buf, ptr, realsize); #ifdef PROGRESS diff --git a/libdap4/d4meta.c b/libdap4/d4meta.c index bd04310d34..d11243045f 100644 --- a/libdap4/d4meta.c +++ b/libdap4/d4meta.c @@ -739,7 +739,7 @@ compileAttrValues(NCD4meta* builder, NCD4node* attr, void** memoryp, NClist* blo memset((void*)&converter,0,sizeof(converter)); /* Deal with _FillValue */ - if(container->sort == NCD4_VAR && strcmp(attr->name,"_FillValue")==0) { + if(container->sort == NCD4_VAR && strcmp(attr->name,NC_FillValue)==0) { /* Verify or fix or ignore or fail on type mismatch */ if(container->basetype != basetype) {/* _FillValue/Variable type mismatch */ int compatible = isfilltypecompatible(container->basetype, basetype); diff --git a/libdap4/d4parser.c b/libdap4/d4parser.c index a557ce1177..9250d5316f 100644 --- a/libdap4/d4parser.c +++ b/libdap4/d4parser.c @@ -65,10 +65,11 @@ static const struct KEYWORDINFO { }; typedef struct KEYWORDINFO KEYWORDINFO; -static const struct ATOMICTYPEINFO { +/* Warning do not make const because sort will modify */ +static struct ATOMICTYPEINFO { char* name; nc_type type; size_t size; } atomictypeinfo[] = { -/* Keep in sorted order for binary search */ +/* Will be sorted on first use */ /* Use lower case for canonical comparison, but keep proper name here */ {"Byte",NC_BYTE,sizeof(char)}, {"Char",NC_CHAR,sizeof(char)}, @@ -85,8 +86,9 @@ static const struct ATOMICTYPEINFO { {"UInt64",NC_UINT64,sizeof(unsigned long long)}, {"UInt8",NC_UBYTE,sizeof(unsigned char)}, {"Url",NC_STRING,sizeof(char*)}, -{NULL,NC_NAT,0} }; +#define NCD4_NATOMICTYPES (sizeof(atomictypeinfo)/sizeof(struct ATOMICTYPEINFO)) +static int atomictypessorted = 0; /***************************************************/ @@ -116,7 +118,7 @@ static NCD4node* getOpaque(NCD4parser*, ncxml_t varxml, NCD4node* group); static int getValueStrings(NCD4parser*, NCD4node*, ncxml_t xattr, NClist*); static int isReserved(const char* name); static const KEYWORDINFO* keyword(const char* name); -static NCD4node* lookupAtomicType(NClist*, const char* name); +static NCD4node* lookupAtomicType(NClist*,const char* name); static NCD4node* lookFor(NClist* elems, const char* name, NCD4sort sort); static NCD4node* lookupFQN(NCD4parser*, const char* sfqn, NCD4sort); static int lookupFQNList(NCD4parser*, NClist* fqn, NCD4sort sort, NCD4node** result); @@ -764,6 +766,7 @@ parseMaps(NCD4parser* parser, NCD4node* var, ncxml_t xml) int ret = NC_NOERR; ncxml_t x; + NC_UNUSED(parser); for(x=ncxml_child(xml, "Map");x!= NULL;x=ncxml_next(x,"Map")) { char* fqn; fqn = ncxml_attr(x,"name"); @@ -937,6 +940,8 @@ static int getValueStrings(NCD4parser* parser, NCD4node* type, ncxml_t xattr, NClist* svalues) { char* s; + NC_UNUSED(parser); + NC_UNUSED(type); /* See first if we have a "value" xml attribute */ s = ncxml_attr(xattr,"value"); if(s != NULL) @@ -1249,7 +1254,7 @@ defineBytestringType(NCD4parser* parser) if(ret != NC_NOERR) goto done; SETNAME(bstring,"_bytestring"); bstring->opaque.size = 0; - bstring->basetype = lookupAtomicType(parser,"UInt8"); + bstring->basetype = lookupAtomicType(parser->meta->atomictypes,"UInt8"); PUSH(parser->metadata->root->types,bstring); parser->metadata->_bytestring = bstring; } else @@ -1259,16 +1264,25 @@ defineBytestringType(NCD4parser* parser) } #endif +static int atisort(const void* a, const void* b) +{ + return strcasecmp(((struct ATOMICTYPEINFO*)a)->name,((struct ATOMICTYPEINFO*)b)->name); +} + static int defineAtomicTypes(NCD4meta* meta, NClist* list) { int ret = NC_NOERR; NCD4node* node; - const struct ATOMICTYPEINFO* ati; + size_t i; - if(list == NULL) - return THROW(NC_EINTERNAL); - for(ati=atomictypeinfo;ati->name;ati++) { + if(list == NULL) return THROW(NC_EINTERNAL); + if(!atomictypessorted) { + qsort((void*)atomictypeinfo, NCD4_NATOMICTYPES,sizeof(struct ATOMICTYPEINFO),atisort); + atomictypessorted = 1; + } + for(i=0;itype,&node))) goto done; SETNAME(node,ati->name); PUSH(list,node); @@ -1277,29 +1291,26 @@ defineAtomicTypes(NCD4meta* meta, NClist* list) return THROW(ret); } +static int +aticmp(const void* a, const void* b) +{ + const char* name = (const char*)a; + NCD4node** nodebp = (NCD4node**)b; + return strcasecmp(name,(*nodebp)->name); +} + /* Binary search the set of set of atomictypes */ static NCD4node* lookupAtomicType(NClist* atomictypes, const char* name) { - size_t n = nclistlength(atomictypes); - if (n == 0) return NULL; - size_t L = 0; - size_t R = n - 1; - NCD4node* p; - - for(;;) { - if(L > R) break; - size_t m = (L + R) / 2; - p = (NCD4node*)nclistget(atomictypes,m); - int cmp = strcasecmp(p->name,name); - if(cmp == 0) - return p; - if(cmp < 0) - L = (m + 1); - else /*cmp > 0*/ - R = (m - 1); - } - return NULL; + void* match = NULL; + size_t ntypes = 0; + NCD4node** types = NULL; + assert(atomictypessorted && nclistlength(atomictypes) > 0); + ntypes = nclistlength(atomictypes); + types = (NCD4node**)atomictypes->content; + match = bsearch((void*)name,(void*)types,ntypes,sizeof(NCD4node*),aticmp); + return (match==NULL?NULL:*(NCD4node**)match); } /**************************************************/ @@ -1650,6 +1661,7 @@ parseForwards(NCD4parser* parser, NCD4node* root) int ret = NC_NOERR; size_t i,j; + NC_UNUSED(root); /* process all vars */ for(i=0;ivars);i++) { NCD4node* var = (NCD4node*)nclistget(parser->vars,i); diff --git a/libdispatch/CMakeLists.txt b/libdispatch/CMakeLists.txt index 0f5d66d085..68251ba185 100644 --- a/libdispatch/CMakeLists.txt +++ b/libdispatch/CMakeLists.txt @@ -8,10 +8,9 @@ add_library(dispatch OBJECT) target_sources(dispatch PRIVATE - dcopy.c dfile.c ddim.c datt.c dattinq.c dattput.c dattget.c derror.c dvar.c dvarget.c dvarput.c dvarinq.c ddispatch.c nclog.c dstring.c dutf8.c dinternal.c doffsets.c ncuri.c nclist.c ncbytes.c nchashmap.c nctime.c nc.c nclistmgr.c utf8proc.h utf8proc.c dpathmgr.c dutil.c drc.c dauth.c dreadonly.c dnotnc4.c dnotnc3.c dinfermodel.c - daux.c dinstance.c dinstance_intern.c - dcrc32.c dcrc32.h dcrc64.c ncexhash.c ncxcache.c ncjson.c ds3util.c dparallel.c dmissing.c - ncproplist.c +dcopy.c dfile.c ddim.c datt.c dattinq.c dattput.c dattget.c derror.c dvar.c dvarget.c dvarput.c dvarinq.c ddispatch.c nclog.c dstring.c dutf8.c dinternal.c doffsets.c ncuri.c nclist.c ncbytes.c nchashmap.c nctime.c nc.c nclistmgr.c utf8proc.h utf8proc.c dpathmgr.c dutil.c drc.c dauth.c dreadonly.c dnotnc4.c dnotnc3.c dinfermodel.c +daux.c dinstance.c dinstance_intern.c +dcrc32.c dcrc32.h dcrc64.c ncexhash.c ncxcache.c ncjson.c ds3util.c dparallel.c dmissing.c ncproplist.c ) if (NETCDF_ENABLE_DLL) diff --git a/libdispatch/Makefile.am b/libdispatch/Makefile.am index 8d35269ea3..9001ed4c25 100644 --- a/libdispatch/Makefile.am +++ b/libdispatch/Makefile.am @@ -51,7 +51,7 @@ endif # NETCDF_ENABLE_BYTERANGE if NETCDF_ENABLE_S3 if NETCDF_ENABLE_S3_INTERNAL # Renamed to avoid conflicts with the HDF5 files -libdispatch_la_SOURCES += ncs3sdk_h5.c nch5s3comms.c nch5s3comms.h ncutil.h nccurl_setup.h \ +libdispatch_la_SOURCES += ncs3sdk_h5.c nch5s3comms.c nch5s3comms.h nccurl_setup.h \ nccurl_sha256.c nccurl_sha256.h nccurl_hmac.c nccurl_hmac.h AM_CPPFLAGS += -I$(top_srcdir)/libncxml libdispatch_la_CPPFLAGS += ${AM_CPPFLAGS} diff --git a/libdispatch/daux.c b/libdispatch/daux.c index 71e2a7c524..c79be9cca7 100644 --- a/libdispatch/daux.c +++ b/libdispatch/daux.c @@ -952,6 +952,8 @@ This function is just a wrapper around nc_dump__data. @return error code */ +EXTERNL int nc_dump_data(int ncid, nc_type xtype, void* memory, size_t count, char** bufp); + EXTERNL int ncaux_dump_data(int ncid, int xtype, void* memory, size_t count, char** bufp) { diff --git a/libdispatch/dcopy.c b/libdispatch/dcopy.c index 82acc353ad..33ac830e04 100644 --- a/libdispatch/dcopy.c +++ b/libdispatch/dcopy.c @@ -12,15 +12,12 @@ #include "nc_logging.h" #include "nclist.h" +/* Forward */ static int NC_find_equal_type(int ncid1, nc_type xtype1, int ncid2, nc_type *xtype2); - #ifdef USE_NETCDF4 - static int searchgroup(int ncid1, int tid1, int grp, int* tid2); static int searchgrouptree(int ncid1, int tid1, int grp, int* tid2); - -#endif /*USE_NETCDF4*/ - +#endif #ifdef USE_NETCDF4 /** @@ -59,8 +56,8 @@ NC_compare_nc_types(int ncid1, int typeid1, int ncid2, int typeid2, int *equalp) } else { - size_t i; int ret, equal1; + size_t i; char name1[NC_MAX_NAME]; char name2[NC_MAX_NAME]; size_t size1, size2; @@ -212,44 +209,6 @@ NC_rec_find_nc_type(int ncid1, nc_type tid1, int ncid2, nc_type* tid2) #endif /* USE_NETCDF4 */ -/** - * @internal Given a type in one file, find its equal (if any) in - * another file. It sounds so simple, but it's a real pain! - * - * @param ncid1 File ID. - * @param xtype1 Type ID. - * @param ncid2 File ID. - * @param xtype2 Pointer that gets type ID of equal type. - * - * @return ::NC_NOERR No error. - * @author Ed Hartnett -*/ -static int -NC_find_equal_type(int ncid1, nc_type xtype1, int ncid2, nc_type *xtype2) -{ - int ret = NC_NOERR; - - /* Check input */ - if(xtype1 <= NC_NAT) - return NC_EINVAL; - - /* Handle atomic types. */ - if (xtype1 <= NC_MAX_ATOMIC_TYPE) - { - if(xtype2) - *xtype2 = xtype1; - return NC_NOERR; - } - -#ifdef USE_NETCDF4 - /* Recursively search group ncid2 and its children - to find a type that is equal (using compare_type) - to xtype1. */ - ret = NC_rec_find_nc_type(ncid1, xtype1 , ncid2, xtype2); -#endif /* USE_NETCDF4 */ - return ret; -} - /** * This will copy a variable that is an array of primitive type and * its attributes from one file to another, assuming dimensions in the @@ -737,5 +696,45 @@ searchgrouptree(int ncid1, int tid1, int grp, int* tid2) return ret; } -#endif /* USE_NETCDF4 */ +#endif + +/** + * @internal Given a type in one file, find its equal (if any) in + * another file. It sounds so simple, but it's a real pain! + * + * @param ncid1 File ID. + * @param xtype1 Type ID. + * @param ncid2 File ID. + * @param xtype2 Pointer that gets type ID of equal type. + * + * @return ::NC_NOERR No error. + * @return ::NC_EBADTYPE + * @author Ed Hartnett +*/ +static int +NC_find_equal_type(int ncid1, nc_type xtype1, int ncid2, nc_type *xtype2) +{ + int ret = NC_NOERR; + + /* Check input */ + if(xtype1 <= NC_NAT) + return NC_EINVAL; + + /* Handle atomic types. */ + if (xtype1 <= NC_MAX_ATOMIC_TYPE) + { + if(xtype2) + *xtype2 = xtype1; + return NC_NOERR; + } +#ifdef USE_NETCDF4 + /* Recursively search group ncid2 and its children + to find a type that is equal (using compare_type) + to xtype1. */ + ret = NC_rec_find_nc_type(ncid1, xtype1 , ncid2, xtype2); +#else + ret = NC_EBADTYPE; +#endif + return ret; +} diff --git a/libdispatch/ddispatch.c b/libdispatch/ddispatch.c index 4ccc65d8d2..79b893f44b 100644 --- a/libdispatch/ddispatch.c +++ b/libdispatch/ddispatch.c @@ -4,25 +4,27 @@ See LICENSE.txt for license information. */ #include "config.h" -#include "ncdispatch.h" -#include "ncuri.h" -#include "nclog.h" -#include "ncbytes.h" -#include "ncrc.h" -#include "ncoffsets.h" -#include "ncpathmgr.h" -#include "ncxml.h" -#include "nc4internal.h" /* Required for getcwd, other functions. */ #ifdef HAVE_UNISTD_H #include #endif - /* Required for getcwd, other functions. */ #ifdef _WIN32 #include #endif +#include + +#include "netcdf.h" +#include "ncdispatch.h" +#include "ncuri.h" +#include "nclog.h" +#include "ncbytes.h" +#include "ncrc.h" +#include "ncoffsets.h" +#include "ncpathmgr.h" +#include "ncxml.h" +#include "nc4internal.h" #if defined(NETCDF_ENABLE_BYTERANGE) || defined(NETCDF_ENABLE_DAP) || defined(NETCDF_ENABLE_DAP4) #include @@ -32,18 +34,52 @@ See LICENSE.txt for license information. #include "ncs3sdk.h" #endif +/**************************************************/ +/* Global State constants and state */ + #define MAXPATH 1024 +/* The singleton global state object */ +static NCglobalstate* nc_globalstate = NULL; + /* Define vectors of zeros and ones for use with various nc_get_varX functions */ /* Note, this form of initialization fails under Cygwin */ size_t NC_coord_zero[NC_MAX_VAR_DIMS] = {0}; size_t NC_coord_one[NC_MAX_VAR_DIMS] = {1}; ptrdiff_t NC_stride_one[NC_MAX_VAR_DIMS] = {1}; -/* -static nc_type longtype = (sizeof(long) == sizeof(int)?NC_INT:NC_INT64); -static nc_type ulongtype = (sizeof(unsigned long) == sizeof(unsigned int)?NC_UINT:NC_UINT64); -*/ +/**************************************************/ +/* Atomic type constants */ + +/* The sizes of types may vary from platform to platform, but within + * netCDF files, type sizes are fixed. */ +#define NC_CHAR_LEN sizeof(char) /**< @internal Size of char. */ +#define NC_STRING_LEN sizeof(char *) /**< @internal Size of char *. */ +#define NC_BYTE_LEN 1 /**< @internal Size of byte. */ +#define NC_SHORT_LEN 2 /**< @internal Size of short. */ +#define NC_INT_LEN 4 /**< @internal Size of int. */ +#define NC_FLOAT_LEN 4 /**< @internal Size of float. */ +#define NC_DOUBLE_LEN 8 /**< @internal Size of double. */ +#define NC_INT64_LEN 8 /**< @internal Size of int64. */ + +/** @internal Names of atomic types. */ +const char* nc4_atomic_name[NUM_ATOMIC_TYPES] = {"none", "byte", "char", + "short", "int", "float", + "double", "ubyte", + "ushort", "uint", + "int64", "uint64", "string"}; +static const size_t nc4_atomic_size[NUM_ATOMIC_TYPES] = {0, NC_BYTE_LEN, NC_CHAR_LEN, NC_SHORT_LEN, + NC_INT_LEN, NC_FLOAT_LEN, NC_DOUBLE_LEN, + NC_BYTE_LEN, NC_SHORT_LEN, NC_INT_LEN, NC_INT64_LEN, + NC_INT64_LEN, NC_STRING_LEN}; + +/**************************************************/ +/* Forward */ +static int NC_createglobalstate(void); + +/**************************************************/ +/** \defgroup dispatch_initialize functions. */ +/** \{ */ /* Allow dispatch to do general initialization and finalization */ int @@ -141,16 +177,9 @@ NCDISPATCH_finalize(void) NC_freeglobalstate(); /* should be one of the last things done */ return status; } +/** \} */ /**************************************************/ -/* Global State constants and state */ - -/* The singleton global state object */ -static NCglobalstate* nc_globalstate = NULL; - -/* Forward */ -static int NC_createglobalstate(void); - /** \defgroup global_state Global state functions. */ /** \{ @@ -225,34 +254,13 @@ NC_freeglobalstate(void) /** \} */ /**************************************************/ -/** \defgroup atomic_types Atomic Type functions */ +/** \defgroup atomic_types Atomic Type functions, where + atomic does not include NC_STRING. */ /** \{ \ingroup atomic_types */ -/* The sizes of types may vary from platform to platform, but within - * netCDF files, type sizes are fixed. */ -#define NC_CHAR_LEN sizeof(char) /**< @internal Size of char. */ -#define NC_STRING_LEN sizeof(char *) /**< @internal Size of char *. */ -#define NC_BYTE_LEN 1 /**< @internal Size of byte. */ -#define NC_SHORT_LEN 2 /**< @internal Size of short. */ -#define NC_INT_LEN 4 /**< @internal Size of int. */ -#define NC_FLOAT_LEN 4 /**< @internal Size of float. */ -#define NC_DOUBLE_LEN 8 /**< @internal Size of double. */ -#define NC_INT64_LEN 8 /**< @internal Size of int64. */ - -/** @internal Names of atomic types. */ -const char* nc4_atomic_name[NUM_ATOMIC_TYPES] = {"none", "byte", "char", - "short", "int", "float", - "double", "ubyte", - "ushort", "uint", - "int64", "uint64", "string"}; -static const size_t nc4_atomic_size[NUM_ATOMIC_TYPES] = {0, NC_BYTE_LEN, NC_CHAR_LEN, NC_SHORT_LEN, - NC_INT_LEN, NC_FLOAT_LEN, NC_DOUBLE_LEN, - NC_BYTE_LEN, NC_SHORT_LEN, NC_INT_LEN, NC_INT64_LEN, - NC_INT64_LEN, NC_STRING_LEN}; - /** * @internal Get the name and size of an atomic type. For strings, 1 is * returned. diff --git a/libdispatch/derror.c b/libdispatch/derror.c index f1fd8bf5c4..e5ae6d121f 100644 --- a/libdispatch/derror.c +++ b/libdispatch/derror.c @@ -100,6 +100,8 @@ const char *nc_strerror(int ncerr1) { case NC_NOERR: return "No error"; + case NC_ERROR: + return "Non-specific error"; case NC_EBADID: return "NetCDF: Not a valid ID"; case NC_ENFILE: @@ -283,7 +285,11 @@ const char *nc_strerror(int ncerr1) return "NetCDF: Some object not found"; case NC_EPLUGIN: return "NetCDF: Unclassified failure in accessing a dynamically loaded plugin"; - default: + case NC_ENOTZARR: + return "Malformed (NC)Zarr file"; + case NC_EZARRMETA: + return "Malformed (NC)Zarr file consolidated metadata"; + default: #ifdef USE_PNETCDF /* The behavior of ncmpi_strerror here is to return NULL, not a string. This causes problems in (at least) diff --git a/libdispatch/dfilter.c b/libdispatch/dfilter.c index 206515a850..e0e25609be 100644 --- a/libdispatch/dfilter.c +++ b/libdispatch/dfilter.c @@ -8,11 +8,8 @@ */ #include "config.h" -#include -#include -#include -#ifdef _MSC_VER -#include +#ifdef USE_HDF5 +#include "hdf5internal.h" #endif #include "netcdf.h" @@ -21,10 +18,6 @@ #include "nc4internal.h" #include "nclog.h" -#ifdef USE_HDF5 -#include "hdf5internal.h" -#endif - #ifdef NETCDF_ENABLE_NCZARR #include "zdispatch.h" #endif diff --git a/libdispatch/dhttp.c b/libdispatch/dhttp.c index 45f93828f6..7c96800a5c 100644 --- a/libdispatch/dhttp.c +++ b/libdispatch/dhttp.c @@ -161,7 +161,7 @@ nc_http_close(NC_HTTP_STATE* state) #ifdef NETCDF_ENABLE_S3 case HTTPS3: { if(state->s3.s3client) - NC_s3sdkclose(state->s3.s3client, state->s3.info, 0, NULL); + NC_s3sdkclose(state->s3.s3client, NULL); NC_s3clear(state->s3.info); nullfree(state->s3.info); state->s3.s3client = NULL; diff --git a/libdispatch/dinfermodel.c b/libdispatch/dinfermodel.c index adb3f13779..a0cbe241c9 100644 --- a/libdispatch/dinfermodel.c +++ b/libdispatch/dinfermodel.c @@ -127,7 +127,7 @@ static struct FORMATMODES { {"udf1",NC_FORMATX_UDF1,0}, {"nczarr",NC_FORMATX_NCZARR,NC_FORMAT_NETCDF4}, {"zarr",NC_FORMATX_NCZARR,NC_FORMAT_NETCDF4}, -{"bytes",NC_FORMATX_NC4,NC_FORMAT_NETCDF4}, /* temporary until 3 vs 4 is determined */ +{"bytes",NC_FORMATX_NC4,NC_FORMAT_NETCDF4}, /* temporary until netcdf-3 vs netcdf-4 is determined */ {NULL,0}, }; @@ -137,15 +137,13 @@ static const struct MACRODEF { char* defkey; char* defvalues[4]; } macrodefs[] = { -{"zarr","mode",{"nczarr","zarr",NULL}}, +{"zarr","mode",{"zarr",NULL}}, +{"nczarr","mode",{"nczarr",NULL}}, {"dap2","mode",{"dap2",NULL}}, {"dap4","mode",{"dap4",NULL}}, -{"s3","mode",{"s3","nczarr",NULL}}, +{"s3","mode",{"s3",NULL}}, +{"gs3","mode",{"gs3",NULL}}, /* Google S3 API */ {"bytes","mode",{"bytes",NULL}}, -{"xarray","mode",{"zarr", NULL}}, -{"noxarray","mode",{"nczarr", "noxarray", NULL}}, -{"zarr","mode",{"nczarr","zarr", NULL}}, -{"gs3","mode",{"gs3","nczarr",NULL}}, /* Google S3 API */ {NULL,NULL,{NULL}} }; @@ -162,10 +160,8 @@ static const struct MODEINFER { char* key; char* inference; } modeinferences[] = { -{"zarr","nczarr"}, {"xarray","zarr"}, {"noxarray","nczarr"}, -{"noxarray","zarr"}, {NULL,NULL} }; @@ -174,6 +170,7 @@ static const struct MODEINFER modenegations[] = { {"bytes","nczarr"}, /* bytes negates (nc)zarr */ {"bytes","zarr"}, {"noxarray","xarray"}, +{"nozmetadata","zmetadata"}, {NULL,NULL} }; @@ -413,6 +410,7 @@ envvlist2string(NClist* envv, const char* delim) NCbytes* buf = NULL; char* result = NULL; + NC_UNUSED(delim); if(envv == NULL || nclistlength(envv) == 0) return NULL; buf = ncbytesnew(); for(i=0;i= NC_FIRSTUSERTYPEID) { stat = nc_inq_user_type(ncid,typeid,name,size,basetypep,nfieldsp,classp); } else -#endif +#endif /*USE_NETCDF4*/ if(typeid > NC_NAT && typeid <= NC_MAX_ATOMIC_TYPE) { if(basetypep) *basetypep = NC_NAT; if(nfieldsp) *nfieldsp = 0; diff --git a/libdispatch/dinstance_intern.c b/libdispatch/dinstance_intern.c index f1dac8eda6..6a4477e23e 100644 --- a/libdispatch/dinstance_intern.c +++ b/libdispatch/dinstance_intern.c @@ -20,6 +20,7 @@ Currently two operations are defined: #include "nc4dispatch.h" #include "ncoffsets.h" #include "ncbytes.h" +#include "nclog.h" #undef REPORT #undef DEBUG @@ -76,8 +77,11 @@ NC_reclaim_data(NC* nc, nc_type xtype, void* memory, size_t count) NC_TYPE_INFO_T* utype = NULL; assert(nc != NULL); - assert((memory == NULL && count == 0) || (memory != NULL || count > 0)); + /* If memory is NULL, ignore count */ + assert(memory == NULL || (memory != NULL && count > 0)); + if(memory == NULL) goto done; + /* Process atomic types */ /* Optimize: Vector of fixed size atomic types (always the case for netcdf-3)*/ @@ -88,7 +92,7 @@ NC_reclaim_data(NC* nc, nc_type xtype, void* memory, size_t count) if(xtype == NC_STRING) { char** ss = (char**)memory; for(i=0;ilen > 0 && vlen->p != NULL) { char** slist = (char**)vlen->p; /* vlen instance is a vector of string pointers */ - for(i=0;ilen;i++) {if(slist[i] != NULL) free(slist[i]);} + for(i=0;i<(int)vlen->len;i++) {if(slist[i] != NULL) {free(slist[i]);slist[i] = NULL;}} } goto out; } @@ -167,12 +172,12 @@ reclaim_datar(NC_FILE_INFO_T* file, NC_TYPE_INFO_T* utype, Position instance) if((stat = NC_type_alignment_internal(file,basetypeid,basetype,&alignment))) goto done;; vinstance.memory = (char*)vlen->p; /* use char* so we can do pointer arithmetic */ vinstance.memory = (void*)NC_read_align((uintptr_t)vinstance.memory,alignment); - for(i=0;ilen;i++) { + for(i=0;i<(int)vlen->len;i++) { if((stat=reclaim_datar(file,basetype,vinstance))) goto done; /* reclaim one basetype instance */ vinstance.memory += basetype->size; /* move to next base instance */ } out: - if(vlen->len > 0 && vlen->p != NULL) {free(vlen->p);} + if(vlen->len > 0 && vlen->p != NULL) {free(vlen->p); vlen->p = NULL;} goto done; } else if(utype->nc_type_class == NC_COMPOUND) { Position finstance; /* mark the fields's instance */ @@ -198,7 +203,7 @@ reclaim_datar(NC_FILE_INFO_T* file, NC_TYPE_INFO_T* utype, Position instance) if(field->nc_typeid == NC_STRING) { char** strvec = (char**)finstance.memory; for(i=0;ip = (void*)dststrvec; - for(i=0;ilen;i++) { + for(i=0;i<(int)srcvlens->len;i++) { if((dststrvec[i] = strdup(srcstrvec[i]))==NULL) {stat = NC_ENOMEM; goto done;} } goto done; @@ -406,7 +411,7 @@ copy_datar(NC_FILE_INFO_T* file, NC_TYPE_INFO_T* utype, Position src, Position d dstvlens->p = vdst.memory; /* don't lose it */ vsrc.memory = (void*)NC_read_align((uintptr_t)vsrc.memory,alignment); vdst.memory = (void*)NC_read_align((uintptr_t)vdst.memory,alignment); - for(i=0;ilen;i++) { + for(i=0;i<(int)srcvlens->len;i++) { if((stat=copy_datar(file,basetype,vsrc,vdst))) goto done; vsrc.memory += basetype->size; vdst.memory += basetype->size; @@ -441,7 +446,7 @@ copy_datar(NC_FILE_INFO_T* file, NC_TYPE_INFO_T* utype, Position src, Position d if(field->nc_typeid == NC_STRING) { char** srcstrvec = (char**)src.memory; char** dststrvec = (char**)dst.memory; - for(i=0;isize; fdst.memory += basetype->size; @@ -465,7 +470,7 @@ copy_datar(NC_FILE_INFO_T* file, NC_TYPE_INFO_T* utype, Position src, Position d } else {stat = NC_EBADTYPE; goto done;} done: - return stat; + return NCTHROW(stat); } #endif @@ -531,7 +536,7 @@ NC_type_alignment_internal(NC_FILE_INFO_T* file, nc_type xtype, NC_TYPE_INFO_T* Why was this here? if(stat == NC_NOERR && align == 0) stat = NC_EINVAL; #endif - return stat; + return NCTHROW(stat); } #endif @@ -546,11 +551,15 @@ NC_reclaim_data_all(NC* nc, nc_type xtypeid, void* memory, size_t count) int stat = NC_NOERR; assert(nc != NULL); + /* If memory is NULL, ignore count */ + assert(memory == NULL || (memory != NULL && count > 0)); + if(memory == NULL) goto done; stat = NC_reclaim_data(nc,xtypeid,memory,count); if(stat == NC_NOERR && memory != NULL) - free(memory); - return stat; + {free(memory); memory = NULL;} +done: + return NCTHROW(stat); } /* Alternate entry point: includes recovering the top-level memory */ @@ -597,7 +606,7 @@ NC_copy_data_all(NC* nc, nc_type xtype, const void* memory, size_t count, void** #endif if(copyp) {*copyp = copy; copy = NULL;} done: - return stat; + return NCTHROW(stat); } /* Alternate entry point: includes recovering the top-level memory */ diff --git a/libdispatch/dmissing.c b/libdispatch/dmissing.c index 0c9be82745..7a1cc3249b 100644 --- a/libdispatch/dmissing.c +++ b/libdispatch/dmissing.c @@ -1,5 +1,5 @@ /* - * Copyright 2018, University Corporation for Atmospheric Research + * Copyright 2018, University Corporation for Atmospheric Research * See netcdf/COPYRIGHT file for copying and redistribution conditions. */ @@ -70,24 +70,24 @@ strdup(const char* s) size_t strlcpy(char *dst, const char* src, size_t dsize) { - const char *osrc = src; - size_t nleft = dsize; - - /* Copy as many bytes as will fit. */ - if (nleft != 0) { - while (--nleft != 0) { - if ((*dst++ = *src++) == '\0') - break; - } - } - /* Not enough room in dst, add NUL and traverse rest of src. */ - if (nleft == 0) { - if (dsize != 0) - *dst = '\0'; /* NUL-terminate dst */ - while (*src++) - ; - } - return(src - osrc - 1); /* count does not include NUL */ + const char *osrc = src; + size_t nleft = dsize; + + /* Copy as many bytes as will fit. */ + if (nleft != 0) { + while (--nleft != 0) { + if ((*dst++ = *src++) == '\0') + break; + } + } + /* Not enough room in dst, add NUL and traverse rest of src. */ + if (nleft == 0) { + if (dsize != 0) + *dst = '\0'; /* NUL-terminate dst */ + while (*src++) + ; + } + return(src - osrc - 1); /* count does not include NUL */ } #endif @@ -119,29 +119,33 @@ strlcpy(char *dst, const char* src, size_t dsize) size_t nc_strlcat(char* dst, const char* src, size_t dsize) { - const char *odst = dst; - const char *osrc = src; - size_t n = dsize; - size_t dlen; - - /* Find the end of dst and adjust bytes left but don't go past end. */ - while (n-- != 0 && *dst != '\0') - dst++; - dlen = dst - odst; - n = dsize - dlen; - - if (n-- == 0) - return(dlen + strlen(src)); - while (*src != '\0') { - if (n != 0) { - *dst++ = *src; - n--; - } - src++; - } - *dst = '\0'; - - return(dlen + (src - osrc)); /* count does not include NUL */ + const char *odst = dst; + const char *osrc = src; + size_t n = dsize; + size_t dlen,slen; + + /* Find the end of dst and adjust bytes left but don't go past end. */ + while (n-- != 0 && *dst != '\0') + dst++; + dlen = dst - odst; + n = dsize - dlen; + + slen = (src==NULL?0:strlen(src)); + if (n-- == 0) + return(dlen + slen); + if(src != NULL) { + while (*src != '\0') { + if (n != 0) { + *dst++ = *src; + n--; + } + src++; + } + *dst = '\0'; + } + if(src != NULL) + return(dlen + (src - osrc)); /* count does not include NUL */ + return dlen; } #endif /*!HAVE_STRLCAT*/ @@ -151,11 +155,11 @@ nc_strlcat(char* dst, const char* src, size_t dsize) Not currently used /* Define an version of strcasestr renamed to avoid any system definition */ /* See https://android.googlesource.com/platform/bionic/+/a27d2baa/libc/string/strcasestr.c */ -/* $OpenBSD: strcasestr.c,v 1.3 2006/03/31 05:34:55 deraadt Exp $ */ -/* $NetBSD: strcasestr.c,v 1.2 2005/02/09 21:35:47 kleink Exp $ */ +/* $OpenBSD: strcasestr.c,v 1.3 2006/03/31 05:34:55 deraadt Exp $ */ +/* $NetBSD: strcasestr.c,v 1.2 2005/02/09 21:35:47 kleink Exp $ */ /*- * Copyright (c) 1990, 1993 - * The Regents of the University of California. All rights reserved. + * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Chris Torek. diff --git a/libdispatch/dplugins.c b/libdispatch/dplugins.c index 4347d8227b..f01360e67e 100644 --- a/libdispatch/dplugins.c +++ b/libdispatch/dplugins.c @@ -110,6 +110,7 @@ nc_plugin_path_initialize(void) assert(dst != NULL); for(i=0;irchome); rcfreeentries(info->entries); NC_s3freeprofilelist(info->s3profiles); - } static void diff --git a/libdispatch/ds3util.c b/libdispatch/ds3util.c index 2b81f342bc..9401ace692 100644 --- a/libdispatch/ds3util.c +++ b/libdispatch/ds3util.c @@ -25,6 +25,7 @@ #include "nc4internal.h" #include "ncuri.h" #include "nclist.h" +#include "ncbytes.h" #include "ncrc.h" #include "nclog.h" #include "ncs3sdk.h" @@ -34,9 +35,6 @@ /* Alternate .aws directory location */ #define NC_TEST_AWS_DIR "NC_TEST_AWS_DIR" -#define AWSHOST ".amazonaws.com" -#define GOOGLEHOST "storage.googleapis.com" - enum URLFORMAT {UF_NONE=0, UF_VIRTUAL=1, UF_PATH=2, UF_S3=3, UF_OTHER=4}; /* Read these files in order and later overriding earlier */ @@ -47,6 +45,7 @@ static const char* awsconfigfiles[] = {".aws/config",".aws/credentials",NULL}; /* Forward */ static int endswith(const char* s, const char* suffix); +static void freeprofile(struct AWSprofile* profile); static void freeentry(struct AWSentry* e); static int awsparse(const char* text, NClist* profiles); @@ -78,9 +77,9 @@ NC_s3sdkenvironment(void) /* Rebuild an S3 url into a canonical path-style url. If region is not in the host, then use specified region -if provided, otherwise leave blank and let the S3 server deal with it. -@param url (in) the current url -@param s3 (in/out) NCS3INFO structure +if provided, otherwise us-east-1. +@param url (in) the current url +@param s3 (in/out) NCS3INFO structure @param pathurlp (out) the resulting pathified url string */ @@ -309,7 +308,13 @@ NC_s3urlprocess(NCURI* url, NCS3INFO* s3, NCURI** newurlp) /* Rebuild the URL to path format and get a usable region and optional bucket*/ if((stat = NC_s3urlrebuild(url,s3,&url2))) goto done; - s3->host = strdup(url2->host); + if(url2->port){ + char hostport[8192]; + snprintf(hostport,sizeof(hostport),"%s:%s",url2->host,url2->port); + s3->host = strdup(hostport); + }else{ + s3->host = strdup(url2->host); + } /* construct the rootkey minus the leading bucket */ pathsegments = nclistnew(); if((stat = NC_split_delim(url2->path,'/',pathsegments))) goto done; @@ -358,19 +363,29 @@ NC_s3clear(NCS3INFO* s3) } /* -Check if a url has indicators that signal an S3 or Google S3 url. +Check if a url has indicators that signal an S3 or Google S3 url or ZoH S3 url. +The rules are as follows: +1. If the protocol is "s3" or "gs3" or "zoh", then return (true,s3|gs3|zoh). +2. If the mode contains "s3" or "gs3" or "zoh", then return (true,s3|gs3|zoh). +3. Check the host name: +3.1 If the host ends with ".amazonaws.com", then return (true,s3). +3.1 If the host is "storage.googleapis.com", then return (true,gs3). +4. Otherwise return (false,unknown). */ int -NC_iss3(NCURI* uri, enum NCS3SVC* svcp) +NC_iss3(NCURI* uri, NCS3SVC* svcp) { int iss3 = 0; NCS3SVC svc = NCS3UNK; if(uri == NULL) goto done; /* not a uri */ - /* is the protocol "s3" or "gs3" ? */ + /* is the protocol "s3" or "gs3" or "zoh" ? */ if(strcasecmp(uri->protocol,"s3")==0) {iss3 = 1; svc = NCS3; goto done;} if(strcasecmp(uri->protocol,"gs3")==0) {iss3 = 1; svc = NCS3GS; goto done;} +#ifdef NETCDF_ENABLE_ZOH + if(strcasecmp(uri->protocol,"zoh")==0) {iss3 = 1; svc = NCS3ZOH; goto done;} +#endif /* Is "s3" or "gs3" in the mode list? */ if(NC_testmode(uri,"s3")) {iss3 = 1; svc = NCS3; goto done;} if(NC_testmode(uri,"gs3")) {iss3 = 1; svc = NCS3GS; goto done;} @@ -384,18 +399,71 @@ NC_iss3(NCURI* uri, enum NCS3SVC* svcp) return iss3; } -const char* -NC_s3dumps3info(NCS3INFO* info) +/**************************************************/ +/** +The .aws/config and .aws/credentials files +are in INI format (https://en.wikipedia.org/wiki/INI_file). +This format is not well defined, so the grammar used +here is restrictive. Here, the term "profile" is the same +as the INI term "section". + +The grammar used is as follows: + +Grammar: + +inifile: profilelist ; +profilelist: profile | profilelist profile ; +profile: '[' profilename ']' EOL entries ; +entries: empty | entries entry ; +entry: WORD = WORD EOL ; +profilename: WORD ; +Lexical: +WORD sequence of printable characters - [ \[\]=]+ +EOL '\n' | ';' + +Note: +1. The semicolon at beginning of a line signals a comment. +2. # comments are not allowed +3. Duplicate profiles or keys are ignored. +4. Escape characters are not supported. +*/ + +#define AWS_EOF (-1) +#define AWS_ERR (0) +#define AWS_WORD (0x10001) +#define AWS_EOL (0x10002) + +typedef struct AWSparser { + char* text; + char* pos; + size_t yylen; /* |yytext| */ + NCbytes* yytext; + int token; /* last token found */ + int pushback; /* allow 1-token pushback */ +} AWSparser; + +#ifdef LEXDEBUG +static const char* +tokenname(int token) { - static char text[8192]; - snprintf(text,sizeof(text),"host=%s region=%s bucket=%s rootkey=%s profile=%s", - (info->host?info->host:"null"), - (info->region?info->region:"null"), - (info->bucket?info->bucket:"null"), - (info->rootkey?info->rootkey:"null"), - (info->profile?info->profile:"null")); - return text; + static char num[32]; + switch(token) { + case AWS_EOF: return "EOF"; + case AWS_ERR: return "ERR"; + case AWS_WORD: return "WORD"; + default: snprintf(num,sizeof(num),"%d",token); return num; + } + return "UNKNOWN"; } +#endif + +/* +@param text of the aws credentials file +@param profiles list of form struct AWSprofile (see ncauth.h) +*/ + +#define LBR '[' +#define RBR ']' static void freeprofile(struct AWSprofile* profile) @@ -426,6 +494,19 @@ NC_s3freeprofilelist(NClist* profiles) } } +const char* +NC_s3dumps3info(NCS3INFO* info) +{ + static char text[8192]; + snprintf(text,sizeof(text),"host=%s region=%s bucket=%s rootkey=%s profile=%s", + (info->host?info->host:"null"), + (info->region?info->region:"null"), + (info->bucket?info->bucket:"null"), + (info->rootkey?info->rootkey:"null"), + (info->profile?info->profile:"null")); + return text; +} + /* Find, load, and parse the aws config &/or credentials file */ int NC_aws_load_credentials(NCglobalstate* gstate) @@ -707,15 +788,6 @@ tokenname(int token) } #endif -typedef struct AWSparser { - char* text; - char* pos; - size_t yylen; /* |yytext| */ - NCbytes* yytext; - int token; /* last token found */ - int pushback; /* allow 1-token pushback */ -} AWSparser; - static int awslex(AWSparser* parser) { diff --git a/libdispatch/dtype.c b/libdispatch/dtype.c index 3de208c1e0..6871b796f8 100644 --- a/libdispatch/dtype.c +++ b/libdispatch/dtype.c @@ -38,7 +38,6 @@ type). Read attributes of the new type with nc_get_att (see /** \{ */ - /** \ingroup user_types Learn if two types are equal. diff --git a/libdispatch/dutil.c b/libdispatch/dutil.c index f6f0eecc34..20c1380bec 100644 --- a/libdispatch/dutil.c +++ b/libdispatch/dutil.c @@ -539,3 +539,47 @@ NC_joinwith(NClist* segments, const char* sep, const char* prefix, const char* s ncbytesfree(buf); return stat; } + +static int +lexical_compare(const void* arg1, const void* arg2) +{ + char* s1 = *((char**)arg1); + char* s2 = *((char**)arg2); + size_t slen1 = nulllen(s1); + size_t slen2 = nulllen(s2); + if(slen1 != slen2) return (slen1 - slen2); + return strcmp(s1,s2); +} + +/** +Sort a vector of strings. +@param n Number of strings to sort +@param env vector of strings to sort +*/ +void +NC_sortenvv(size_t n, char** envv) +{ + if(n <= 1) return; + qsort(envv, (int)n, sizeof(char*), lexical_compare); +} + +/** +Sort a nclist of strings. +@param l NClist of strings +*/ +void +NC_sortlist(NClist* l) +{ + if(l == NULL || nclistlength(l) == 0) return; + NC_sortenvv(nclistlength(l),(char**)nclistcontents(l)); +} + +/* Free up a vector of strings */ +void +NC_freeenvv(size_t nkeys, char** keys) +{ + size_t i; + for(i=0;ifalse; !0=>true)*/ - NCjson* list; -} NCjson; - -#define NCJ_LBRACKET '[' -#define NCJ_RBRACKET ']' -#define NCJ_LBRACE '{' -#define NCJ_RBRACE '}' -#define NCJ_COLON ':' -#define NCJ_COMMA ',' -#define NCJ_QUOTE '"' -#define NCJ_TRUE "true" -#define NCJ_FALSE "false" - -#define NCJ_WORD "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_-$" - -/*//////////////////////////////////////////////////*/ - -typedef struct NCJparser { - char* text; - char* pos; - char* yytext; - int errno; - struct { - char* yytext; - int token; - } pushback; -} NCJparser; - -static int -NCjsonparse(char* text, NCjson** treep) -{ - int status = NCJ_OK; - size_t len; - NCJparser parser = NULL; - NCjson* tree = NULL; - if(text == NULL) {status = NCJ_EINVAL; goto done;} - parser = calloc(1,sizeof(NCJparser)); - if(parser == NULL) {status = NCJ_ENOMEM; goto done;} - len = strlen(text); - parser->text = (char*)malloc(len+1+1); - if(parser->text == NULL) {status = NCJ_ENOMEM; goto done;} - strcpy(parser->text,text); - parser->text[len] = '\0'; - parser->text[len+1] = '\0'; - tree = NCJparseR(parser); -done: - if(parser != NULL) { - nullfree(parser->text); - nullfree(parser->yytext); - free(parser); - } - if(status != NCJ_OK) { - if(tree != NULL) NCjsonfree(tree); - } else - if(treep) *treep = tree; - return status; -} - -static int -NCJyytext(NCJparser* parser, char* start, ptrdiff_t pdlen) -{ - size_t len = (size_t)pdlen; - if(parser->yytext == NULL) - parser->yytext = (char*)malloc(len+1); - else - parser->yytext = (char*) realloc(parser->yytext,len+1); - if(parser->yytext == NULL) return NCJ_ENOMEM; - memcpy(parser->yytext,start,len); - parser->yytext[len] = NCJ_NUL; - return NCJ_OK; -} - -static void -NCJpushback(NCJparser* parser, int token) -{ - parser->pushback.token = token; - parser->pushback.yytext = strdup(parser->yytext); -} - -static int -NCJlex(NCJparser* parser) -{ - int c; - int token = NCJ_NUL; - char* start; - char* next; - - if(parser->pushback.token != NCJ_NOTOKEN) { - token = parser->pushback.token; - NCJyytext(parser,parser->pushback.yytext,strlen(parser->pushback.yytext)); - nullfree(parser->pushback.yytext); - parser->pushback.yytext = NULL; - parser->pushback.token = NCJ_NOTOKEN; - return token; - } - - c = *parser->pos; - if(c == NCJ_NUL) { - token = NCJ_NUL; - } else if(strchr(NCJ_WORD, c) != NULL) { - size_t len; - start = parser->pos; - next = start + 1; - for(;;) { - c = *parser->pos++; - if(strchr(NCJ_WHITESPACE,c) != NULL || c == NCJ_NUL) break; - last++; - } - if(!NCJyytext(parser,start,(next - start))) goto done; - token = NCJ_WORD; - } else if(c == NCJ_QUOTE) { - parser->pos++; - start = parser->pos; - next = start+1; - for(;;) { - c = *parser->pos++; - if(c == NCJ_QUOTE || c == NCJ_NUL) break; - last++; - } - if(c == NCJ_NUL) { - parser->errno = NCJ_ESTRING; - token = NCJ_ERR; - goto done; - } - if(!NCJyytext(parser,start,(next - start))) goto done; - token = NCJ_STRING; - } else { /* single char token */ - token = *parser->pos++; - } -done: - if(parser->errno) token = NCJ_ERR; - return token; -} - -/* Simple recursive descent */ - -static int -NCJparseR(NCJparser* parser, NCjson** listp) -{ - int token = NCJ_ERR; - NCjson* list = NULL; - if((token = NCJlex(parser)) == NCJ_ERR) goto done; - switch (token) { - case NCJ_NUL; - break; - case NCJ_WORD: - NCJappend(NCJparseAtomic(parser,token),listp); - break; - case NCJ_LBRACE: - NCJappend(NCJparseMap(parser,locallist),listp); - break; - case NCJ_LBRACKET: - NCJappend(NCJparseArray(parser,NULL),) - case NCJ_STRING: - return NCJparseAtomic(parser,token); - default: - parser->errno = NCJ_EBADTOKEN; - } - return NULL; -} - -static NCjson* -NCJparseAtomic(NCJparser* parser, int kind) -{ - /* assert (kind == NCJ_WORD || kind = NCJ_QUOTE) */ - NCjson* node; - if((node = NCJmakenode(parser)) == NULL) - {parser->errno = NCJ_ENOMEM; goto done;} - if(kind == NCJ_STRING) - node->sort = NCJ_WORD; - node->word = strdup(parser->yytext); - } else { - /* Try to convert to number or boolean; last resort is word */ - size_t count = (last - start) + 1; - int nread = 0; - int ncvt = sscan(parser->yytext, - "%L",&node->num,&nread); - if(ncvt == 1 && nread == count) { - node->sort = NCJ_NUMBER; - } else if(strcasecmp(parser->yytext,NCJ_TRUE)==0) { - node->sort = NCJ_BOOLEAN; - node->num = 1; - } else if(strcasecmp(parser->yytext,NCJ_FALSE)==0) { - node->sort = NCJ_BOOLEAN; - node->num = 0; - } else { - node->word = strdup(parser->yytext); - node->sort = NCJ_WORD; - } - } -done: - return node; -} - -static NCjson* -NCJparseArray(NCJparser* parser) -{ - NCjson* head = NULL; - NCjson* last = NULL; - int token = NCJ_ERR; -#if 0 - if((node = NCJmakenode(parser)) == NULL) goto done; -#endif - loop: - for(;;) { - if((token = NCJlex(parser)) == NCJ_ERR) goto done; - switch (token) { - case NCJ_NUL; - break; - case NCJ_RBRACKET: - break loop; - default: - NCJpushback(parser,token); - NCjson* o = NCJparseR(parser); - tokens.nextToken(); - if(tokens.ttype == NCJ_EOF) break; - else if(tokens.ttype == RBRACKET) tokens.pushBack(); - else if(tokens.ttype != COMMA) - throw new IOException("Missing comma in list"); - array.add(o); - } - } - return array; -} - -static NCjson parseMap(StreamTokenizer tokens) -{ - assert (tokens.ttype == LBRACE); - Map map = new LinkedHashMap<>(); /* Keep insertion order */ - loop: - for(; ; ) { - int token = tokens.nextToken(); - switch (token) { - case NCJ_NCJ_EOL: - break; /* ignore */ - case NCJ_NCJ_EOF: - throw new IOException("Unexpected eof"); - case NCJ_RBRACE: - break loop; - default: - tokens.pushBack(); - NCjson name = parseR(tokens); - if(tokens.ttype == NCJ_EOF) break; - if(name instanceof char* - || name instanceof Long - || name instanceof Boolean) { - /*ok*/ - } else - throw new IOException("Unexpected map name type: " + name); - if(tokens.nextToken() != COLON) - throw new IOException("Expected ':'; found: " + tokens.ttype); - NCjson o = parseR(tokens); - tokens.nextToken(); - if(tokens.ttype == NCJ_EOF) break; - else if(tokens.ttype == RBRACE) tokens.pushBack(); - else if(tokens.ttype != COMMA) - throw new IOException("Missing comma in list"); - map.put(name.tochar*(), o); - } - } - return map; -} -} - -static char* tochar*(Object o) {return tochar*(o,"");} - -static char* tochar*(Object o, char* demark) -{ -char*Builder buf = new char*Builder(); -tochar*R(o, buf, demark, 0); -return buf.tochar*(); -} - -static static void tochar*R(Object o, char*Builder buf, char* demark, int indent) -{ -boolean first = true; -if(o instanceof List) { - List list = (List) o; - if(list.size()== 0) { - buf.append(LBRACKET); - buf.append(RBRACKET); - } else { - buf.append(LBRACKET); - buf.append('\n'); - for(int i=0;i map = (Map) o; - if(map.size() == 0) { - buf.append(LBRACE); - buf.append(RBRACE); - } else { - buf.append(LBRACE); - buf.append('\n'); - int i = 0; - for(Map.Entry e : map.entrySet()) { - buf.append(indent(indent + 2)); - buf.append(QUOTE); - buf.append(e.getKey().replace("\"","\\\"")); - buf.append(QUOTE); - buf.append(' '); - buf.append(COLON); - buf.append(' '); - tochar*R(e.getValue(), buf, demark, indent + 2); - if(i < map.size() - 1) buf.append(","); - buf.append("\n"); - i++; - } - buf.append(indent(indent)); - buf.append(RBRACE); - } -} else if((o instanceof Long) || (o instanceof Boolean)) { - buf.append(demark); - buf.append(o.tochar*()); - buf.append(demark); -} else { - buf.append(QUOTE); - buf.append(o.tochar*().replace("\"","\\\"")); - buf.append(QUOTE); -} -} - -static char* blanks = " "; - -static static char* indent(int n) -{ -while(n > blanks.length()) { - blanks = blanks + blanks; -} -return blanks.substring(0, n); -} - -} - -static NCjson* -NCJmakenode(NCjsonparser* parser) -{ - NCjson* node = NULL; - parser->errno = NCJ_OK; - node = (NCjson*)calloc(1,sizeof(NCjson)); - if(node == null) parser->errno = NCJ_ENOMEM; - return node; -} - - -#endif /*NCJSON_INC*/ diff --git a/libdispatch/ncbytes.c b/libdispatch/ncbytes.c index 02ac617075..a7f36a1346 100644 --- a/libdispatch/ncbytes.c +++ b/libdispatch/ncbytes.c @@ -6,6 +6,7 @@ #include #include "ncbytes.h" +#include "nclog.h" #ifndef TRUE #define TRUE 1 @@ -19,6 +20,10 @@ #define NCBYTESDEBUG 1 +#ifdef NCBYTESDEBUG +#include +#endif + static int ncbytesfail(void) { @@ -26,7 +31,7 @@ ncbytesfail(void) fprintf(stderr,"NCbytes failure\n"); fflush(stderr); #ifdef NCBYTESDEBUG - abort(); + assert(!("NCbytes failure")); #endif return FALSE; } @@ -48,17 +53,22 @@ ncbytessetalloc(NCbytes* bb, unsigned long sz) { char* newcontent; if(bb == NULL) return ncbytesfail(); - if(sz == 0) {sz = (bb->alloc?2*bb->alloc:DEFAULTALLOC);} - if(bb->alloc >= sz) return TRUE; if(bb->nonextendible) return ncbytesfail(); +// if(sz == 0) {sz = (bb->alloc?2*bb->alloc:DEFAULTALLOC);} /* Default the size */ + if(sz == 0) return TRUE; + /* We always guarantee that bb->content is not NULL */ + if(bb->alloc >= sz) { + assert(bb->content != NULL); + return TRUE; + } newcontent=(char*)calloc(sz,sizeof(char)); if(newcontent == NULL) ncbytesfail(); - if(bb->alloc > 0 && bb->length > 0 && bb->content != NULL) { + if(bb->length > 0 && bb->content != NULL) { memcpy((void*)newcontent,(void*)bb->content,sizeof(char)*bb->length); } - if(bb->content != NULL) free(bb->content); - bb->content=newcontent; - bb->alloc=sz; + if(bb->content != NULL) {free(bb->content); bb->content = NULL;} + bb->content = newcontent; newcontent = NULL; + bb->alloc = sz; return TRUE; } @@ -66,7 +76,9 @@ EXTERNL void ncbytesfree(NCbytes* bb) { if(bb == NULL) return; - if(!bb->nonextendible && bb->content != NULL) free(bb->content); + if(!bb->nonextendible && bb->content != NULL) { + free(bb->content); bb->content = NULL; + } free(bb); } @@ -102,7 +114,7 @@ int ncbytesset(NCbytes* bb, unsigned long index, char elem) { if(bb == NULL) return ncbytesfail(); - if(index >= bb->length) return ncbytesfail(); + if(index >= bb->alloc) return ncbytesfail(); bb->content[index] = elem; return TRUE; } @@ -118,16 +130,14 @@ ncbytesappend(NCbytes* bb, char elem) return TRUE; } -/* This assumes s is a null terminated string*/ +/* This assumes s is a null terminated string; but nul not part of the length*/ int ncbytescat(NCbytes* bb, const char* s) { if(bb == NULL) return ncbytesfail(); if(s == NULL) return 1; - ncbytesappendn(bb,(void*)s,strlen(s)+1); /* include trailing null*/ - /* back up over the trailing null*/ - if(bb->length == 0) return ncbytesfail(); - bb->length--; + ncbytesappendn(bb,(void*)s,strlen(s)); + ncbytesnull(bb); /* include trailing null*/ return 1; } @@ -135,8 +145,8 @@ int ncbytesappendn(NCbytes* bb, const void* elem, unsigned long n) { if(bb == NULL || elem == NULL) return ncbytesfail(); - if(n == 0) {n = strlen((char*)elem);} ncbytessetalloc(bb,bb->length+n); + if(n == 0) return TRUE; memcpy((void*)&bb->content[bb->length],(void*)elem,n); bb->length += n; return TRUE; @@ -179,7 +189,7 @@ ncbytessetcontents(NCbytes* bb, void* contents, unsigned long alloc) { if(bb == NULL) return ncbytesfail(); ncbytesclear(bb); - if(!bb->nonextendible && bb->content != NULL) free(bb->content); + if(!bb->nonextendible && bb->content != NULL) {free(bb->content); bb->content = NULL;} bb->content = (char*)contents; bb->length = alloc; bb->alloc = alloc; @@ -191,8 +201,8 @@ ncbytessetcontents(NCbytes* bb, void* contents, unsigned long alloc) int ncbytesnull(NCbytes* bb) { - ncbytesappend(bb,'\0'); - bb->length--; + ncbytessetalloc(bb,bb->length+1); /* Force allocation of space and make room for nul term */ + bb->content[bb->length] = '\0'; /* Leave length unchanged */ return 1; } @@ -209,3 +219,18 @@ ncbytesremove(NCbytes* bb, unsigned long pos) bb->length--; return TRUE; } + +/* Insert n bytes into the buffer at position pos*/ +int +ncbytesinsert(NCbytes* bb, size_t pos, size_t n, const char* s) +{ + if(bb == NULL) return ncbytesfail(); + if(pos > bb->length) ncbytesfail(); + if((bb->length + n) >= bb->alloc) if(!ncbytessetalloc(bb,bb->length+n+1)) return ncbytesfail(); + if(bb->length > 0) + memmove(bb->content+pos+n,bb->content+pos,(bb->length - pos)); + memcpy(bb->content+pos,s,n); + bb->length += n; + bb->content[bb->length] = '\0'; + return TRUE; +} diff --git a/libdispatch/nch5s3comms.c b/libdispatch/nch5s3comms.c index 29aedea326..0fc97deb6f 100644 --- a/libdispatch/nch5s3comms.c +++ b/libdispatch/nch5s3comms.c @@ -92,7 +92,7 @@ #include "netcdf.h" #include "ncuri.h" -#include "ncutil.h" +#include "netcdf_vutils.h" /*****************/ @@ -292,7 +292,6 @@ static int build_range(size_t offset, size_t len, char** rangep); static const char* verbtext(HTTPVerb verb); static int trace(CURL* curl, int onoff); static int sortheaders(VList* headers); -static int httptonc(long httpcode); static void hrb_node_free(hrb_node_t *node); #if S3COMMS_DEBUG_HRB @@ -789,7 +788,7 @@ NCH5_s3comms_s3r_close(s3r_t *handle) *---------------------------------------------------------------------------- */ int -NCH5_s3comms_s3r_getsize(s3r_t *handle, const char* url, long long* sizep) +NCH5_s3comms_s3r_getsize(s3r_t *handle, const char* url, long long* sizep, long* httpcodep) { int ret_value = SUCCEED; char* contentlength = NULL; @@ -806,7 +805,8 @@ NCH5_s3comms_s3r_getsize(s3r_t *handle, const char* url, long long* sizep) if((ret_value = NCH5_s3comms_s3r_head(handle, url, "Content-Length", NULL, &httpcode, &contentlength))) HGOTO_ERROR(H5E_ARGS, ret_value, FAIL, "NCH5_s3comms_s3r_head failed."); - if((ret_value = httptonc(httpcode))) goto done; + /* Content-Length will not be defined if object does not exist */ + if(httpcode == 404) goto done; /****************** * PARSE RESPONSE * @@ -816,7 +816,7 @@ NCH5_s3comms_s3r_getsize(s3r_t *handle, const char* url, long long* sizep) if(value == NULL) HGOTO_ERROR(H5E_ARGS, NC_EINVAL, FAIL, "could not find content length value"); value++; - content_length = strtoumax(value, NULL, 0); + content_length = (long long)strtoumax(value, NULL, 0); if (UINTMAX_MAX > SIZE_MAX && content_length > SIZE_MAX) HGOTO_ERROR(H5E_ARGS, NC_ERANGE, FAIL, "content_length overflows size_t"); @@ -825,9 +825,9 @@ NCH5_s3comms_s3r_getsize(s3r_t *handle, const char* url, long long* sizep) "could not convert found \"Content-Length\" response (\"%s\")", contentlength); /* range is null-terminated, remember */ - if(sizep) {*sizep = (long long)content_length;} - done: + if(sizep) {*sizep = (long long)content_length;} + if(httpcodep) *httpcodep = httpcode; nullfree(contentlength); return UNTRACEX(ret_value,"size=%lld",(sizep?-1:*sizep)); } /* NCH5_s3comms_s3r_getsize */ @@ -859,17 +859,13 @@ NCH5_s3comms_s3r_deletekey(s3r_t *handle, const char* url, long* httpcodep) if((ret_value = NCH5_s3comms_s3r_execute(handle, url, HTTPDELETE, NULL, NULL, NULL, &httpcode, data))) HGOTO_ERROR(H5E_ARGS, ret_value, FAIL, "execute failed."); - - /****************** - * RESPONSE * - ******************/ - if((ret_value = httptonc(httpcode))) goto done; - if(httpcode != 204) - HGOTO_ERROR(H5E_ARGS, NC_ECANTREMOVE, FAIL, "deletekey failed."); + + /* Apparently, aws delivers a 204 response if it successfully deletes the key */ + if(httpcode == 204) httpcode = 200; /* treat 204 as success */ done: - vsfree(data); if(httpcodep) *httpcodep = httpcode; + vsfree(data); return UNTRACEX(ret_value,"httpcode=%d",INULL(httpcodep)); } /* NCH5_s3comms_s3r_getsize */ @@ -911,12 +907,8 @@ NCH5_s3comms_s3r_head(s3r_t *handle, const char* url, const char* header, const if (SUCCEED != NCH5_s3comms_s3r_execute(handle, url, HTTPHEAD, NULL, header, NULL, &httpcode, data)) HGOTO_ERROR(H5E_ARGS, NC_EINVAL, FAIL, "problem in reading during getsize."); - if((ret_value = httptonc(httpcode))) goto done; - if(header != NULL) { - if(vslength(data) == 0) - HGOTO_ERRORVA(H5E_ARGS, NC_EINVAL, FAIL, "HTTP metadata: header=%s; not found",header); - else if (vslength(data) > CURL_MAX_HTTP_HEADER) + if (vslength(data) > CURL_MAX_HTTP_HEADER) HGOTO_ERROR(H5E_ARGS, NC_EINVAL, FAIL, "HTTP metadata buffer overrun"); #if S3COMMS_DEBUG else @@ -930,7 +922,7 @@ NCH5_s3comms_s3r_head(s3r_t *handle, const char* url, const char* header, const if(header != NULL) { char* content; - content = vsextract(data); + content = vsextract(data); /* guaranteed to exist */ if(valuep) {*valuep = content;} } @@ -1222,7 +1214,7 @@ NCH5_s3comms_s3r_open(const char* root, NCS3SVC svc, const char *region, const c *---------------------------------------------------------------------------- */ int -NCH5_s3comms_s3r_read(s3r_t *handle, const char* url, size_t offset, size_t len, s3r_buf_t* dest) +NCH5_s3comms_s3r_read(s3r_t *handle, const char* url, size_t offset, size_t len, s3r_buf_t* dest, long* httpcodep) { char *rangebytesstr = NULL; int ret_value = SUCCEED; @@ -1251,9 +1243,9 @@ NCH5_s3comms_s3r_read(s3r_t *handle, const char* url, size_t offset, size_t len, if((ret_value = NCH5_s3comms_s3r_execute(handle, url, HTTPGET, rangebytesstr, NULL, NULL, &httpcode, wrap))) HGOTO_ERROR(H5E_ARGS, ret_value, FAIL, "execute failed."); - if((ret_value = httptonc(httpcode))) goto done; done: + if(httpcodep) *httpcodep = httpcode; (void)vsextract(wrap); vsfree(wrap); /* clean any malloc'd resources */ @@ -1271,7 +1263,7 @@ NCH5_s3comms_s3r_read(s3r_t *handle, const char* url, size_t offset, size_t len, *---------------------------------------------------------------------------- */ int -NCH5_s3comms_s3r_write(s3r_t *handle, const char* url, const s3r_buf_t* data) +NCH5_s3comms_s3r_write(s3r_t *handle, const char* url, const s3r_buf_t* data, long* httpcodep) { int ret_value = SUCCEED; VList* otherheaders = vlistnew(); @@ -1301,9 +1293,10 @@ NCH5_s3comms_s3r_write(s3r_t *handle, const char* url, const s3r_buf_t* data) vssetlength(wrap,data->count); if((ret_value = NCH5_s3comms_s3r_execute(handle, url, HTTPPUT, NULL, NULL, (const char**)vlistcontents(otherheaders), &httpcode, wrap))) HGOTO_ERROR(H5E_ARGS, ret_value, FAIL, "execute failed."); - if((ret_value = httptonc(httpcode))) goto done; + done: + if(httpcodep) *httpcodep = httpcode; (void)vsextract(wrap); vsfree(wrap); /* clean any malloc'd resources */ @@ -1321,7 +1314,7 @@ NCH5_s3comms_s3r_write(s3r_t *handle, const char* url, const s3r_buf_t* data) *---------------------------------------------------------------------------- */ int -NCH5_s3comms_s3r_getkeys(s3r_t *handle, const char* url, s3r_buf_t* response) +NCH5_s3comms_s3r_getkeys(s3r_t *handle, const char* url, s3r_buf_t* response, long* httpcodep) { int ret_value = SUCCEED; const char* otherheaders[3] = {"Content-Type", "application/xml", NULL}; @@ -1340,13 +1333,13 @@ NCH5_s3comms_s3r_getkeys(s3r_t *handle, const char* url, s3r_buf_t* response) if((SUCCEED != NCH5_s3comms_s3r_execute(handle, url, HTTPGET, NULL, NULL, otherheaders, &httpcode, content))) HGOTO_ERROR(H5E_ARGS, ret_value, FAIL, "execute failed."); - if((ret_value = httptonc(httpcode))) goto done; if(response) { response->count = vslength(content); response->content = vsextract(content); } done: + if(httpcodep) *httpcodep = httpcode; vsfree(content); /* clean any malloc'd resources */ curl_reset(handle); @@ -1421,7 +1414,7 @@ NCH5_s3comms_aws_canonical_request(VString* canonical_request_dest, VString* sig { hrb_node_t *node = NULL; int ret_value = SUCCEED; - int i; + size_t i; const char* sverb = verbtext(verb); const char* query_params = (query?query:""); @@ -1568,7 +1561,7 @@ NCH5_s3comms_HMAC_SHA256(const unsigned char *key, size_t key_len, const char *m #else if(CURLE_OK != Curl_hmacit(Curl_HMAC_SHA256, key, key_len, - msg, msg_len, + (const unsigned char*)msg, msg_len, md)) HGOTO_ERROR(H5E_ARGS, NC_EINTERNAL, FAIL, "Curl_hmacit failure."); #endif @@ -2052,7 +2045,7 @@ NCH5_s3comms_signing_key(unsigned char **mdp, const char *secret, const char *re HMAC(EVP_sha256(), (const unsigned char *)dateregionservicekey, SHA256_DIGEST_LENGTH, (const unsigned char *)"aws4_request", 12, md, NULL); #else - Curl_hmacit(Curl_HMAC_SHA256, (const unsigned char *)AWS4_secret, (int)nulllen(AWS4_secret), + Curl_hmacit(Curl_HMAC_SHA256, (const unsigned char *)AWS4_secret, nulllen(AWS4_secret), (const unsigned char *)iso8601now, 8, /* 8 --> length of 8 --> "yyyyMMDD" */ datekey); Curl_hmacit(Curl_HMAC_SHA256, (const unsigned char *)datekey, SHA256_DIGEST_LENGTH, (const unsigned char *)region, @@ -2390,7 +2383,8 @@ build_request(s3r_t* handle, NCURI* purl, VString* payload, HTTPVerb verb) { - int i,ret_value = SUCCEED; + int ret_value = SUCCEED; + size_t i; struct curl_slist *curlheaders = NULL; hrb_node_t *node = NULL; hrb_t *request = NULL; @@ -2788,32 +2782,6 @@ sortheaders(VList* headers) return (ret_value); } -static int -httptonc(long httpcode) -{ - int stat = NC_NOERR; - if(httpcode <= 99) stat = NC_EINTERNAL; /* should never happen */ - else if(httpcode <= 199) - stat = NC_NOERR; /* I guess */ - else if(httpcode <= 299) { - switch (httpcode) { - default: stat = NC_NOERR; break; - } - } else if(httpcode <= 399) - stat = NC_NOERR; /* ? */ - else if(httpcode <= 499) { - switch (httpcode) { - case 400: stat = NC_EINVAL; break; - case 401: case 402: case 403: - stat = NC_EAUTH; break; - case 404: stat = NC_EEMPTY; break; - default: stat = NC_EINVAL; break; - } - } else - stat = NC_ES3; - return stat; -} - /**************************************************/ /* Request Tracing */ diff --git a/libdispatch/nch5s3comms.h b/libdispatch/nch5s3comms.h index 7cc482df66..f7c383e037 100644 --- a/libdispatch/nch5s3comms.h +++ b/libdispatch/nch5s3comms.h @@ -506,13 +506,13 @@ EXTERNL s3r_t *NCH5_s3comms_s3r_open(const char* root, NCS3SVC svc, const char* EXTERNL int NCH5_s3comms_s3r_close(s3r_t *handle); -EXTERNL int NCH5_s3comms_s3r_read(s3r_t *handle, const char* url, size_t offset, size_t len, s3r_buf_t* data); +EXTERNL int NCH5_s3comms_s3r_read(s3r_t *handle, const char* url, size_t offset, size_t len, s3r_buf_t* data, long* httpcodep); -EXTERNL int NCH5_s3comms_s3r_write(s3r_t *handle, const char* url, const s3r_buf_t* data); +EXTERNL int NCH5_s3comms_s3r_write(s3r_t *handle, const char* url, const s3r_buf_t* data, long* httpcodep); -EXTERNL int NCH5_s3comms_s3r_getkeys(s3r_t *handle, const char* url, s3r_buf_t* response); +EXTERNL int NCH5_s3comms_s3r_getkeys(s3r_t *handle, const char* url, s3r_buf_t* response, long* httpcodep); -EXTERNL int NCH5_s3comms_s3r_getsize(s3r_t *handle, const char* url, long long * sizep); +EXTERNL int NCH5_s3comms_s3r_getsize(s3r_t *handle, const char* url, long long * sizep, long* httpcodep); EXTERNL int NCH5_s3comms_s3r_deletekey(s3r_t *handle, const char* url, long* httpcodep); diff --git a/libdispatch/ncjson.c b/libdispatch/ncjson.c index 6a9b46cc6d..30e272b9b2 100644 --- a/libdispatch/ncjson.c +++ b/libdispatch/ncjson.c @@ -6,9 +6,18 @@ TODO: make utf8 safe */ +/* +WARNING: +If you modify this file, +then you need to got to +the include/ directory +and do the command: + make makenetcdfjson +*/ + #ifdef HAVE_CONFIG_H #include "config.h" -#endif +#endif /*HAVE_CONFIG_H*/ #include #include #include @@ -16,20 +25,19 @@ TODO: make utf8 safe #include "ncjson.h" +#undef NCJCATCH #undef NCJDEBUG -#define NCJTRACE +#undef NCJTRACE -#ifdef NCJDEBUG +#ifdef NCJCATCH /* Warning: do not evaluate err more than once */ #define NCJTHROW(err) ncjbreakpoint(err) static int ncjbreakpoint(int err) {return err;} -#else +#else /*!NCJCATCH*/ #define NCJTHROW(err) (err) -#endif +#endif /*NCJCATCH*/ /**************************************************/ -#define NCJ_OK 0 -#define NCJ_ERR (-1) #define NCJ_EOF -2 @@ -48,6 +56,8 @@ static int ncjbreakpoint(int err) {return err;} /* JSON_WORD Subsumes Number also */ #define JSON_WORD "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_$+-." +#define NCJ_DEFAULTALLOC 16 + /**************************************************/ typedef struct NCJparser { char* text; @@ -61,6 +71,7 @@ typedef struct NCJparser { # define NCJ_TRACE 1 } NCJparser; +/* This is used only by the unparser */ typedef struct NCJbuf { size_t len; /* |text|; does not include nul terminator */ char* text; /* NULL || nul terminated */ @@ -71,20 +82,20 @@ typedef struct NCJbuf { #if defined(_WIN32) && !defined(__MINGW32__) #define strdup _strdup #define strcasecmp _stricmp -#else +#else /*!WIN32 || __MINGW32*/ #include -#endif +#endif /*defined(_WIN32) && !defined(__MINGW32__)*/ #ifndef nullfree #define nullfree(x) {if(x)free(x);} -#endif +#endif /*nullfree*/ #ifndef nulldup #define nulldup(x) ((x)?strdup(x):(x)) -#endif +#endif /*nulldup*/ #if defined NCJDEBUG || defined NCJTRACE static char* tokenname(int token); -#endif +#endif /*defined NCJDEBUG || defined NCJTRACE*/ /**************************************************/ /* Forward */ @@ -101,39 +112,38 @@ static void NCJreclaimArray(struct NCjlist*); static void NCJreclaimDict(struct NCjlist*); static int NCJunescape(NCJparser* parser); static char unescape1(char c); + static int listappend(struct NCjlist* list, NCjson* element); +static int listsetalloc(struct NCjlist* list, size_t sz); +static int listlookup(const struct NCjlist* list, const char* key, size_t* indexp); static int NCJcloneArray(const NCjson* array, NCjson** clonep); static int NCJcloneDict(const NCjson* dict, NCjson** clonep); + +/* These are used only by the unparser */ static int NCJunparseR(const NCjson* json, NCJbuf* buf, unsigned flags); static int bytesappendquoted(NCJbuf* buf, const char* s); static int bytesappend(NCJbuf* buf, const char* s); static int bytesappendc(NCJbuf* bufp, char c); -/* Static'ize everything for plugins */ +/* Hide everything for plugins */ #ifdef NETCDF_JSON_H #define OPTSTATIC static -static int NCJparsen(size_t len, const char* text, unsigned flags, NCjson** jsonp); -static int NCJnew(int sort, NCjson** objectp); -static int NCJnewstring(int sort, const char* value, NCjson** jsonp); -static int NCJnewstringn(int sort, size_t len, const char* value, NCjson** jsonp); -static int NCJclone(const NCjson* json, NCjson** clonep); -static int NCJaddstring(NCjson* json, int sort, const char* s); -static int NCJinsert(NCjson* object, const char* key, NCjson* jvalue); -static int NCJinsertstring(NCjson* object, const char* key, const char* value); -static int NCJinsertint(NCjson* object, const char* key, long long ivalue); -static int NCJappend(NCjson* object, NCjson* value); -static int NCJunparse(const NCjson* json, unsigned flags, char** textp); #else /*!NETCDF_JSON_H*/ #define OPTSTATIC #endif /*NETCDF_JSON_H*/ +/* List legal nan and infinity names (lower case); keep in strcasecmp sorted order */ +static const char* NANINF[] = {"-infinity","infinity","-infinityf","infinityf","nan","nanf"}; +static const size_t NNANINF = 6; + /**************************************************/ OPTSTATIC int NCJparse(const char* text, unsigned flags, NCjson** jsonp) { - return NCJparsen(strlen(text),text,flags,jsonp); + size_t textlen = strlen(text); + return NCJparsen(textlen,text,flags,jsonp); } OPTSTATIC int @@ -167,7 +177,7 @@ NCJparsen(size_t len, const char* text, unsigned flags, NCjson** jsonp) parser->status = NCJ_OK; #ifdef NCJDEBUG fprintf(stderr,"json: |%s|\n",parser->text); -#endif +#endif /*NCJDEBUG*/ if((stat=NCJparseR(parser,&json))==NCJ_ERR) goto done; /* Must consume all of the input */ if(parser->pos != (parser->text+len)) {stat = NCJ_ERR; goto done;} @@ -375,7 +385,7 @@ NCJlex(NCJparser* parser) } else if(c == NCJ_ESCAPE) { parser->pos++; c = *parser->pos; - *parser->pos = unescape1(c); + *parser->pos = (char)unescape1(c); continue; } else if(strchr(JSON_WORD, c) != NULL) { start = parser->pos; @@ -388,14 +398,14 @@ NCJlex(NCJparser* parser) count = (size_t)((parser->pos) - start); if(NCJyytext(parser,start,count)) goto done; /* Discriminate the word string to get the proper sort */ - if(testbool(parser->yytext) == NCJ_OK) + if(testbool(parser->yytext)) token = NCJ_BOOLEAN; /* do int test first since double subsumes int */ - else if(testint(parser->yytext) == NCJ_OK) + else if(testint(parser->yytext)) token = NCJ_INT; - else if(testdouble(parser->yytext) == NCJ_OK) + else if(testdouble(parser->yytext)) token = NCJ_DOUBLE; - else if(testnull(parser->yytext) == NCJ_OK) + else if(testnull(parser->yytext)) token = NCJ_NULL; else token = NCJ_STRING; @@ -422,7 +432,7 @@ NCJlex(NCJparser* parser) } #ifdef NCJDEBUG fprintf(stderr,"%s(%d): |%s|\n",tokenname(token),token,parser->yytext); -#endif +#endif /*NCJDEBUG*/ } /*for(;;)*/ done: if(parser->status == NCJ_ERR) @@ -436,16 +446,15 @@ fprintf(stderr,"%s(%d): |%s|\n",tokenname(token),token,parser->yytext); } fprintf(stderr,">>>> token=%s:'%s'\n",tokenname(token),(txt?txt:"")); } -#endif +#endif /*NCJTRACE*/ return token; } static int testnull(const char* word) { - if(strcasecmp(word,NCJ_TAG_NULL)==0) - return NCJTHROW(NCJ_OK); - return NCJTHROW(NCJ_ERR); + if(strcasecmp(word,NCJ_TAG_NULL)==0) return 1; + return 0; } static int @@ -453,8 +462,8 @@ testbool(const char* word) { if(strcasecmp(word,NCJ_TAG_TRUE)==0 || strcasecmp(word,NCJ_TAG_FALSE)==0) - return NCJTHROW(NCJ_OK); - return NCJTHROW(NCJ_ERR); + return 1; + return 0; } static int @@ -465,7 +474,17 @@ testint(const char* word) int count = 0; /* Try to convert to number */ ncvt = sscanf(word,"%lld%n",&i,&count); - return NCJTHROW((ncvt == 1 && strlen(word)==(size_t)count ? NCJ_OK : NCJ_ERR)); + return (ncvt == 1 && strlen(word)==((size_t)count) ? 1 : 0); +} + +static int +nancmp(const void* keyp, const void* membpp) +{ + int cmp; + const char* key = (const char*)keyp; + const char** membp = (const char**)membpp; + cmp = strcasecmp(key,*membp); + return cmp; } static int @@ -474,17 +493,14 @@ testdouble(const char* word) int ncvt; double d; int count = 0; + void* pos = NULL; + /* Check for Nan and Infinity */ - if(0==(int)strcasecmp("nan",word)) return NCJTHROW(NCJ_OK); - if(0==(int)strcasecmp("infinity",word)) return NCJTHROW(NCJ_OK); - if(0==(int)strcasecmp("-infinity",word)) return NCJTHROW(NCJ_OK); - /* Allow the XXXf versions as well */ - if(0==(int)strcasecmp("nanf",word)) return NCJTHROW(NCJ_OK); - if(0==(int)strcasecmp("infinityf",word)) return NCJTHROW(NCJ_OK); - if(0==(int)strcasecmp("-infinityf",word)) return NCJTHROW(NCJ_OK); + pos = bsearch(word, NANINF, NNANINF, sizeof(char*), nancmp); + if(pos != NULL) return 1; /* Try to convert to number */ - ncvt = sscanf(word,"%lg%n",&d,&count); - return NCJTHROW((ncvt == 1 && strlen(word)==(size_t)count ? NCJ_OK : NCJ_ERR)); + ncvt = sscanf(word,"%lg%n",&d,&count); + return (ncvt == 1 && strlen(word)==((size_t)count) ? 1 : 0); } static int @@ -537,6 +553,7 @@ NCJreclaimArray(struct NCjlist* array) } nullfree(array->contents); array->contents = NULL; + array->len = 0; } static void @@ -593,8 +610,7 @@ NCJnewstringn(int sort, size_t len, const char* value, NCjson** jsonp) if(jsonp) *jsonp = NULL; if(value == NULL) {stat = NCJTHROW(NCJ_ERR); goto done;} - if((stat = NCJnew(sort,&json))==NCJ_ERR) - goto done; + if((stat = NCJnew(sort,&json))==NCJ_ERR) goto done; if((json->string = (char*)malloc(len+1))==NULL) {stat = NCJTHROW(NCJ_ERR); goto done;} memcpy(json->string,value,len); @@ -607,18 +623,20 @@ NCJnewstringn(int sort, size_t len, const char* value, NCjson** jsonp) } OPTSTATIC int -NCJdictget(const NCjson* dict, const char* key, const NCjson** valuep) +NCJdictget(const NCjson* dict, const char* key, NCjson** jvaluep) { int stat = NCJ_OK; size_t i; if(dict == NULL || dict->sort != NCJ_DICT) {stat = NCJTHROW(NCJ_ERR); goto done;} - if(valuep) {*valuep = NULL;} - for(i=0;istring != NULL && strcmp(jkey->string,key)==0) { - if(valuep) {*valuep = NCJith(dict,i+1); break;} + if(jvaluep) {*jvaluep = jvalue;} + break; } } @@ -626,6 +644,17 @@ NCJdictget(const NCjson* dict, const char* key, const NCjson** valuep) return NCJTHROW(stat); } +/* Functional version of NCJdictget */ +OPTSTATIC NCjson* +NCJdictlookup(const NCjson* dict, const char* key) +{ + int stat; + NCjson* jvalue = NULL; + stat = NCJdictget(dict,key,&jvalue); + if(stat != NCJ_OK) jvalue = NULL; + return jvalue; +} + /* Unescape the text in parser->yytext; can do in place because unescaped string will always be shorter */ @@ -649,7 +678,7 @@ NCJunescape(NCJparser* parser) default: break;/* technically not Json conformant */ } } - *q++ = c; + *q++ = (char)c; } *q = '\0'; return NCJTHROW(NCJ_OK); @@ -695,7 +724,7 @@ tokenname(int token) } return ("NCJ_UNDEF"); } -#endif +#endif /*defined NCJDEBUG || defined NCJTRACE*/ /* Convert a JSON value to an equivalent value of a specified sort */ OPTSTATIC int @@ -723,7 +752,7 @@ NCJcvt(const NCjson* jvalue, int outsort, struct NCJconst* output) break; case CASE(NCJ_INT,NCJ_BOOLEAN): - sscanf(jvalue->string,"%lldd",&output->ival); + sscanf(jvalue->string,"%lld",&output->ival); output->bval = (output->ival?1:0); break; case CASE(NCJ_INT,NCJ_INT): @@ -778,30 +807,72 @@ static int listappend(struct NCjlist* list, NCjson* json) { int stat = NCJ_OK; - NCjson** newcontents = NULL; assert(list->len == 0 || list->contents != NULL); - if(json == NULL) - {stat = NCJTHROW(NCJ_ERR); goto done;} - if(list->len == 0) { - nullfree(list->contents); - list->contents = (NCjson**)calloc(2,sizeof(NCjson*)); - if(list->contents == NULL) - {stat = NCJTHROW(NCJ_ERR); goto done;} - list->contents[0] = json; - list->len++; - } else { - if((newcontents = (NCjson**)calloc((size_t)(2*list->len)+1,sizeof(NCjson*)))==NULL) - {stat = NCJTHROW(NCJ_ERR); goto done;} - memcpy(newcontents,list->contents, (size_t)list->len*sizeof(NCjson*)); - newcontents[list->len] = json; - list->len++; - free(list->contents); - list->contents = newcontents; newcontents = NULL; + if(json == NULL) {stat = NCJTHROW(NCJ_ERR); goto done;} + /* Make space for two new elements; better than one, but still probably not optimal */ + if((stat = listsetalloc(list,list->len + 2))<0) goto done; + /* Append the new item */ + list->contents[list->len++] = json; + +done: + return NCJTHROW(stat); +} + +/* Locate the index of a key in a list of (key,value) pairs. +@param list pointer to the list +@param key for which to search +@param indexp store index of match here +@return NCJ_OK if key found, NCJ_EOF if not found, NCJ_ERR if error +*/ +static int +listlookup(const struct NCjlist* list, const char* key, size_t* indexp) +{ + int stat = NCJ_OK; + int i,len,match = -1; + + if(list == NULL || key == NULL || strlen(key) == 0 || list->len %2 == 1) + {stat = NCJTHROW(NCJ_ERR); goto done;} + len = (int)list->len; /* => |list| < 2 billion or do */ + for(i=0;icontents[i]; + if(jkey != NULL && jkey->string != NULL && strcmp(jkey->string,key)==0) {match = i;break;} } + if(match < 0) {stat = NCJ_EOF;} else {if(indexp) *indexp = (size_t)match;} +done: + return NCJTHROW(stat); +} + +/* Increase the space available to dict/array. + Even if alloc is zero, ensure that the object's list alloc is >= 1. +@param list pointer to the list +@param alloc increase allocation to this size +@return NCJ_ERR|NCJ_OK +*/ +static int +listsetalloc(struct NCjlist* list, size_t alloc) +{ + int stat = NCJ_OK; + NCjson** newcontents = NULL; + if(list == NULL) {stat = NCJTHROW(NCJ_ERR); goto done;} + assert(list->alloc == 0 || list->contents != NULL); + if(alloc == 0) alloc = 1; /* Guarantee that the list->content is not NULL */ + if(list->alloc >= alloc) goto done; + /* Since alloc > list->alloc > 0, we need to allocate space */ + if((newcontents=(NCjson**)calloc(alloc,sizeof(NCjson*))) == NULL) {stat = NCJTHROW(NCJ_ERR); goto done;} + list->alloc = alloc; + if(list->contents != NULL && list->len > 0) { + /* Preserve any existing contents */ + memcpy((void*)newcontents, + (void*)list->contents, + sizeof(NCjson*)*list->len); + } + free(list->contents); + list->contents = newcontents; newcontents = NULL; + assert(list->alloc > 0 && list->contents != NULL); done: - nullfree(newcontents); + if(newcontents != NULL) free(newcontents); return NCJTHROW(stat); } @@ -812,6 +883,8 @@ NCJclone(const NCjson* json, NCjson** clonep) { int stat = NCJ_OK; NCjson* clone = NULL; + + if(clonep) *clonep = NULL; if(json == NULL) goto done; switch(NCJsort(json)) { case NCJ_INT: @@ -846,7 +919,8 @@ NCJcloneArray(const NCjson* array, NCjson** clonep) size_t i; NCjson* clone = NULL; if((stat=NCJnew(NCJ_ARRAY,&clone))==NCJ_ERR) goto done; - for(i=0;ilist,array->list.len))<0) goto done; + for(i=0;ilist,dict->list.len))<0) goto done; + for(i=0;isort != NCJ_DICT || key == NULL || jvalue == NULL) - {stat = NCJTHROW(NCJ_ERR); goto done;} - if((stat = NCJnewstring(NCJ_STRING,key,&jkey))==NCJ_ERR) goto done; - if((stat = NCJappend(object,jkey))==NCJ_ERR) goto done; - if((stat = NCJappend(object,jvalue))==NCJ_ERR) goto done; + NCjson* jprev = NULL; + int found; + + if(jdict == NULL + || NCJsort(jdict) != NCJ_DICT + || key == NULL + || jvalue == NULL) {stat = NCJTHROW(NCJ_ERR); goto done;} + for(found=(-1),i=0;i < NCJdictlength(jdict); i++) { + jkey = NCJdictkey(jdict,i); + if (jkey != NULL && strcmp(NCJstring(jkey), key) == 0) { + found = (int)i; + break; + } + } + if(found >= 0) { + jprev = NCJdictvalue(jdict,found); + // replace existing values for new key + NCJreclaim(jprev); // free old value + NCJdictvalue(jdict,found) = jvalue; jvalue = NULL; + jkey = NULL; /* avoid reclamation */ + } else { /* not found */ + if((stat=listsetalloc(&jdict->list,jdict->list.len+2))<0) goto done; + NCJcheck(NCJnewstring(NCJ_STRING, key, (NCjson**)&jkey)); + NCJcheck(NCJappend(jdict,jkey)); jkey = NULL; + NCJcheck(NCJappend(jdict,jvalue)); jvalue = NULL; + } done: + NCJreclaim(jkey); + NCJreclaim(jvalue); return NCJTHROW(stat); } -/* Insert key-value pair as strings into a dict object. - key and value will be strdup'd */ +/* Insert key-value pair into a dict object. key will be strdup'd */ OPTSTATIC int NCJinsertstring(NCjson* object, const char* key, const char* value) { int stat = NCJ_OK; + NCjson* jkey = NULL; NCjson* jvalue = NULL; - if(value == NULL) - NCJnew(NCJ_NULL,&jvalue); - else - NCJnewstring(NCJ_STRING,value,&jvalue); - NCJinsert(object,key,jvalue); + if(key == NULL || value == NULL) + {stat = NCJTHROW(NCJ_ERR); goto done;} + if((stat = NCJnewstring(NCJ_STRING,key,&jkey))==NCJ_ERR) goto done; + if((stat = NCJnewstring(NCJ_STRING,value,&jvalue))==NCJ_ERR) goto done; + if((stat = NCJappend(object,jkey))==NCJ_ERR) goto done; + if((stat = NCJappend(object,jvalue))==NCJ_ERR) goto done; +done: return NCJTHROW(stat); } -/* Insert key-value pair with value being an integer */ +/* Insert key-value pair into a dict object. key will be strdup'd */ OPTSTATIC int -NCJinsertint(NCjson* object, const char* key, long long ivalue) +NCJinsertint(NCjson* object, const char* key, long long value) { int stat = NCJ_OK; + NCjson* jkey = NULL; NCjson* jvalue = NULL; - char digits[128]; - snprintf(digits,sizeof(digits),"%lld",ivalue); - NCJnewstring(NCJ_STRING,digits,&jvalue); - NCJinsert(object,key,jvalue); + char digits[64]; + + if(key == NULL) + {stat = NCJTHROW(NCJ_ERR); goto done;} + if((stat = NCJnewstring(NCJ_STRING,key,&jkey))==NCJ_ERR) goto done; + snprintf(digits,sizeof(digits),"%lld",value); + if((stat = NCJnewstring(NCJ_INT,digits,&jvalue))==NCJ_ERR) goto done; + listsetalloc(&object->list,object->list.len + 2); + if((stat = NCJappend(object,jkey))==NCJ_ERR) goto done; + if((stat = NCJappend(object,jvalue))==NCJ_ERR) goto done; +done: return NCJTHROW(stat); } @@ -954,6 +1065,76 @@ NCJappend(NCjson* object, NCjson* value) return NCJTHROW(NCJ_OK); } +/* Append string value to an array or dict object. */ +OPTSTATIC int +NCJappendstring(NCjson* object, int sort, const char* s) +{ + NCjson* js = NULL; + if(object == NULL || s == NULL) + return NCJTHROW(NCJ_ERR); + NCJnewstring(sort,s,&js); + switch (object->sort) { + case NCJ_ARRAY: + case NCJ_DICT: + listappend(&object->list,js); + break; + default: + return NCJTHROW(NCJ_ERR); + } + return NCJTHROW(NCJ_OK); +} + +/* Append int value into an array/dict object. */ +OPTSTATIC int +NCJappendint(NCjson* object, long long value) +{ + int stat = NCJ_OK; + NCjson* jvalue = NULL; + char digits[64]; + + snprintf(digits,sizeof(digits),"%lld",value); + NCJcheck(NCJnewstring(NCJ_INT,digits,&jvalue)); + NCJcheck(NCJappend(object,jvalue)); jvalue = NULL; +done: + NCJreclaim(jvalue); + return NCJTHROW(stat); +} + +/* Overwrite key-value pair in a dict object. + If key does not exist, then act like NCJinsert(). +*/ +OPTSTATIC int +NCJoverwrite(NCjson* dict, const char* key, NCjson* jvalue) +{ + int stat = NCJ_OK; + size_t index; + NCjson* jkey = NULL; + NCjson* oldvalue = NULL; + + if(dict == NULL + || dict->sort != NCJ_DICT + || key == NULL + || jvalue == NULL) {stat = NCJTHROW(NCJ_ERR); goto done;} + /* See if key already exists */ + switch(stat=listlookup(&dict->list,key,&index)) { + case NCJ_OK: + /* Overwrite value part */ + oldvalue = dict->list.contents[index]; + dict->list.contents[index] = jvalue; + NCJreclaim(oldvalue); + break; + case NCJ_EOF: /* Not found */ + if((stat=listsetalloc(&dict->list,dict->list.len+2))<0) goto done; + if((stat = NCJappend(dict,jkey))==NCJ_ERR) goto done; + if((stat = NCJappend(dict,jvalue))==NCJ_ERR) goto done; + break; + case NCJ_ERR: + default: goto done; + } +done: + return NCJTHROW(stat); +} + /**************************************************/ /* Unparser to convert NCjson object to text in buffer */ @@ -1047,7 +1228,7 @@ escape(const char* text, NCJbuf* buf) bytesappendc(buf,NCJ_ESCAPE); bytesappendc(buf,replace); } else - bytesappendc(buf,c); + bytesappendc(buf,(char)c); } return NCJTHROW(NCJ_OK); } @@ -1101,22 +1282,12 @@ bytesappendc(NCJbuf* bufp, const char c) return bytesappend(bufp,s); } -OPTSTATIC void -NCJdump(const NCjson* json, unsigned flags, FILE* out) -{ - char* text = NULL; - (void)NCJunparse(json,0,&text); - if(out == NULL) out = stderr; - fprintf(out,"%s\n",text); - fflush(out); - nullfree(text); -} - OPTSTATIC const char* -NCJtotext(const NCjson* json) +NCJtotext(const NCjson* json, unsigned flags) { static char outtext[4096]; char* text = NULL; + NC_UNUSED(flags); if(json == NULL) {strcpy(outtext,""); goto done;} (void)NCJunparse(json,0,&text); strncpy(outtext,text,sizeof(outtext)); @@ -1125,20 +1296,54 @@ NCJtotext(const NCjson* json) return outtext; } +OPTSTATIC void +NCJdump(const NCjson* json, unsigned flags, FILE* out) +{ + const char* text = NCJtotext(json,flags); + if(out == NULL) out = stderr; + fprintf(out,"%s\n",text); + fflush(out); +} + +static int +pairsort(const void* a, const void* b) +{ + const NCjson** j1 = (const NCjson**)a; + const NCjson** j2 = (const NCjson**)b; + return strcmp(NCJstring(*j1),NCJstring(*j2)); +} + +OPTSTATIC void +NCJdictsort(NCjson* jdict) +{ + assert(NCJsort(jdict) == NCJ_DICT); + qsort((void*)NCJcontents(jdict),NCJdictlength(jdict),2*sizeof(NCjson*),pairsort); +} + /* Hack to avoid static unused warning */ static void netcdf_supresswarnings(void) { - void* ignore; + void* ignore = NULL; ignore = (void*)netcdf_supresswarnings; - ignore = (void*)NCJinsert; - ignore = (void*)NCJaddstring; - ignore = (void*)NCJcvt; - ignore = (void*)NCJdictget; ignore = (void*)NCJparse; - ignore = (void*)NCJdump; - ignore = (void*)NCJtotext; + ignore = (void*)NCJparsen; + ignore = (void*)NCJreclaim; + ignore = (void*)NCJnew; + ignore = (void*)NCJnewstring; + ignore = (void*)NCJnewstringn; + ignore = (void*)NCJdictget; + ignore = (void*)NCJdictlookup; + ignore = (void*)NCJcvt; + ignore = (void*)NCJaddstring; + ignore = (void*)NCJappend; + ignore = (void*)NCJappendstring; + ignore = (void*)NCJappendint; + ignore = (void*)NCJinsert; ignore = (void*)NCJinsertstring; ignore = (void*)NCJinsertint; + ignore = (void*)NCJoverwrite; + ignore = (void*)NCJdictsort; + ignore = (void*)NCJdump; (void)ignore; } diff --git a/libdispatch/nclog.c b/libdispatch/nclog.c index c9664a23cf..1fd0718937 100644 --- a/libdispatch/nclog.c +++ b/libdispatch/nclog.c @@ -286,6 +286,8 @@ ncuntrace(const char* fcn, int err, const char* fmt, ...) int ncthrow(int err,const char* file,int line) { + NC_UNUSED(file); + NC_UNUSED(line); if(err == 0) return err; return ncbreakpoint(err); } diff --git a/libdispatch/ncproplist.c b/libdispatch/ncproplist.c index 77d8023a00..8a5180de59 100644 --- a/libdispatch/ncproplist.c +++ b/libdispatch/ncproplist.c @@ -27,7 +27,15 @@ #endif /**************************************************/ - +/* Hide everything for plugins */ +#ifdef NETCDF_PROPLIST_H +#define OPTSTATIC static +#else /*!NETCDF_PROPLIST_H*/ +#define OPTSTATIC +#endif /*NETCDF_PROPLIST_H*/ + +/**************************************************/ + #define MINPROPS 2 #define EXPANDFACTOR 1 @@ -49,7 +57,7 @@ static int ncproplistfree(NCproplist* plist); static int ncproplistadd(NCproplist* plist, const char* key, uintptr_t value); static int ncproplistaddbytes(NCproplist* plist, const char* key, void* value, uintptr_t size); static int ncproplistaddstring(NCproplist* plist, const char* key, const char* str); -static int ncproplistaddx(NCproplist* plist, const char* key, void* value, uintptr_t size, uintptr_t userdata, NCPreclaimfcn fcn); +static int ncproplistaddx(NCproplist* plist, const char* key, void* value, uintptr_t size, uintptr_t userdata, NCPtypefcn fcn); static int ncproplistclone(const NCproplist* src, NCproplist* clone); static int ncproplistget(const NCproplist* plist, const char* key, uintptr_t* valuep, uintptr_t* sizep); static int ncproplistith(const NCproplist* plist, size_t i, char* const * keyp, uintptr_t const * valuep, uintptr_t* sizep); @@ -59,7 +67,7 @@ static int ncproplistith(const NCproplist* plist, size_t i, char* const * keyp, /** - * Create new property list + * Create new empty property list * @return pointer to the created property list. */ OPTSTATIC NCproplist* @@ -73,151 +81,175 @@ ncproplistnew(void) } /** - * Reclaim memory used by a property list - * @param plist to reclaim - * @return NC_NOERR if succeed, NC_EXXX otherwise. + * Reclaim property pairs used by a property list + * @param plist to clear + * @return >= 0 if succeed, < 0 if fail */ -OPTSTATIC int -ncproplistfree(NCproplist* plist) +static int +ncproplistclear(NCproplist* plist) { - int stat = NC_NOERR; + int stat = 0; size_t i; if(plist == NULL) goto done; if(plist->properties != NULL) { for(i=0;icount;i++) { - NCProperty* prop = &plist->properties[i]; - void* ptr = (void*)prop->value; /* convert to ptr */ - assert(prop->flags & (NCPF_SIMPLE|NCPF_BYTES|NCPF_COMPLEX)); - if(prop->flags & NCPF_SIMPLE) continue; /* no reclaim needed */ - if(prop->flags & NCPF_BYTES) { + NCPproperty* prop = &plist->properties[i]; + void* ptr = (void*)prop->pair.value; /* convert to ptr */ + switch (prop->pair.sort) { + case NCP_CONST: /* value need not be free'd */ + break; + case NCP_BYTES: /* simple free of the value */ if(ptr != NULL) free(ptr); - } else { /* (prop->flags & NCPF_COMPLEX) */ - int ok; - assert(prop->reclaim != NULL); - ok = prop->reclaim(prop->userdata, prop->key, ptr, prop->size); - if(!ok && stat == NC_NOERR) stat = NC_EINVAL; + break; + case NCP_COMPLEX: /* Need the typeop fcn */ + assert(prop->typefcn != NULL); + stat = prop->typefcn(NCP_RECLAIM,&prop->pair,NULL); + if(stat < 0) goto done; + break; } } - free(plist->properties); } + plist->count = 0; +done: + return stat; +} + +/** + * Reclaim memory used by a property list + * @param plist to reclaim + * @return >= 0 if succeed, < 0 if fail + */ +OPTSTATIC int +ncproplistfree(NCproplist* plist) +{ + int stat = 0; + if(plist == NULL) goto done; + if((stat = ncproplistclear(plist))<0) goto done; + free(plist->properties); free(plist); done: return stat; } /** - * Add a non-reclaimable entry to the property list + * Add an NCP_CONST entry to the property list * @param plist into which the value is be inserted. * @param key * @param value - * @return NC_NOERR if succeed, NC_EXXX otherwise. + * @return >= 0 if succeed, < 0 if fail */ OPTSTATIC int ncproplistadd(NCproplist* plist, const char* key, uintptr_t value) { int stat = NC_NOERR; - NCProperty* prop = NULL; + NCPproperty* prop = NULL; size_t keylen; if(plist == NULL) goto done; if(!hasspace(plist,1)) {if((stat = extendplist(plist,(plist->count+1)*EXPANDFACTOR))) goto done;} /* extra space */ prop = &plist->properties[plist->count]; keylen = strlen(key); if(keylen > NCPROPSMAXKEY) keylen = NCPROPSMAXKEY; /* truncate */ - memcpy(prop->key,key,keylen); - prop->key[keylen] = '\0'; - prop->value = value; - prop->flags = NCPF_SIMPLE; + memcpy(prop->pair.key,key,keylen); + prop->pair.key[keylen] = '\0'; + prop->pair.value = value; + prop->pair.sort = NCP_CONST; plist->count++; done: return stat; } - + /** - * Add a reclaimable entry to the property list, where the value - * can be reclaimed using a simple free(); + * Add a byte string to the property list. + * The proplist takes control of the value => do not free. * @param plist into which the value is be inserted. * @param key * @param value ptr to memory chunk - * @param size |*value| - * @return NC_NOERR if succeed, NC_EXXX otherwise. + * @param size |value| + * @return >= 0 if succeed, < 0 if fail */ OPTSTATIC int ncproplistaddbytes(NCproplist* plist, const char* key, void* value, uintptr_t size) { int stat = NC_NOERR; - NCProperty* prop = NULL; + NCPproperty* prop = NULL; size_t keylen; + + NC_UNUSED(size); if(plist == NULL) goto done; if(!hasspace(plist,1)) {if((stat = extendplist(plist,(plist->count+1)*EXPANDFACTOR))) goto done;} /* extra space */ prop = &plist->properties[plist->count]; keylen = strlen(key); if(keylen > NCPROPSMAXKEY) keylen = NCPROPSMAXKEY; /* truncate */ - memcpy(prop->key,key,keylen); - prop->key[keylen] = '\0'; - prop->value = (uintptr_t)value; - prop->flags = NCPF_BYTES; + memcpy(prop->pair.key,key,keylen); + prop->pair.key[keylen] = '\0'; + prop->pair.value = (uintptr_t)value; + prop->pair.sort = NCP_BYTES; plist->count++; done: return stat; } /** - * Add a reclaimable entry to the property list, where the value - * can be reclaimed using a simple free(); + * Add a nul terminated string to the property list. + * Wraps ncproplistaddbytes. + * The proplist takes control of the value => do not free. * @param plist into which the value is be inserted. * @param key - * @param value ptr to memory chunk - * @param size |*value| - * @return NC_NOERR if succeed, NC_EXXX otherwise. + * @param value ptr to char* string + * @param size strlen(value)+1 + * @return >= 0 if succeed, < 0 if fail. */ OPTSTATIC int ncproplistaddstring(NCproplist* plist, const char* key, const char* str) { uintptr_t size = 0; - if(str) size = (uintptr_t)strlen(str); + if(str) size = (uintptr_t)(strlen(str)+1); return ncproplistaddbytes(plist,key,(void*)str,size); } /** * Most general case for adding a property. + * The value is always a ptr to some arbitrary complex structure. + * The proplist takes control of the value => do not free. * @param plist into which the value is be inserted. * @param key * @param value * @param size * @param userdata extra environment data for the reclaim function. - * @param fcn the reclaim function - * @return NC_NOERR if succeed, NC_EXXX otherwise. + * @param fcn the type operations function + * @return >= 0 if succeed, < 0 otherwise. */ OPTSTATIC int -ncproplistaddx(NCproplist* plist, const char* key, void* value, uintptr_t size, uintptr_t userdata, NCPreclaimfcn fcn) +ncproplistaddx(NCproplist* plist, const char* key, void* value, uintptr_t size, uintptr_t userdata, NCPtypefcn fcn) { int stat = NC_NOERR; - NCProperty* prop = NULL; + NCPproperty* prop = NULL; size_t keylen; if(plist == NULL) goto done; if(!hasspace(plist,1)) {if((stat = extendplist(plist,(plist->count+1)*EXPANDFACTOR))) goto done;} /* extra space */ prop = &plist->properties[plist->count]; keylen = strlen(key); if(keylen > NCPROPSMAXKEY) keylen = NCPROPSMAXKEY; /* truncate */ - memcpy(prop->key,key,keylen); - prop->key[keylen] = '\0'; - prop->value = (uintptr_t)value; - prop->size = size; - prop->reclaim = fcn; + memcpy(prop->pair.key,key,keylen); + prop->pair.key[keylen] = '\0'; + prop->pair.value = (uintptr_t)value; + prop->pair.size = size; + prop->typefcn = fcn; prop->userdata = userdata; - prop->flags = NCPF_COMPLEX; + prop->pair.sort = NCP_COMPLEX; plist->count++; done: return stat; } +/* Clone using the NCtypefcn to copy values */ OPTSTATIC int ncproplistclone(const NCproplist* src, NCproplist* clone) { int stat = NC_NOERR; size_t i; - NCProperty* srcprops; - NCProperty* cloneprops; + NCPproperty* srcprops; + NCPproperty* cloneprops; if(src == NULL || clone == NULL) {stat = NC_EINVAL; goto done;} if((stat=ncproplistinit(clone))) goto done; @@ -225,15 +257,23 @@ ncproplistclone(const NCproplist* src, NCproplist* clone) srcprops = src->properties; cloneprops = clone->properties; for(i=0;icount;i++) { - cloneprops[i] = srcprops[i]; - strncpy(cloneprops[i].key,srcprops[i].key,sizeof(cloneprops[i].key)); -#if 0 - cloneprops[i]->flags = srcprops[i]->flags; - cloneprops[i]->value = srcprops[i]->value; - cloneprops[i]->size = srcprops[i]->size; - cloneprops[i]->userdata = srcprops[i]->userdata; - cloneprops[i]->reclaim = srcprops->reclaim; -#endif + NCPproperty* sp = &srcprops[i]; + NCPproperty* cp = &cloneprops[i]; + void* p = NULL; + *cp = *sp; /* Do a mass copy of the property and then fixup as needed */ + switch (sp->pair.sort) { + case NCP_CONST: + break; + case NCP_BYTES: + p = malloc(cp->pair.size); + memcpy(p,(void*)sp->pair.value,sp->pair.size); + cp->pair.value = (uintptr_t)p; + break; + case NCP_COMPLEX: /* Need the typeop fcn */ + stat = sp->typefcn(NCP_COPY,&sp->pair,&cp->pair); + if(stat < 0) goto done; + break; + } } done: return stat; @@ -245,10 +285,10 @@ extendplist(NCproplist* plist, size_t nprops) { int stat = NC_NOERR; size_t newsize = plist->count + nprops; - NCProperty* newlist = NULL; + NCPproperty* newlist = NULL; if((plist->alloc >= newsize) || (nprops == 0)) goto done; /* Already enough space */ - newlist = realloc(plist->properties,newsize*sizeof(NCProperty)); + newlist = realloc(plist->properties,newsize*sizeof(NCPproperty)); if(newlist == NULL) {stat = NC_ENOMEM; goto done;} plist->properties = newlist; newlist = NULL; plist->alloc = newsize; @@ -269,14 +309,14 @@ ncproplistget(const NCproplist* plist, const char* key, uintptr_t* valuep, uintp { int stat = NC_ENOOBJECT; /* assume not found til proven otherwise */ size_t i; - NCProperty* props; + NCPproperty* props; uintptr_t value = 0; uintptr_t size = 0; if(plist == NULL || key == NULL) goto done; for(i=0,props=plist->properties;icount;i++,props++) { - if(strcmp(props->key,key)==0) { - value = props->value; - size = props->size; + if(strcmp(props->pair.key,key)==0) { + value = props->pair.value; + size = props->pair.size; stat = NC_NOERR; /* found */ break; } @@ -302,13 +342,13 @@ OPTSTATIC int ncproplistith(const NCproplist* plist, size_t i, char* const * keyp, uintptr_t const * valuep, uintptr_t* sizep) { int stat = NC_NOERR; - NCProperty* prop = NULL; + NCPproperty* prop = NULL; if(plist == NULL) goto done; if(i >= plist->count) {stat = NC_EINVAL; goto done;} prop = &plist->properties[i]; - if(keyp) *((char**)keyp) = (char*)prop->key; - if(valuep) *((uintptr_t*)valuep) = (uintptr_t)prop->value; - if(sizep) *sizep = prop->size; + if(keyp) *((char**)keyp) = (char*)prop->pair.key; + if(valuep) *((uintptr_t*)valuep) = (uintptr_t)prop->pair.value; + if(sizep) *sizep = prop->pair.size; done: return stat; } @@ -322,11 +362,17 @@ ncproplistith(const NCproplist* plist, size_t i, char* const * keyp, uintptr_t c static int ncproplistinit(NCproplist* plist) { - /* Assume property list will hold at lease MINPROPS properties */ - plist->alloc = MINPROPS; - plist->count = 0; - plist->properties = (NCProperty*)calloc(MINPROPS,sizeof(NCProperty)); - return (plist->properties?NC_NOERR:NC_ENOMEM); + int stat = 0; + /* Assume property list will hold at lease MINPROPS properties */ + if(plist->alloc == 0) { + plist->alloc = MINPROPS; + plist->properties = (NCPproperty*)calloc(plist->alloc,sizeof(NCPproperty)); + plist->count = 0; + } else { + if((stat = ncproplistclear(plist))<0) goto done; + } +done: + return stat; } /* Suppress unused statics warning */ @@ -347,4 +393,3 @@ ncproplist_unused(void) unused = (void*)ncproplistith; unused = unused; } - diff --git a/libdispatch/ncs3sdk_h5.c b/libdispatch/ncs3sdk_h5.c index 359ab2f8b3..27bb0e2038 100644 --- a/libdispatch/ncs3sdk_h5.c +++ b/libdispatch/ncs3sdk_h5.c @@ -101,6 +101,7 @@ static int s3objectsinfo(NClist* contents, NClist* keys, NClist* lens); static int s3commonprefixes(NClist* list, NClist* keys); static int mergekeysets(NClist*,NClist*,NClist*); static int rawtokeys(s3r_buf_t* response, NClist* keys, NClist* lengths, struct LISTOBJECTSV2** listv2p); +static int httptonc(long httpcode); static int queryadd(NClist* query, const char* key, const char* value); static int queryend(NClist* query, char** querystring); @@ -193,7 +194,7 @@ NC_s3sdkcreateclient(NCS3INFO* info) done: nullfree(urlroot); if(stat && s3client) { - NC_s3sdkclose(s3client,info,0,NULL); + NC_s3sdkclose(s3client,NULL); s3client = NULL; } NCNILTRACE(NC_NOERR); @@ -213,6 +214,7 @@ NC_s3sdkbucketexists(void* s3client0, const char* bucket, int* existsp, char** e if((stat = makes3fullpath(s3client->rooturl,bucket,NULL,NULL,url))) goto done; if((stat = NCH5_s3comms_s3r_head(s3client->h5s3client, ncbytescontents(url), NULL, NULL, &httpcode, NULL))) goto done; + stat = httptonc(httpcode); if(existsp) {*existsp = (stat == 0 && httpcode == 200);} done: @@ -255,7 +257,7 @@ NC_s3sdkbucketdelete(void* s3client0, NCS3INFO* info, char** errmsgp) /* @return NC_NOERR if key points to a content-bearing object. -@return NC_EEMPTY if object at key has no content. +@return NC_ENOOBJECT if object at key does not exist @return NC_EXXX return true error */ EXTERNL int @@ -265,13 +267,15 @@ NC_s3sdkinfo(void* s3client0, const char* bucket, const char* pathkey, size64_t* NCS3CLIENT* s3client = (NCS3CLIENT*)s3client0; NCbytes* url = ncbytesnew(); long long len = -1; + long httpcode = 0; NCTRACE(11,"bucket=%s pathkey=%s",bucket,pathkey); if((stat = makes3fullpath(s3client->rooturl,bucket,pathkey,NULL,url))) goto done; - if((stat = NCH5_s3comms_s3r_getsize(s3client->h5s3client, ncbytescontents(url), &len))) goto done; + if((stat = NCH5_s3comms_s3r_getsize(s3client->h5s3client, ncbytescontents(url), &len, &httpcode))) goto done; + stat = httptonc(httpcode); - if(lenp) {*lenp = len;} + if(lenp) {*lenp = (size64_t)len;} done: ncbytesfree(url); @@ -289,6 +293,7 @@ NC_s3sdkread(void* s3client0, const char* bucket, const char* pathkey, size64_t NCS3CLIENT* s3client = (NCS3CLIENT*)s3client0; NCbytes* url = ncbytesnew(); struct s3r_buf_t data = {0,NULL}; + long httpcode = 0; NCTRACE(11,"bucket=%s pathkey=%s start=%llu count=%llu content=%p",bucket,pathkey,start,count,content); @@ -297,8 +302,8 @@ NC_s3sdkread(void* s3client0, const char* bucket, const char* pathkey, size64_t /* Read the data */ data.count = count; data.content = content; - if((stat = NCH5_s3comms_s3r_read(s3client->h5s3client,ncbytescontents(url),(size_t)start,(size_t)count,&data))) goto done; - + if((stat = NCH5_s3comms_s3r_read(s3client->h5s3client,ncbytescontents(url),(size_t)start,(size_t)count,&data,&httpcode))) goto done; + stat = httptonc(httpcode); done: ncbytesfree(url); return NCUNTRACE(stat); @@ -315,6 +320,7 @@ NC_s3sdkwriteobject(void* s3client0, const char* bucket, const char* pathkey, s NCS3CLIENT* s3client = (NCS3CLIENT*)s3client0; NCbytes* url = ncbytesnew(); s3r_buf_t data; + long httpcode = 0; NCTRACE(11,"bucket=%s pathkey=%s count=%llu content=%p",bucket,pathkey,count,content); @@ -323,7 +329,8 @@ NC_s3sdkwriteobject(void* s3client0, const char* bucket, const char* pathkey, s /* Write the data */ data.count = count; data.content = (void*)content; - if((stat = NCH5_s3comms_s3r_write(s3client->h5s3client,ncbytescontents(url),&data))) goto done; + if((stat = NCH5_s3comms_s3r_write(s3client->h5s3client,ncbytescontents(url),&data,&httpcode))) goto done; + stat = httptonc(httpcode); done: ncbytesfree(url); @@ -331,28 +338,49 @@ NC_s3sdkwriteobject(void* s3client0, const char* bucket, const char* pathkey, s } EXTERNL int -NC_s3sdkclose(void* s3client0, NCS3INFO* info, int deleteit, char** errmsgp) +NC_s3sdkclose(void* s3client0, char** errmsgp) { int stat = NC_NOERR; NCS3CLIENT* s3client = (NCS3CLIENT*)s3client0; - NCTRACE(11,"info=%s deleteit=%d",NC_s3dumps3info(info),deleteit); - - if(deleteit) { - /* Delete the root key; ok it if does not exist */ - switch (stat = NC_s3sdkdeletekey(s3client0,info->bucket,info->rootkey,errmsgp)) { - case NC_NOERR: break; - case NC_EEMPTY: case NC_ENOTFOUND: stat = NC_NOERR; break; - default: break; + NCTRACE(11,""); + s3client_destroy(s3client); + return NCUNTRACE(stat); +} + +EXTERNL int +NC_s3sdktruncate(void* s3client0, const char* bucket, const char* prefix, char** errmsgp) +{ + int stat = NC_NOERR; + char* errmsg = NULL; + size_t nkeys; + char** keys = NULL; + NCS3CLIENT* s3client = (NCS3CLIENT*)s3client0; + + NCTRACE(11,"bucket=%s prefix=%s",bucket,prefix); + + if((stat = NC_s3sdklistall(s3client0,bucket,prefix,&nkeys,&keys,&errmsg))) goto done; + + if(nkeys > 0 && keys != NULL) { + size_t i; + /* Sort the list -- shortest first */ + NC_sortenvv(nkeys,keys); + for(i=0;ih5s3client, ncbytescontents(listurl), &response))) goto done; + if((stat = NCH5_s3comms_s3r_getkeys(s3client->h5s3client, ncbytescontents(listurl), &response, &httpcode))) goto done; + if((stat = httptonc(httpcode))) goto done; if((stat = rawtokeys(&response,allkeys,NULL,&listv2))) goto done; istruncated = (strcasecmp(listv2->istruncated,"true")==0?1:0); nullfree(continuetoken); @@ -430,19 +460,19 @@ In theory, the returned list should be sorted in lexical order, but it possible that it is not. */ EXTERNL int -NC_s3sdkgetkeys(void* s3client0, const char* bucket, const char* prefixkey0, size_t* nkeysp, char*** keysp, char** errmsgp) +NC_s3sdklist(void* s3client0, const char* bucket, const char* prefixkey0, size_t* nkeysp, char*** keysp, char** errmsgp) { NCTRACE(11,"bucket=%s prefixkey0=%s",bucket,prefixkey0); return NCUNTRACE(getkeys(s3client0, bucket, prefixkey0, "/", nkeysp, keysp, errmsgp)); } /* -Return a list of full keys of legal objects immediately below a specified key. +Return a list of full keys of legal objects below a specified key. Not necessarily sorted. Essentially same as getkeys, but with no delimiter. */ EXTERNL int -NC_s3sdksearch(void* s3client0, const char* bucket, const char* prefixkey0, size_t* nkeysp, char*** keysp, char** errmsgp) +NC_s3sdklistall(void* s3client0, const char* bucket, const char* prefixkey0, size_t* nkeysp, char*** keysp, char** errmsgp) { NCTRACE(11,"bucket=%s prefixkey0=%s",bucket,prefixkey0); return NCUNTRACE(getkeys(s3client0, bucket, prefixkey0, NULL, nkeysp, keysp, errmsgp)); @@ -461,6 +491,11 @@ NC_s3sdkdeletekey(void* s3client0, const char* bucket, const char* pathkey, char if((stat = makes3fullpath(s3client->rooturl,bucket,pathkey,NULL,url))) goto done; if((stat = NCH5_s3comms_s3r_deletekey(s3client->h5s3client, ncbytescontents(url), &httpcode))) goto done; + switch(stat = httptonc(httpcode)) { + case NC_NOERR: break; + case NC_ENOOBJECT: break; /* does not exist */ + default: goto done; + } done: ncbytesfree(url); @@ -700,8 +735,8 @@ parse_listbucketresult(char* xml, unsigned long long xmllen, struct LISTOBJECTSV if(resultp) {*resultp = result; result = NULL;} done: - if(result) reclaim_listobjectsv2(result); if(doc) ncxml_free(doc); + if(result) reclaim_listobjectsv2(result); return NCTHROW(stat); } @@ -1061,3 +1096,33 @@ queryinsert(NClist* list, char* ekey, char* evalue) return NCTHROW(stat); } +static int +httptonc(long httpcode) +{ + int stat = NC_NOERR; + if(httpcode == 0) + stat = NC_NOERR; + else if(httpcode <= 99) + stat = NC_EINTERNAL; /* should never happen */ + else if(httpcode <= 199) + stat = NC_NOERR; /* I guess */ + else if(httpcode <= 299) { + switch (httpcode) { + case 200: stat = NC_NOERR; break; + case 204: stat = NC_ENOOBJECT; break; + default: stat = NC_NOERR; break; + } + } else if(httpcode <= 399) + stat = NC_NOERR; /* ? */ + else if(httpcode <= 499) { + switch (httpcode) { + case 400: stat = NC_EINVAL; break; + case 401: case 402: case 403: + stat = NC_EAUTH; break; + case 404: stat = NC_ENOOBJECT; break; + default: stat = NC_EINVAL; break; + } + } else + stat = NC_ES3; + return stat; +} diff --git a/libncxml/ncxml_tinyxml2.cpp b/libncxml/ncxml_tinyxml2.cpp index 4307fa630a..e87e22a108 100644 --- a/libncxml/ncxml_tinyxml2.cpp +++ b/libncxml/ncxml_tinyxml2.cpp @@ -14,6 +14,10 @@ using namespace tinyxml2; +#ifdef _MSC_VER /*Do not use _WIN32*/ +#define XMLDocument tinyxml2::XMLDocument +#endif + static int ncxml_initialized = 0; void diff --git a/libnczarr/CMakeLists.txt b/libnczarr/CMakeLists.txt index c460e1d76e..01c1d4410c 100644 --- a/libnczarr/CMakeLists.txt +++ b/libnczarr/CMakeLists.txt @@ -5,10 +5,18 @@ # # Dennis Heimbigner -#add_compile_options("/showincludes") - # The source files for the HDF5 dispatch layer. -set(libnczarr_SOURCES +SET(libnczarr_SOURCES +zmetadata.c +zmetadata0.c +zmetadata2.c +zmetadata3.c +zformat.c +zformat3.c +zformat2.c +zsync.c +zinfer.c +zfill.c zarr.c zattr.c zxcache.c @@ -26,13 +34,18 @@ zmap_file.c zodom.c zopen.c zprov.c -zsync.c ztype.c zutil.c zvar.c zwalk.c +znc4.c +zfilter.c +zplugins.c zdebug.c +zmetadata.h +zformat.h zarr.h +zfill.h zcache.h zchunking.h zdispatch.h @@ -43,21 +56,26 @@ zodom.h zprovenance.h zplugins.h zfilter.h +znc4.h zdebug.h ) if(NETCDF_ENABLE_NCZARR_ZIP) - set(libnczarr_SOURCES ${libnczarr_SOURCES} zmap_zip.c) + SET(libnczarr_SOURCES ${libnczarr_SOURCES} zmap_zip.c) endif() -if(NETCDF_ENABLE_NCZARR_FILTERS) -set(libnczarr_SOURCES ${libnczarr_SOURCES} zfilter.c zplugins.c) +if(NETCDF_ENABLE_S3) + SET(libnczarr_SOURCES ${libnczarr_SOURCES} zmap_s3sdk.c) endif() -if(NETCDF_ENABLE_S3) - set(libnczarr_SOURCES ${libnczarr_SOURCES} zmap_s3sdk.c) +if(NETCDF_ENABLE_NCZARR_FILTERS) + SET(libnczarr_SOURCES ${libnczarr_SOURCES} zfilter.c) endif() +set_property(SOURCE zdispatch.c + PROPERTY + SKIP_UNITY_BUILD_INCLUSION ON) + # Build the Zarr dispatch layer as a library that will be included in # the netCDF library. @@ -81,5 +99,20 @@ if (NETCDF_ENABLE_DLL) target_compile_definitions(nczarr PRIVATE DLL_NETCDF DLL_EXPORT) endif() +target_link_libraries(nczarr PUBLIC ${CURL_LIBRARIES}) +target_include_directories(nczarr PUBLIC ${CURL_INCLUDE_DIRS}) +target_compile_options(nczarr + PRIVATE + -DCURL_STATICLIB=1 +) + +if (NETCDF_ENABLE_DLL) + target_compile_definitions(nczarr PRIVATE DLL_NETCDF DLL_EXPORT) +endif() + +if(STATUS_PARALLEL) + target_link_libraries(nczarr PUBLIC MPI::MPI_C) +endif(STATUS_PARALLEL) + # Remember to package this file for CMake builds. -ADD_EXTRA_DIST(${libnczarr_SOURCES} CMakeLists.txt) +add_extra_dist(${libnczarr_SOURCES} CMakeLists.txt) diff --git a/libnczarr/Makefile.am b/libnczarr/Makefile.am index 4227516267..a1ad84a74c 100644 --- a/libnczarr/Makefile.am +++ b/libnczarr/Makefile.am @@ -29,6 +29,13 @@ noinst_LTLIBRARIES = libnczarr.la # The source files. libnczarr_la_SOURCES = \ +zmetadata.c \ +zmetadata0.c \ +zmetadata2.c \ +zformat2.c \ +zsync.c \ +zinfer.c \ +zfill.c \ zarr.c \ zattr.c \ zxcache.c \ @@ -46,13 +53,19 @@ zmap_file.c \ zodom.c \ zopen.c \ zprov.c \ -zsync.c \ ztype.c \ +zfilter.c \ +zplugins.c \ zutil.c \ zvar.c \ +zformat.c \ zwalk.c \ +znc4.c \ zdebug.c \ +zmetadata.h \ +zformat.h \ zarr.h \ +zfill.h \ zcache.h \ zchunking.h \ zdispatch.h \ @@ -63,16 +76,17 @@ zodom.h \ zprovenance.h \ zplugins.h \ zfilter.h \ +znc4.h \ zdebug.h +#ifdef NETCDF_ENABLE_NCZARR_V3 +libnczarr_la_SOURCES += zmetadata3.c zformat3.c +#endif + if NETCDF_ENABLE_NCZARR_ZIP libnczarr_la_SOURCES += zmap_zip.c endif -if NETCDF_ENABLE_NCZARR_FILTERS -libnczarr_la_SOURCES += zfilter.c zplugins.c -endif - if NETCDF_ENABLE_S3 libnczarr_la_SOURCES += zmap_s3sdk.c if NETCDF_ENABLE_S3_AWS @@ -80,8 +94,5 @@ AM_CXXFLAGS += -std=c++11 endif endif -# For now, ignore these -IGNORED=ztype.c - # Package this for cmake build. EXTRA_DIST += CMakeLists.txt diff --git a/libnczarr/SimpleBinStream.h b/libnczarr/SimpleBinStream.h deleted file mode 100644 index c76d6a6e02..0000000000 --- a/libnczarr/SimpleBinStream.h +++ /dev/null @@ -1,1107 +0,0 @@ -// The MIT License (MIT) -// Simplistic Binary Streams 1.0.3 -// Copyright (C) 2014 - 2019, by Wong Shao Voon (shaovoon@yahoo.com) -// -// http://opensource.org/licenses/MIT -// -// version 0.9.2 : Optimize mem_istream constructor for const char* -// version 0.9.3 : Optimize mem_ostream vector insert -// version 0.9.4 : New ptr_istream class -// version 0.9.5 : Add Endianness Swap with compile time check -// version 0.9.6 : Using C File APIs, instead of STL file streams -// version 0.9.7 : Add memfile_istream -// version 0.9.8 : Fix GCC and Clang template errors -// version 0.9.9 : Fix bug of getting previous value when reading empty string -// version 1.0.0 : Fix buffer overrun bug when reading string (reported by imtrobin) -// version 1.0.1 : Fix memfile_istream tellg and seekg bug reported by macxfadz, -// use is_arithmetic instead of is_integral to determine swapping -// version 1.0.2 : Add overloaded open functions that take in file parameter in -// wide char type.(only available on win32) -// version 1.0.3 : Remove header -// version 1.0.4 : Fixed file_istream's seekg() and added writeat() to mem_ostream and memfile_ostream. Thanks Festering from CodeProject. - -#ifndef SimpleBinStream_H -#define SimpleBinStream_H - -#include -#include -#include -#include -#include -#include -#include - -namespace nczarr -{ - enum class Endian - { - Big, - Little - }; - using BigEndian = std::integral_constant; - using LittleEndian = std::integral_constant; - - struct SizeOf1 { }; - struct SizeOf2 { }; - struct SizeOf4 { }; - struct SizeOf8 { }; - struct UnknownSize { }; - - template - void swap_endian(T& ui, UnknownSize) - { - } - - template - void swap_endian(T& ui, SizeOf1) - { - } - - template - void swap_endian(T& ui, SizeOf8) - { - union EightBytes - { - T ui; - uint8_t arr[8]; - }; - - EightBytes fb; - fb.ui = ui; - // swap the endian - std::swap(fb.arr[0], fb.arr[7]); - std::swap(fb.arr[1], fb.arr[6]); - std::swap(fb.arr[2], fb.arr[5]); - std::swap(fb.arr[3], fb.arr[4]); - - ui = fb.ui; - } - - template - void swap_endian(T& ui, SizeOf4) - { - union FourBytes - { - T ui; - uint8_t arr[4]; - }; - - FourBytes fb; - fb.ui = ui; - // swap the endian - std::swap(fb.arr[0], fb.arr[3]); - std::swap(fb.arr[1], fb.arr[2]); - - ui = fb.ui; - } - - template - void swap_endian(T& ui, SizeOf2) - { - union TwoBytes - { - T ui; - uint8_t arr[2]; - }; - - TwoBytes fb; - fb.ui = ui; - // swap the endian - std::swap(fb.arr[0], fb.arr[1]); - - ui = fb.ui; - } - - template - using number_type = - typename std::conditional< - sizeof(T) == 1, - SizeOf1, - typename std::conditional< - sizeof(T) == 2, - SizeOf2, - typename std::conditional< - sizeof(T) == 4, - SizeOf4, - typename std::conditional< - sizeof(T) == 8, - SizeOf8, - UnknownSize - >::type - >::type - >::type - >::type; - - template - void swap_if_arithmetic(T& val, std::true_type) - { - swap_endian(val, number_type()); - } - - template - void swap_if_arithmetic(T& val, std::false_type) - { - // T is not arithmetic so do nothing - } - - template - void swap_endian_if_same_endian_is_false(T& val, std::false_type) - { - std::is_arithmetic is_integral_type; - - swap_if_arithmetic(val, is_integral_type); - } - - template - void swap_endian_if_same_endian_is_false(T& val, std::true_type) - { - // same endian so do nothing. - } - -template -class file_istream -{ -public: - file_istream() : input_file_ptr(nullptr), file_size(0L), read_length(0L) {} - file_istream(const char * file) : input_file_ptr(nullptr), file_size(0L), read_length(0L) - { - open(file); - } -#ifdef _MSC_VER - file_istream(const wchar_t * file) : input_file_ptr(nullptr), file_size(0L), read_length(0L) - { - open(file); - } -#endif - ~file_istream() - { - close(); - } - void open(const char * file) - { - close(); -#ifdef _MSC_VER - input_file_ptr = nullptr; - fopen_s(&input_file_ptr, file, "rb"); -#else - input_file_ptr = std::fopen(file, "rb"); -#endif - compute_length(); - } -#ifdef _MSC_VER - void open(const wchar_t * file) - { - close(); - input_file_ptr = nullptr; - _wfopen_s(&input_file_ptr, file, L"rb"); - compute_length(); - } -#endif - void close() - { - if (input_file_ptr) - { - fclose(input_file_ptr); - input_file_ptr = nullptr; - } - } - bool is_open() - { - return (input_file_ptr != nullptr); - } - long file_length() const - { - return file_size; - } - // http://www.cplusplus.com/reference/cstdio/feof/ - // stream's internal position indicator may point to the end-of-file for the - // next operation, but still, the end-of-file indicator may not be set until - // an operation attempts to read at that point. - bool eof() const // not using feof(), see above - { - return read_length >= file_size; - } - long tellg() const - { - return std::ftell(input_file_ptr); - } - void seekg (long pos) - { - std::fseek(input_file_ptr, pos, SEEK_SET); - read_length = pos; - } - void seekg (long offset, int way) - { - std::fseek(input_file_ptr, offset, way); - if (way == SEEK_END) - read_length = file_size - offset; - else if (way == SEEK_CUR) - read_length += offset; - else - read_length = offset; - } - - template - void read(T& t) - { - if(std::fread(reinterpret_cast(&t), sizeof(T), 1, input_file_ptr) != 1) - { - throw std::runtime_error("Read Error!"); - } - read_length += sizeof(T); - nczarr::swap_endian_if_same_endian_is_false(t, m_same_type); - } - void read(typename std::vector& vec) - { - if (std::fread(reinterpret_cast(&vec[0]), vec.size(), 1, input_file_ptr) != 1) - { - throw std::runtime_error("Read Error!"); - } - read_length += vec.size(); - } - void read(char* p, size_t size) - { - if (std::fread(reinterpret_cast(p), size, 1, input_file_ptr) != 1) - { - throw std::runtime_error("Read Error!"); - } - read_length += size; - } -private: - void compute_length() - { - seekg(0, SEEK_END); - file_size = tellg(); - seekg(0, SEEK_SET); - } - - std::FILE* input_file_ptr; - long file_size; - long read_length; - same_endian_type m_same_type; -}; - -template - file_istream& operator >> ( file_istream& istm, T& val) -{ - istm.read(val); - - return istm; -} - -template - file_istream& operator >> ( file_istream& istm, std::string& val) -{ - val.clear(); - - int size = 0; - istm.read(size); - - if(size<=0) - return istm; - - std::vector vec((size_t)size); - istm.read(vec); - val.assign(&vec[0], (size_t)size); - - return istm; -} - -template -class mem_istream -{ -public: - mem_istream() : m_index(0) {} - mem_istream(const char * mem, size_t size) - { - open(mem, size); - } - mem_istream(const std::vector& vec) - { - m_index = 0; - m_vec.reserve(vec.size()); - m_vec.assign(vec.begin(), vec.end()); - } - void open(const char * mem, size_t size) - { - m_index = 0; - m_vec.clear(); - m_vec.reserve(size); - m_vec.assign(mem, mem + size); - } - void close() - { - m_vec.clear(); - } - bool eof() const - { - return m_index >= m_vec.size(); - } - std::ifstream::pos_type tellg() const - { - return m_index; - } - bool seekg (size_t pos) - { - if(pos& get_internal_vec() - { - return m_vec; - } - - template - void read(T& t) - { - if(eof()) - throw std::runtime_error("Premature end of array!"); - - if((m_index + sizeof(T)) > m_vec.size()) - throw std::runtime_error("Premature end of array!"); - - std::memcpy(reinterpret_cast(&t), &m_vec[m_index], sizeof(T)); - - nczarr::swap_endian_if_same_endian_is_false(t, m_same_type); - - m_index += sizeof(T); - } - - void read(typename std::vector& vec) - { - if (eof()) - throw std::runtime_error("Premature end of array!"); - - if ((m_index + vec.size()) > m_vec.size()) - throw std::runtime_error("Premature end of array!"); - - std::memcpy(reinterpret_cast(&vec[0]), &m_vec[m_index], vec.size()); - - m_index += vec.size(); - } - - void read(char* p, size_t size) - { - if(eof()) - throw std::runtime_error("Premature end of array!"); - - if((m_index + size) > m_vec.size()) - throw std::runtime_error("Premature end of array!"); - - std::memcpy(reinterpret_cast(p), &m_vec[m_index], size); - - m_index += size; - } - - void read(std::string& str, const unsigned int size) - { - if (eof()) - throw std::runtime_error("Premature end of array!"); - - if ((m_index + size) > m_vec.size()) - throw std::runtime_error("Premature end of array!"); - - str.assign(&m_vec[m_index], size); - - m_index += str.size(); - } - -private: - std::vector m_vec; - size_t m_index; - same_endian_type m_same_type; -}; - -template - mem_istream& operator >> ( mem_istream& istm, T& val) -{ - istm.read(val); - - return istm; -} - -template -mem_istream& operator >> (mem_istream& istm, std::string& val) -{ - val.clear(); - - int size = 0; - istm.read(size); - - if(size<=0) - return istm; - - istm.read(val, size); - - return istm; -} - -template -class ptr_istream -{ -public: - ptr_istream() : m_arr(nullptr), m_size(0), m_index(0) {} - ptr_istream(const char * mem, size_t size) : m_arr(nullptr), m_size(0), m_index(0) - { - open(mem, size); - } - ptr_istream(const std::vector& vec) - { - m_index = 0; - m_arr = vec.data(); - m_size = vec.size(); - } - void open(const char * mem, size_t size) - { - m_index = 0; - m_arr = mem; - m_size = size; - } - void close() - { - m_arr = nullptr; m_size = 0; m_index = 0; - } - bool eof() const - { - return m_index >= m_size; - } - std::ifstream::pos_type tellg() const - { - return m_index; - } - bool seekg(size_t pos) - { - if (pos - void read(T& t) - { - if (eof()) - throw std::runtime_error("Premature end of array!"); - - if ((m_index + sizeof(T)) > m_size) - throw std::runtime_error("Premature end of array!"); - - std::memcpy(reinterpret_cast(&t), &m_arr[m_index], sizeof(T)); - - nczarr::swap_endian_if_same_endian_is_false(t, m_same_type); - - m_index += sizeof(T); - } - - void read(typename std::vector& vec) - { - if (eof()) - throw std::runtime_error("Premature end of array!"); - - if ((m_index + vec.size()) > m_size) - throw std::runtime_error("Premature end of array!"); - - std::memcpy(reinterpret_cast(&vec[0]), &m_arr[m_index], vec.size()); - - m_index += vec.size(); - } - - void read(char* p, size_t size) - { - if (eof()) - throw std::runtime_error("Premature end of array!"); - - if ((m_index + size) > m_size) - throw std::runtime_error("Premature end of array!"); - - std::memcpy(reinterpret_cast(p), &m_arr[m_index], size); - - m_index += size; - } - - void read(std::string& str, const unsigned int size) - { - if (eof()) - throw std::runtime_error("Premature end of array!"); - - if ((m_index + size) > m_size) - throw std::runtime_error("Premature end of array!"); - - str.assign(&m_arr[m_index], size); - - m_index += str.size(); - } - -private: - const char* m_arr; - size_t m_size; - size_t m_index; - same_endian_type m_same_type; -}; - - -template - ptr_istream& operator >> ( ptr_istream& istm, T& val) -{ - istm.read(val); - - return istm; -} - -template - ptr_istream& operator >> ( ptr_istream& istm, std::string& val) -{ - val.clear(); - - int size = 0; - istm.read(size); - - if (size <= 0) - return istm; - - istm.read(val, size); - - return istm; -} - -template -class memfile_istream -{ -public: - memfile_istream() : m_arr(nullptr), m_size(0), m_index(0) {} - memfile_istream(const char * file) : m_arr(nullptr), m_size(0), m_index(0) - { - open(file); - } -#ifdef _MSC_VER - memfile_istream(const wchar_t * file) : m_arr(nullptr), m_size(0), m_index(0) - { - open(file); - } -#endif - ~memfile_istream() - { - close(); - } - void open(const char * file) - { - close(); -#ifdef _MSC_VER - std::FILE* input_file_ptr = nullptr; - fopen_s(&input_file_ptr, file, "rb"); -#else - std::FILE* input_file_ptr = std::fopen(file, "rb"); -#endif - compute_length(input_file_ptr); - m_arr = new char[m_size]; - std::fread(m_arr, m_size, 1, input_file_ptr); - fclose(input_file_ptr); - } -#ifdef _MSC_VER - void open(const wchar_t * file) - { - close(); - std::FILE* input_file_ptr = nullptr; - _wfopen_s(&input_file_ptr, file, L"rb"); - compute_length(input_file_ptr); - m_arr = new char[m_size]; - std::fread(m_arr, m_size, 1, input_file_ptr); - fclose(input_file_ptr); - } -#endif - void close() - { - if (m_arr) - { - delete[] m_arr; - m_arr = nullptr; m_size = 0; m_index = 0; - } - } - bool is_open() - { - return (m_arr != nullptr); - } - long file_length() const - { - return m_size; - } - bool eof() const - { - return m_index >= m_size; - } - std::ifstream::pos_type tellg() const - { - return m_index; - } - bool seekg(size_t pos) - { - if (pos < m_size) - m_index = pos; - else - return false; - - return true; - } - bool seekg(std::streamoff offset, std::ios_base::seekdir way) - { - if (way == std::ios_base::beg && offset < m_size) - m_index = offset; - else if (way == std::ios_base::cur && (m_index + offset) < m_size) - m_index += offset; - else if (way == std::ios_base::end && (m_size + offset) < m_size) - m_index = m_size + offset; - else - return false; - - return true; - } - - template - void read(T& t) - { - if (eof()) - throw std::runtime_error("Premature end of array!"); - - if ((m_index + sizeof(T)) > m_size) - throw std::runtime_error("Premature end of array!"); - - std::memcpy(reinterpret_cast(&t), &m_arr[m_index], sizeof(T)); - - nczarr::swap_endian_if_same_endian_is_false(t, m_same_type); - - m_index += sizeof(T); - } - - void read(typename std::vector& vec) - { - if (eof()) - throw std::runtime_error("Premature end of array!"); - - if ((m_index + vec.size()) > m_size) - throw std::runtime_error("Premature end of array!"); - - std::memcpy(reinterpret_cast(&vec[0]), &m_arr[m_index], vec.size()); - - m_index += vec.size(); - } - - void read(char* p, size_t size) - { - if (eof()) - throw std::runtime_error("Premature end of array!"); - - if ((m_index + size) > m_size) - throw std::runtime_error("Premature end of array!"); - - std::memcpy(reinterpret_cast(p), &m_arr[m_index], size); - - m_index += size; - } - - void read(std::string& str, const unsigned int size) - { - if (eof()) - throw std::runtime_error("Premature end of array!"); - - if ((m_index + size) > m_size) - throw std::runtime_error("Premature end of array!"); - - str.assign(&m_arr[m_index], size); - - m_index += str.size(); - } - -private: - void compute_length(std::FILE* input_file_ptr) - { - std::fseek(input_file_ptr, 0, SEEK_END); - m_size = std::ftell(input_file_ptr); - std::fseek(input_file_ptr, 0, SEEK_SET); - } - - char* m_arr; - size_t m_size; - size_t m_index; - same_endian_type m_same_type; -}; - - -template - memfile_istream& operator >> ( memfile_istream& istm, T& val) -{ - istm.read(val); - - return istm; -} - -template - memfile_istream& operator >> ( memfile_istream& istm, std::string& val) -{ - val.clear(); - - int size = 0; - istm.read(size); - - if (size <= 0) - return istm; - - istm.read(val, size); - - return istm; -} - -template -class file_ostream -{ -public: - file_ostream() : output_file_ptr(nullptr) {} - file_ostream(const char * file) : output_file_ptr(nullptr) - { - open(file); - } -#ifdef _MSC_VER - file_ostream(const wchar_t * file) : output_file_ptr(nullptr) - { - open(file); - } -#endif - ~file_ostream() - { - close(); - } - void open(const char * file) - { - close(); -#ifdef _MSC_VER - output_file_ptr = nullptr; - fopen_s(&output_file_ptr, file, "wb"); -#else - output_file_ptr = std::fopen(file, "wb"); -#endif - } -#ifdef _MSC_VER - void open(const wchar_t * file) - { - close(); - output_file_ptr = nullptr; - _wfopen_s(&output_file_ptr, file, L"wb"); - } -#endif - void flush() - { - std::fflush(output_file_ptr); - } - void close() - { - if (output_file_ptr) - { - std::fclose(output_file_ptr); - output_file_ptr = nullptr; - } - } - bool is_open() - { - return output_file_ptr != nullptr; - } - template - void write(const T& t) - { - T t2 = t; - nczarr::swap_endian_if_same_endian_is_false(t2, m_same_type); - std::fwrite(reinterpret_cast(&t2), sizeof(T), 1, output_file_ptr); - } - void write(const std::vector& vec) - { - std::fwrite(reinterpret_cast(&vec[0]), vec.size(), 1, output_file_ptr); - } - void write(const char* p, size_t size) - { - std::fwrite(reinterpret_cast(p), size, 1, output_file_ptr); - } - -private: - std::FILE* output_file_ptr; - same_endian_type m_same_type; -}; - -template -file_ostream& operator << (file_ostream& ostm, const T& val) -{ - ostm.write(val); - - return ostm; -} - -template - file_ostream& operator << ( file_ostream& ostm, const std::string& val) -{ - int size = val.size(); - ostm.write(size); - - if(val.size()<=0) - return ostm; - - ostm.write(val.c_str(), val.size()); - - return ostm; -} - -template - file_ostream& operator << ( file_ostream& ostm, const char* val) -{ - int size = std::strlen(val); - ostm.write(size); - - if(size<=0) - return ostm; - - ostm.write(val, size); - - return ostm; -} - -template -class mem_ostream -{ -public: - mem_ostream() {} - void close() - { - m_vec.clear(); - } - const std::vector& get_internal_vec() - { - return m_vec; - } - template - void write(const T& t) - { - std::vector vec(sizeof(T)); - T t2 = t; - nczarr::swap_endian_if_same_endian_is_false(t2, m_same_type); - std::memcpy(reinterpret_cast(&vec[0]), reinterpret_cast(&t2), sizeof(T)); - write(vec); - } - void write(const std::vector& vec) - { - m_vec.insert(m_vec.end(), vec.begin(), vec.end()); - } - void write(const char* p, size_t size) - { - for(size_t i=0; i - void writeat(size_t pos, const T& t) - { - std::vector vec(sizeof(T)); - T t2 = t; - nczarr::swap_endian_if_same_endian_is_false(t2, m_same_type); - std::memcpy(reinterpret_cast(&vec[0]), reinterpret_cast(&t2), sizeof(T)); - writeat(pos, vec); - } - - void writeat(size_t pos, const std::vector& vec) - { - for (size_t n = 0, count = vec.size(); n < count; n++) - m_vec[pos++] = vec[n]; - } -private: - std::vector m_vec; - same_endian_type m_same_type; -}; - -template - mem_ostream& operator << ( mem_ostream& ostm, const T& val) -{ - ostm.write(val); - - return ostm; -} - -template - mem_ostream& operator << ( mem_ostream& ostm, const std::string& val) -{ - int size = val.size(); - ostm.write(size); - - if(val.size()<=0) - return ostm; - - ostm.write(val.c_str(), val.size()); - - return ostm; -} - -template - mem_ostream& operator << ( mem_ostream& ostm, const char* val) -{ - int size = std::strlen(val); - ostm.write(size); - - if(size<=0) - return ostm; - - ostm.write(val, size); - - return ostm; -} - -template -class memfile_ostream -{ -public: - memfile_ostream() {} - void close() - { - m_vec.clear(); - } - const std::vector& get_internal_vec() - { - return m_vec; - } - template - void write(const T& t) - { - std::vector vec(sizeof(T)); - T t2 = t; - nczarr::swap_endian_if_same_endian_is_false(t2, m_same_type); - std::memcpy(reinterpret_cast(&vec[0]), reinterpret_cast(&t2), sizeof(T)); - write(vec); - } - void write(const std::vector& vec) - { - m_vec.insert(m_vec.end(), vec.begin(), vec.end()); - } - void write(const char* p, size_t size) - { - for (size_t i = 0; i - void writeat(size_t pos, const T& t) - { - std::vector vec(sizeof(T)); - T t2 = t; - nczarr::swap_endian_if_same_endian_is_false(t2, m_same_type); - std::memcpy(reinterpret_cast(&vec[0]), reinterpret_cast(&t2), sizeof(T)); - writeat(pos, vec); - } - - void writeat(size_t pos, const std::vector& vec) - { - for (size_t n = 0, count = vec.size(); n < count; n++) - m_vec[pos++] = vec[n]; - } - bool write_to_file(const char* file) - { -#ifdef _MSC_VER - std::FILE* fp = nullptr; - fopen_s(&fp, file, "wb"); -#else - std::FILE* fp = std::fopen(file, "wb"); -#endif - if (fp) - { - size_t size = std::fwrite(m_vec.data(), m_vec.size(), 1, fp); - std::fflush(fp); - std::fclose(fp); - m_vec.clear(); - return size == 1u; - } - return false; - } -#ifdef _MSC_VER - bool write_to_file(const wchar_t* file) - { - std::FILE* fp = nullptr; - _wfopen_s(&fp, file, L"wb"); - if (fp) - { - size_t size = std::fwrite(m_vec.data(), m_vec.size(), 1, fp); - std::fflush(fp); - std::fclose(fp); - m_vec.clear(); - return size == 1u; - } - return false; - } -#endif - -private: - std::vector m_vec; - same_endian_type m_same_type; -}; - -template - memfile_ostream& operator << ( memfile_ostream& ostm, const T& val) -{ - ostm.write(val); - - return ostm; -} - -template - memfile_ostream& operator << ( memfile_ostream& ostm, const std::string& val) -{ - int size = val.size(); - ostm.write(size); - - if (val.size() <= 0) - return ostm; - - ostm.write(val.c_str(), val.size()); - - return ostm; -} - -template - memfile_ostream& operator << ( memfile_ostream& ostm, const char* val) -{ - int size = std::strlen(val); - ostm.write(size); - - if (size <= 0) - return ostm; - - ostm.write(val, size); - - return ostm; -} - -} // ns simple - -#endif // SimpleBinStream_H diff --git a/libnczarr/obsolete/zcache.c b/libnczarr/obsolete/zcache.c deleted file mode 100644 index 479af2be46..0000000000 --- a/libnczarr/obsolete/zcache.c +++ /dev/null @@ -1,544 +0,0 @@ -/* Copyright 2018, University Corporation for Atmospheric - * Research. See COPYRIGHT file for copying and redistribution - * conditions. */ - -/** - * @file @internal The functions which control NCZ - * caching. These caching controls allow the user to change the cache - * sizes of ZARR before opening files. - * - * @author Dennis Heimbigner, Ed Hartnett - */ - -#include "zincludes.h" -#include "zcache.h" - -#undef DEBUG - -#undef FILLONREAD - -#undef FLUSH - -/* Forward */ -static int get_chunk(NCZChunkCache* cache, const char* key, NCZCacheEntry* entry); -static int put_chunk(NCZChunkCache* cache, const char* key, const NCZCacheEntry*); -static int create_chunk(NCZChunkCache* cache, const char* key, NCZCacheEntry* entry); -static int buildchunkkey(size_t R, const size64_t* chunkindices, char** keyp); -static int makeroom(NCZChunkCache* cache); - -/**************************************************/ -/* Dispatch table per-var cache functions */ - -/** - * @internal Set chunk cache size for a variable. This is the internal - * function called by nc_set_var_chunk_cache(). - * - * @param ncid File ID. - * @param varid Variable ID. - * @param size Size in bytes to set cache. - * @param nelems # of entries in cache - * @param preemption Controls cache swapping. - * - * @returns ::NC_NOERR No error. - * @returns ::NC_EBADID Bad ncid. - * @returns ::NC_ENOTVAR Invalid variable ID. - * @returns ::NC_ESTRICTNC3 Attempting netcdf-4 operation on strict - * nc3 netcdf-4 file. - * @returns ::NC_EINVAL Invalid input. - * @returns ::NC_EHDFERR HDF5 error. - * @author Ed Hartnett - */ -int -NCZ_set_var_chunk_cache(int ncid, int varid, size_t cachesize, size_t nelems, float preemption) -{ - NC_GRP_INFO_T *grp; - NC_FILE_INFO_T *h5; - NC_VAR_INFO_T *var; - NCZ_VAR_INFO_T *zvar; - int retval; - - /* Check input for validity. */ - if (preemption < 0 || preemption > 1) - return NC_EINVAL; - - /* Find info for this file and group, and set pointer to each. */ - if ((retval = nc4_find_nc_grp_h5(ncid, NULL, &grp, &h5))) - return retval; - assert(grp && h5); - - /* Find the var. */ - if (!(var = (NC_VAR_INFO_T *)ncindexith(grp->vars, varid))) - return NC_ENOTVAR; - assert(var && var->hdr.id == varid); - - zvar = (NCZ_VAR_INFO_T*)var->format_var_info; - assert(zvar != NULL && zvar->cache != NULL); - - /* Set the values. */ - var->chunk_cache_size = cachesize; - var->chunk_cache_nelems = nelems; - var->chunk_cache_preemption = preemption; - -#ifdef LOOK - /* Reopen the dataset to bring new settings into effect. */ - if ((retval = nc4_reopen_dataset(grp, var))) - return retval; -#endif - return NC_NOERR; -} - -/** - * @internal Adjust the chunk cache of a var for better - * performance. - * - * @note For contiguous and compact storage vars, or when parallel I/O - * is in use, this function will do nothing and return ::NC_NOERR; - * - * @param grp Pointer to group info struct. - * @param var Pointer to var info struct. - * - * @return ::NC_NOERR No error. - * @author Ed Hartnett - */ -int -NCZ_adjust_var_cache(NC_GRP_INFO_T *grp, NC_VAR_INFO_T *var) -{ - /* Reset the cache parameters since var chunking may have changed */ - - - return NC_NOERR; -} - -/**************************************************/ - -/** - * Create a chunk cache object - * - * @param var containing var - * @param entrysize Size in bytes of an entry - * @param cachep return cache pointer - * - * @return ::NC_NOERR No error. - * @return ::NC_EINVAL Bad preemption. - * @author Dennis Heimbigner, Ed Hartnett - */ -int -NCZ_create_chunk_cache(NC_VAR_INFO_T* var, size64_t chunksize, NCZChunkCache** cachep) -{ - int stat = NC_NOERR; - NCZChunkCache* cache = NULL; - void* fill = NULL; - size_t nelems, cachesize; - NCZ_VAR_INFO_T* zvar = NULL; - - if(chunksize == 0) return NC_EINVAL; - - zvar = (NCZ_VAR_INFO_T*)var->format_var_info; - - if((cache = calloc(1,sizeof(NCZChunkCache))) == NULL) - {stat = NC_ENOMEM; goto done;} - cache->var = var; - cache->ndims = var->ndims + zvar->scalar; - cache->chunksize = chunksize; - assert(cache->fillchunk == NULL); - cache->fillchunk = NULL; - - /* Figure out the actual cache size */ - cachesize = var->chunk_cache_size; - nelems = (cachesize / chunksize); - if(nelems == 0) nelems = 1; - /* Make consistent */ - cachesize = nelems * chunksize; - cache->maxentries = nelems; -#ifdef FLUSH - cache->maxentries = 1; -#endif - -#ifdef DEBUG - fprintf(stderr,"%s.cache: nelems=%ld size=%ld\n", - var->hdr.name,(unsigned long)cache->maxentries,(unsigned long)(cache->maxentries*cache->chunksize)); -#endif - if((cache->entries = nclistnew()) == NULL) - {stat = NC_ENOMEM; goto done;} - nclistsetalloc(cache->entries,cache->maxentries); - if(cachep) {*cachep = cache; cache = NULL;} -done: - nullfree(fill); - nullfree(cache); - return THROW(stat); -} - -void -NCZ_free_chunk_cache(NCZChunkCache* cache) -{ - if(cache == NULL) return; - /* Iterate over the entries */ - while(nclistlength(cache->entries) > 0) { - NCZCacheEntry* entry = nclistremove(cache->entries,0); - nullfree(entry->data); nullfree(entry->key); nullfree(entry); - } -#ifdef DEBUG -fprintf(stderr,"|cache.free|=%ld\n",nclistlength(cache->entries)); -#endif - nclistfree(cache->entries); - cache->entries = NULL; - nullfree(cache->fillchunk); - nullfree(cache); -} - -size64_t -NCZ_cache_entrysize(NCZChunkCache* cache) -{ - assert(cache); - return cache->chunksize; -} - -/* Return number of active entries in cache */ -size64_t -NCZ_cache_size(NCZChunkCache* cache) -{ - assert(cache); - return nclistlength(cache->entries); -} - -int -NCZ_read_cache_chunk(NCZChunkCache* cache, const size64_t* indices, void** datap) -{ - int stat = NC_NOERR; - char* key = NULL; - int rank = cache->ndims; - NC_FILE_INFO_T* file = cache->var->container->nc4_info; - NCZCacheEntry* entry = NULL; - int i; - - /* Create the key for this cache */ - if((stat = NCZ_buildchunkpath(cache,indices,&key))) goto done; - - /* See if already in cache try MRU */ - for(i=nclistlength(cache->entries)-1;i>=0;i--) { - entry = (NCZCacheEntry*)nclistget(cache->entries,i); - if(strcmp(key,entry->key)==0) { - if(datap) *datap = entry->data; - /* Move to keep MRU at end */ - nclistremove(cache->entries,i); - break; - } else entry = NULL; - } - if(entry == NULL) { /*!found*/ - /* Make room in the cache */ - if((stat=makeroom(cache))) goto done; - /* Create a new entry */ - if((entry = calloc(1,sizeof(NCZCacheEntry)))==NULL) - {stat = NC_ENOMEM; goto done;} - memcpy(entry->indices,indices,rank*sizeof(size64_t)); - /* Create the local copy space */ - if((entry->data = calloc(1,cache->chunksize)) == NULL) - {stat = NC_ENOMEM; goto done;} - entry->key= key; key = NULL; - /* Try to read the object in toto */ - stat=get_chunk(cache,entry->key,entry); - switch (stat) { - case NC_NOERR: break; - case NC_EEMPTY: - case NC_ENOTFOUND: /*signals the chunk needs to be created */ - /* If the file is read-only, then fake the chunk */ - entry->modified = (!file->no_write); - if(!file->no_write) { - if((stat = create_chunk(cache,entry->key,entry))) goto done; - } -#ifdef FILLONREAD - /* apply fill value */ - memcpy(entry->data,cache->fillchunk,cache->chunksize); -#else - memset(entry->data,0,cache->chunksize); -#endif - break; - default: goto done; - } - } - nclistpush(cache->entries,entry); -#ifdef DEBUG -fprintf(stderr,"|cache.read.lru|=%ld\n",nclistlength(cache->entries)); -#endif - if(datap) *datap = entry->data; - entry = NULL; - -done: - if(entry) {nullfree(entry->data); nullfree(entry->key);} - nullfree(entry); - nullfree(key); - return THROW(stat); -} - -int -NCZ_write_cache_chunk(NCZChunkCache* cache, const size64_t* indices, void** datap) -{ - int stat = NC_NOERR; - char* key = NULL; - int i,rank = cache->ndims; - NCZCacheEntry* entry = NULL; - - /* Create the key for this cache */ - if((stat = NCZ_buildchunkpath(cache,indices,&key))) goto done; - - /* See if already in cache try MRU */ - for(i=nclistlength(cache->entries)-1;i>=0;i--) { - entry = (NCZCacheEntry*)nclistget(cache->entries,i); - if(strcmp(key,entry->key)==0) { - if(datap) *datap = entry->data; - /* Move to keep MRU at end */ - nclistremove(cache->entries,i); - break; - } else entry = NULL; - } - if(entry == NULL) { /*!found*/ - if((stat=makeroom(cache))) goto done; - /* Create a new entry */ - if((entry = calloc(1,sizeof(NCZCacheEntry)))==NULL) - {stat = NC_ENOMEM; goto done;} - memcpy(entry->indices,indices,rank*sizeof(size64_t)); - /* Create the local copy space */ - if((entry->data = calloc(1,cache->chunksize)) == NULL) - {stat = NC_ENOMEM; goto done;} - entry->key= key; key = NULL; - } - entry->modified = 1; - nclistpush(cache->entries,entry); /* MRU order */ -#ifdef DEBUG -fprintf(stderr,"|cache.write|=%ld\n",nclistlength(cache->entries)); -#endif - entry = NULL; - -done: - if(entry) {nullfree(entry->data); nullfree(entry->key);} - nullfree(entry); - nullfree(key); - return THROW(stat); -} - -static int -makeroom(NCZChunkCache* cache) -{ - int stat = NC_NOERR; - /* Flush from LRU end if we are at capacity */ - while(nclistlength(cache->entries) >= cache->maxentries) { - NCZCacheEntry* e = nclistremove(cache->entries,0); - assert(e != NULL); - if(e->modified) /* flush to file */ - stat=put_chunk(cache,e->key,e); - /* reclaim */ - nullfree(e->data); nullfree(e->key); nullfree(e); - } -#ifdef DEBUG -fprintf(stderr,"|cache.makeroom|=%ld\n",nclistlength(cache->entries)); -#endif - return stat; -} - -int -NCZ_flush_chunk_cache(NCZChunkCache* cache) -{ - int stat = NC_NOERR; - size_t i; - - if(NCZ_cache_size(cache) == 0) goto done; - - /* Iterate over the entries in hashmap */ - for(i=0;ientries);i++) { - NCZCacheEntry* entry = nclistget(cache->entries,i); - if(entry->modified) { - /* Write out this chunk in toto*/ - if((stat=put_chunk(cache,entry->key,entry))) - goto done; - } - entry->modified = 0; - } - -done: - return THROW(stat); -} - -#if 0 -int -NCZ_chunk_cache_modified(NCZChunkCache* cache, const size64_t* indices) -{ - int stat = NC_NOERR; - char* key = NULL; - NCZCacheEntry* entry = NULL; - int rank = cache->ndims; - - /* Create the key for this cache */ - if((stat=buildchunkkey(rank, indices, &key))) goto done; - - /* See if already in cache */ - if(NC_hashmapget(cache->entries, key, strlen(key), (uintptr_t*)entry)) { /* found */ - entry->modified = 1; - } - -done: - nullfree(key); - return THROW(stat); -} -#endif - -/**************************************************/ -/* -From Zarr V2 Specification: -"The compressed sequence of bytes for each chunk is stored under -a key formed from the index of the chunk within the grid of -chunks representing the array. To form a string key for a -chunk, the indices are converted to strings and concatenated -with the dimension_separator character ('/' or '.') separating each index. For -example, given an array with shape (10000, 10000) and chunk -shape (1000, 1000) there will be 100 chunks laid out in a 10 by -10 grid. The chunk with indices (0, 0) provides data for rows -0-1000 and columns 0-1000 and is stored under the key "0.0"; the -chunk with indices (2, 4) provides data for rows 2000-3000 and -columns 4000-5000 and is stored under the key "2.4"; etc." -*/ - -/** - * @param R Rank - * @param chunkindices The chunk indices - * @param keyp Return the chunk key string - */ -static int -buildchunkkey(size_t R, const size64_t* chunkindices, char** keyp) -{ - int stat = NC_NOERR; - int r; - NCbytes* key = ncbytesnew(); - - if(keyp) *keyp = NULL; - - for(r=0;r 0) ncbytescat(key,"."); - /* Print as decimal with no leading zeros */ - snprintf(sindex,sizeof(sindex),"%lu",(unsigned long)chunkindices[r]); - ncbytescat(key,sindex); - } - ncbytesnull(key); - if(keyp) *keyp = ncbytesextract(key); - - ncbytesfree(key); - return THROW(stat); -} - -/** - * @internal Push data to chunk of a file. - * If chunk does not exist, create it - * - * @param file Pointer to file info struct. - * @param proj Chunk projection - * @param datalen size of data - * @param data Buffer containing the chunk data to write - * - * @return ::NC_NOERR No error. - * @author Dennis Heimbigner - */ -static int -put_chunk(NCZChunkCache* cache, const char* key, const NCZCacheEntry* entry) -{ - int stat = NC_NOERR; - NCZ_FILE_INFO_T* zfile = NULL; - NCZMAP* map = NULL; - - LOG((3, "%s: var: %p", __func__, cache->var)); - - zfile = ((cache->var->container)->nc4_info)->format_file_info; - map = zfile->map; - - stat = nczmap_write(map,key,0,cache->chunksize,entry->data); - switch(stat) { - case NC_NOERR: break; - case NC_EEMPTY: - /* Create the chunk */ - switch (stat = nczmap_defineobj(map,key)) { - case NC_NOERR: case NC_EFOUND: break; - default: goto done; - } - /* write again */ - if((stat = nczmap_write(map,key,0,cache->chunksize,entry->data))) - goto done; - break; - default: goto done; - } -done: - return THROW(stat); -} - -/** - * @internal Push data from memory to file. - * - * @param cache Pointer to parent cache - * @param key chunk key - * @param entry cache entry to read into - * - * @return ::NC_NOERR No error. - * @author Dennis Heimbigner - */ -static int -get_chunk(NCZChunkCache* cache, const char* key, NCZCacheEntry* entry) -{ - int stat = NC_NOERR; - NCZMAP* map = NULL; - NC_FILE_INFO_T* file = NULL; - NCZ_FILE_INFO_T* zfile = NULL; - - LOG((3, "%s: file: %p", __func__, file)); - - file = (cache->var->container)->nc4_info; - zfile = file->format_file_info; - map = zfile->map; - assert(map && entry->data); - - stat = nczmap_read(map,key,0,cache->chunksize,(char*)entry->data); - - return THROW(stat); -} - -static int -create_chunk(NCZChunkCache* cache, const char* key, NCZCacheEntry* entry) -{ - int stat = NC_NOERR; - NC_FILE_INFO_T* file = NULL; - NCZ_FILE_INFO_T* zfile = NULL; - NCZMAP* map = NULL; - - file = (cache->var->container)->nc4_info; - zfile = file->format_file_info; - map = zfile->map; - - /* Create the chunk */ - if((stat = nczmap_defineobj(map,key))) goto done; - entry->modified = 1; /* mark as modified */ - /* let higher function decide on fill */ - -done: - return THROW(stat); -} - -int -NCZ_buildchunkpath(NCZChunkCache* cache, const size64_t* chunkindices, char** keyp) -{ - int stat = NC_NOERR; - char* chunkname = NULL; - char* varkey = NULL; - char* key = NULL; - - /* Get the chunk object name */ - if((stat = buildchunkkey(cache->ndims, chunkindices, &chunkname))) goto done; - /* Get the var object key */ - if((stat = NCZ_varkey(cache->var,&varkey))) goto done; - /* Prefix the path to the containing variable object */ - if((stat=nczm_concat(varkey,chunkname,&key))) goto done; - if(keyp) {*keyp = key; key = NULL;} - -done: - nullfree(chunkname); - nullfree(varkey); - nullfree(key); - return THROW(stat); -} - diff --git a/libnczarr/obsolete/zmap_nz4.c b/libnczarr/obsolete/zmap_nz4.c deleted file mode 100644 index 34927c818f..0000000000 --- a/libnczarr/obsolete/zmap_nz4.c +++ /dev/null @@ -1,623 +0,0 @@ -/* - * Copyright 2018, University Corporation for Atmospheric Research - * See netcdf/COPYRIGHT file for copying and redistribution conditions. - */ - -#include "zincludes.h" - -#include "fbits.h" -#include "ncpathmgr.h" - -/* -Do a simple mapping of our simplified map model -to a netcdf-4 file. - -For the object API, the mapping is as follows: -1. Every object (e.g. group or array) is mapped to a netcdf-4 group. -2. Object content is kept as a ubyte typed variable with one - unlimited dimension -*/ - -#undef DEBUG - -#define NCZM_NC4_V1 1 - -/* What to replace ZDOT with */ -#define ZDOTNC4 '_' - -/* define the attr/var name containing an objects content */ -#define ZCONTENT "data" - -/* Define the dimension for the ZCONTENT variable */ -/* Avoid creating a coordinate variable */ -#define ZCONTENTDIM "data_dim" - -/* Define the "subclass" of NCZMAP */ -typedef struct Z4MAP { - NCZMAP map; - char* root; - int ncid; -} Z4MAP; - - -/* Forward */ -static NCZMAP_API zapi; -static int znc4close(NCZMAP* map, int delete); -static int zlookupgroup(Z4MAP*, NClist* segments, int nskip, int* grpidp); -static int zlookupobj(Z4MAP*, NClist* segments, int* objidp); -static int zcreategroup(Z4MAP* z4map, NClist* segments, int nskip, int* grpidp); -static int zcreateobj(Z4MAP*, NClist* segments, int* objidp); -static int zcreatedim(Z4MAP*, int, int* dimidp); -static int parseurl(const char* path0, NCURI** urip); -static void nc4ify(const char* zname, char* nc4name); -static void zify(const char* nc4name, char* zname); -static int testcontentbearing(int grpid); -static int errno2ncerr(int err); - -/* Define the Dataset level API */ - -static int -znc4create(const char *path, int mode, size64_t flags, void* parameters, NCZMAP** mapp) -{ - int stat = NC_NOERR; - char* truepath = NULL; - char* local = NULL; /* localized truepath */ - Z4MAP* z4map = NULL; - int ncid; - NCURI* url = NULL; - - /* Fix up mode */ - mode = (NC_NETCDF4 | NC_WRITE | mode); - if(flags & FLAG_BYTERANGE) - mode &= ~(NC_CLOBBER | NC_WRITE); - - if(!(mode & NC_WRITE)) - {stat = NC_EPERM; goto done;} - - if((stat=parseurl(path,&url))) - goto done; - - /* Canonicalize the root path */ - if((stat = nczm_canonicalpath(url->path,&truepath))) goto done; - /* Also get local path */ - if((stat = nczm_localize(truepath,&local,LOCALIZE))) goto done; - - /* Build the z4 state */ - if((z4map = calloc(1,sizeof(Z4MAP))) == NULL) - {stat = NC_ENOMEM; goto done;} - - z4map->map.format = NCZM_NC4; - z4map->map.url = ncuribuild(url,NULL,NULL,NCURIALL); - z4map->map.mode = mode; - z4map->map.flags = flags; - z4map->map.api = &zapi; - z4map->root= truepath; - truepath = NULL; - - if((stat=nc_create(local,mode,&ncid))) - {stat = NC_EEMPTY; goto done;} /* could not open */ - z4map->ncid = ncid; - - if(mapp) *mapp = (NCZMAP*)z4map; - -done: - ncurifree(url); - nullfree(truepath); - nullfree(local); - if(stat) znc4close((NCZMAP*)z4map,1); - return (stat); -} - -static int -znc4open(const char *path, int mode, size64_t flags, void* parameters, NCZMAP** mapp) -{ - int stat = NC_NOERR; - char* truepath = NULL; - char* local = NULL; - Z4MAP* z4map = NULL; - int ncid; - NCURI* url = NULL; - - /* Fixup mode */ - mode = NC_NETCDF4 | mode; - if(flags & FLAG_BYTERANGE) - mode &= ~(NC_CLOBBER | NC_WRITE); - - if((stat=parseurl(path,&url))) - goto done; - - /* Canonicalize the root path */ - if((stat = nczm_canonicalpath(url->path,&truepath))) goto done; - /* Also get local path */ - if((stat = nczm_localize(truepath,&local,LOCALIZE))) goto done; - - /* Build the z4 state */ - if((z4map = calloc(1,sizeof(Z4MAP))) == NULL) - {stat = NC_ENOMEM; goto done;} - - z4map->map.format = NCZM_NC4; - z4map->map.url = ncuribuild(url,NULL,NULL,NCURIALL); - z4map->map.mode = mode; - z4map->map.flags = flags; - z4map->map.api = (NCZMAP_API*)&zapi; - z4map->root = truepath; - truepath = NULL; - - if((stat=nc_open(local,mode,&ncid))) - goto done; /* could not open */ - z4map->ncid = ncid; - - if(mapp) *mapp = (NCZMAP*)z4map; - -done: - nullfree(truepath); - nullfree(local); - ncurifree(url); - if(stat) znc4close((NCZMAP*)z4map,0); - return errno2ncerr(stat); -} - -/**************************************************/ -/* Object API */ - -static int -znc4close(NCZMAP* map, int delete) -{ - int stat = NC_NOERR; - Z4MAP* z4map = (Z4MAP*)map; - char* path = NULL; - - if(map == NULL) return NC_NOERR; - - path = z4map->root; - - if((stat = nc_close(z4map->ncid))) - goto done; - if(delete) { - if((stat = nc_delete(path))) - goto done; - } - -done: - nullfree(z4map->root); - nczm_clear(map); - free(z4map); - return (stat); -} - -static int -znc4exists(NCZMAP* map, const char* key) -{ - int stat = NC_NOERR; - Z4MAP* z4map = (Z4MAP*)map; - NClist* segments = nclistnew(); - int grpid; - - if((stat=nczm_split(key,segments))) - goto done; - switch(stat=zlookupobj(z4map,segments,&grpid)) { - case NC_NOERR: break; - case NC_ENOTFOUND: stat = NC_EEMPTY; /* Does not exist */ - case NC_EEMPTY: /* Not an object */ - default: break; /* other error */ - } - -done: - nclistfreeall(segments); - return (stat); -} - -static int -znc4len(NCZMAP* map, const char* key, size64_t* lenp) -{ - int stat = NC_NOERR; - Z4MAP* z4map = (Z4MAP*)map; - NClist* segments = nclistnew(); - int grpid, vid; - size_t dimlen; - int dimids[1]; - - if((stat=nczm_split(key,segments))) - goto done; - - switch(stat=zlookupobj(z4map,segments,&grpid)) { - case NC_NOERR: - /* Look for the data variable */ - if((stat = nc_inq_varid(grpid,ZCONTENT,&vid))) goto done; - /* Get size for this variable */ - if((stat = nc_inq_vardimid(grpid,vid,dimids))) goto done; - /* Get size of the one and only dim */ - if((stat = nc_inq_dimlen(z4map->ncid,dimids[0],&dimlen))) goto done; - if(lenp) *lenp = (size64_t)dimlen; - break; - case NC_ENOTFOUND: stat = NC_EEMPTY; /* does not exist */ - case NC_EEMPTY: /* Not an object */ - if(lenp) *lenp = 0; - break; - default: break; - } - -done: - nclistfreeall(segments); - return (stat); -} - -static int -znc4defineobj(NCZMAP* map, const char* key) -{ - int stat = NC_NOERR; - int grpid; - Z4MAP* z4map = (Z4MAP*)map; /* cast to true type */ - NClist* segments = nclistnew(); - - if((stat=nczm_split(key,segments))) - goto done; - switch (stat = zlookupobj(z4map,segments,&grpid)) { - case NC_NOERR: break; /* already exists */ - case NC_ENOTFOUND: stat = NC_EEMPTY; - case NC_EEMPTY: - if((stat = zcreateobj(z4map,segments,&grpid))) goto done; - break; - default: break; /* other error */ - } - -done: - nclistfreeall(segments); - return (stat); -} - -static int -znc4read(NCZMAP* map, const char* key, size64_t start, size64_t count, void* content) -{ - int stat = NC_NOERR; - int grpid,vid; - Z4MAP* z4map = (Z4MAP*)map; /* cast to true type */ - size_t vstart[1]; - size_t vcount[1]; - NClist* segments = nclistnew(); - - if((stat=nczm_split(key,segments))) - goto done; - switch (stat = zlookupobj(z4map,segments,&grpid)) { - case NC_NOERR: /* exists */ - /* Look for the data variable */ - if((stat = nc_inq_varid(grpid,ZCONTENT,&vid))) goto done; - vstart[0] = (size_t)start; - vcount[0] = (size_t)count; - if((stat = nc_get_vara(grpid,vid,vstart,vcount,content))) goto done; - break; - case NC_ENOTFOUND: stat = NC_EEMPTY; - case NC_EEMPTY: break; /* no data */ - default: break; /* other error */ - } - -done: - nclistfreeall(segments); - return (stat); -} - -static int -znc4write(NCZMAP* map, const char* key, size64_t start, size64_t count, const void* content) -{ - int stat = NC_NOERR; - int grpid,vid; - Z4MAP* z4map = (Z4MAP*)map; /* cast to true type */ - size_t vstart[1]; - size_t vcount[1]; - NClist* segments = nclistnew(); - - if((stat=nczm_split(key,segments))) - goto done; - switch (stat = zlookupobj(z4map,segments,&grpid)) { - case NC_NOERR: /* exists */ - /* Look for the data variable */ - if((stat = nc_inq_varid(grpid,ZCONTENT,&vid))) goto done; - vstart[0] = (size_t)start; - vcount[0] = (size_t)count; - if((stat = nc_put_vara(grpid,vid,vstart,vcount,content))) goto done; - break; - case NC_ENOTFOUND: stat = NC_EEMPTY; - case NC_EEMPTY: break; /* no data */ - default: break; /* other error */ - } -done: - nclistfreeall(segments); - return (stat); -} - -/* -Return a list of keys immediately "below" a specified prefix. -In theory, the returned list should be sorted in lexical order, -but it is not. -*/ -int -znc4search(NCZMAP* map, const char* prefix, NClist* matches) -{ - int stat = NC_NOERR; - Z4MAP* z4map = (Z4MAP*)map; - NClist* segments = nclistnew(); - int grpid, ngrps; - int* subgrps = NULL; - int* vars = NULL; - int i; - NCbytes* key = ncbytesnew(); - - if((stat=nczm_split(prefix,segments))) - goto done; - if(nclistlength(segments) > 0) { - /* Fix the last name */ - size_t pos = nclistlength(segments)-1; - char* name = nclistget(segments,pos); - char zname[NC_MAX_NAME]; - zify(name,zname); - nclistset(segments,pos,strdup(zname)); - nullfree(name); - } -#ifdef DEBUG - { - int i; - fprintf(stderr,"segments: %d: ",nclistlength(segments)); - for(i=0;incid; - for(i=0;i 0) */ - gname = nclistget(segments,len-1); - grpid = z4map->ncid; - /* Do all but last group */ - for(i=0;i<(len-1);i++) { - const char* seg = nclistget(segments,i); - nc4ify(seg,nc4name); - /* Does this group exist? */ - if((stat=nc_inq_grp_ncid(grpid,nc4name,&grpid2)) == NC_ENOGRP) { - {stat = NC_ENOTFOUND; goto done;} /* missing intermediate */ - } - grpid = grpid2; - } - /* Check status of last group */ - nc4ify(gname,nc4name); - if((stat = nc_inq_grp_ncid(grpid,nc4name,&grpid2))) { - if(stat != NC_ENOGRP) goto done; - if((stat = nc_def_grp(grpid,nc4name,&grpid2))) - goto done; - grpid = grpid2; - } - - if(grpidp) *grpidp = grpid; - -done: - return (stat); -} - -static int -zcreatedim(Z4MAP* z4map, int grpid, int* dimidp) -{ - int stat = NC_NOERR; - int dimid; - - NC_UNUSED(z4map); - - if((stat=nc_inq_dimid(grpid,ZCONTENTDIM,&dimid))) { - /* create it */ - if((stat=nc_def_dim(grpid,ZCONTENTDIM,NC_UNLIMITED,&dimid))) - goto done; - } - if(dimidp) *dimidp = dimid; - -done: - return (stat); -} - -/* Create an object group corresponding to a key; create any - necessary intermediates. - */ -static int -zcreateobj(Z4MAP* z4map, NClist* segments, int* grpidp) -{ - int skip,stat = NC_NOERR; - int grpid, varid; - int dimid[1]; - - /* Create the whole path */ - skip = nclistlength(segments); - for(skip--;skip >= 0; skip--) { - if((stat = zcreategroup(z4map,segments,skip,&grpid))) - goto done; - } - /* Last grpid should be one we want */ - if(grpidp) *grpidp = grpid; - /* Create the content-bearer */ - /* Create the corresponding dimension */ - if((stat = zcreatedim(z4map,grpid,&dimid[0]))) - goto done; - /* Create the variable */ - if((stat=nc_def_var(grpid, ZCONTENT, NC_UBYTE, 1, dimid, &varid))) - goto done; -done: - return (stat); -} - -static int -parseurl(const char* path0, NCURI** urip) -{ - int stat = NC_NOERR; - NCURI* uri = NULL; - ncuriparse(path0,&uri); - if(uri == NULL) - {stat = NC_EURL; goto done;} - if(urip) {*urip = uri; uri = NULL;} - -done: - ncurifree(uri); - return stat; -} - -/* Convert _z... name to .z... name */ -static void -zify(const char* nc4name, char* zname) -{ - zname[0] = '\0'; - strlcat(zname,nc4name,NC_MAX_NAME); - if(zname[0] == ZDOTNC4) zname[0] = NCZM_DOT; -} - -/* Convert .z... name to _z... name */ -static void -nc4ify(const char* zname, char* nc4name) -{ - nc4name[0] = '\0'; - strlcat(nc4name,zname,NC_MAX_NAME); - if(nc4name[0] == NCZM_DOT) nc4name[0] = ZDOTNC4; -} - -/* Convert errno to closest NC_EXXX error */ -static int -errno2ncerr(int err) -{ - switch (err) { - case ENOENT: err = NC_ENOTFOUND; break; /* File does not exist */ - case ENOTDIR: err = NC_EEMPTY; break; /* no content */ - case EACCES: err = NC_EAUTH; break; /* file permissions */ - case EPERM: err = NC_EAUTH; break; /* ditto */ - default: break; - } - return err; -} - -/**************************************************/ -/* External API objects */ - -NCZMAP_DS_API zmap_nz4 = { - NCZM_NC4_V1, - 0, - znc4create, - znc4open, -}; - -static NCZMAP_API zapi = { - NCZM_NC4_V1, - znc4close, - znc4exists, - znc4len, - znc4defineobj, - znc4read, - znc4write, - znc4search, -}; diff --git a/libnczarr/zarr.c b/libnczarr/zarr.c index 9ff7893a7f..75b1c72580 100644 --- a/libnczarr/zarr.c +++ b/libnczarr/zarr.c @@ -23,25 +23,25 @@ static int applycontrols(NCZ_FILE_INFO_T* zinfo); */ int -ncz_create_dataset(NC_FILE_INFO_T* file, NC_GRP_INFO_T* root, NClist* controls) +ncz_create_dataset(NC_FILE_INFO_T* file, NC_GRP_INFO_T* root, NClist* urlcontrols) { int stat = NC_NOERR; - NCZ_FILE_INFO_T* zinfo = NULL; + NCZ_FILE_INFO_T* zfile = NULL; NCZ_GRP_INFO_T* zgrp = NULL; NCURI* uri = NULL; NC* nc = NULL; NCjson* json = NULL; char* key = NULL; - ZTRACE(3,"file=%s root=%s controls=%s",file->hdr.name,root->hdr.name,(controls?nczprint_env(controls):"null")); + ZTRACE(3,"root=%s urlcontrols=%s",root->hdr.name,(urlcontrols?nczprint_envlist(urlcontrols):"null")); nc = (NC*)file->controller; /* Add struct to hold NCZ-specific file metadata. */ - if (!(zinfo = calloc(1, sizeof(NCZ_FILE_INFO_T)))) + if (!(zfile = calloc(1, sizeof(NCZ_FILE_INFO_T)))) {stat = NC_ENOMEM; goto done;} - file->format_file_info = zinfo; - zinfo->common.file = file; + file->format_file_info = zfile; + zfile->common.file = file; /* Add struct to hold NCZ-specific group info. */ if (!(zgrp = calloc(1, sizeof(NCZ_GRP_INFO_T)))) @@ -50,34 +50,34 @@ ncz_create_dataset(NC_FILE_INFO_T* file, NC_GRP_INFO_T* root, NClist* controls) zgrp->common.file = file; /* Fill in NCZ_FILE_INFO_T */ - zinfo->creating = 1; - zinfo->common.file = file; - zinfo->native_endianness = (NCZ_isLittleEndian() ? NC_ENDIAN_LITTLE : NC_ENDIAN_BIG); - if((zinfo->controllist=nclistclone(controls,1)) == NULL) + zfile->creating = 1; + zfile->common.file = file; + zfile->native_endianness = (NCZ_isLittleEndian() ? NC_ENDIAN_LITTLE : NC_ENDIAN_BIG); + if((zfile->urlcontrols=nclistclone(urlcontrols,1)) == NULL) {stat = NC_ENOMEM; goto done;} - /* fill in some of the zinfo and zroot fields */ - zinfo->zarr.zarr_version = atoi(ZARRVERSION); - sscanf(NCZARRVERSION,"%lu.%lu.%lu", - &zinfo->zarr.nczarr_version.major, - &zinfo->zarr.nczarr_version.minor, - &zinfo->zarr.nczarr_version.release); - - zinfo->default_maxstrlen = NCZ_MAXSTR_DEFAULT; - /* Apply client controls */ - if((stat = applycontrols(zinfo))) goto done; + if((stat = applycontrols(zfile))) goto done; /* Load auth info from rc file */ if((stat = ncuriparse(nc->path,&uri))) goto done; if(uri) { - if((stat = NC_authsetup(&zinfo->auth, uri))) + if((stat = NC_authsetup(&zfile->auth, uri))) goto done; } + /* default the zarr format */ + if(zfile->zarr.zarr_format == 0) + zfile->zarr.zarr_format = NC_getglobalstate()->zarr.default_zarrformat; + /* initialize map handle*/ - if((stat = nczmap_create(zinfo->controls.mapimpl,nc->path,nc->mode,zinfo->controls.flags,NULL,&zinfo->map))) - goto done; + if((stat = NCZ_get_map(file,uri,(mode_t)nc->mode,zfile->flags,NULL,&zfile->map))) goto done; + + /* And get the format dispatcher */ + if((stat = NCZ_get_create_formatter(file, (const NCZ_Formatter**)&zfile->dispatcher))) goto done; + + /* And get the consolidated metadata handler */ + if((stat = NCZMD_set_metadata_handler(file))) goto done; done: ncurifree(uri); @@ -95,25 +95,19 @@ ncz_create_dataset(NC_FILE_INFO_T* file, NC_GRP_INFO_T* root, NClist* controls) */ int -ncz_open_dataset(NC_FILE_INFO_T* file, NClist* controls) +ncz_open_dataset(NC_FILE_INFO_T* file, NClist* urlcontrols) { int stat = NC_NOERR; NC* nc = NULL; NC_GRP_INFO_T* root = NULL; NCURI* uri = NULL; - void* content = NULL; - NCjson* json = NULL; - NCZ_FILE_INFO_T* zinfo = NULL; - int mode; + NCZ_FILE_INFO_T* zfile = NULL; NClist* modeargs = NULL; - char* nczarr_version = NULL; - char* zarr_format = NULL; - ZTRACE(3,"file=%s controls=%s",file->hdr.name,(controls?nczprint_envv(controls):"null")); + ZTRACE(3,"file=%s urlcontrols=%s",file->hdr.name,(urlcontrols?nczprint_envlist(urlcontrols):"null")); /* Extract info reachable via file */ nc = (NC*)file->controller; - mode = nc->mode; root = file->root_grp; assert(root != NULL && root->hdr.sort == NCGRP); @@ -121,58 +115,52 @@ ncz_open_dataset(NC_FILE_INFO_T* file, NClist* controls) /* Add struct to hold NCZ-specific file metadata. */ if (!(file->format_file_info = calloc(1, sizeof(NCZ_FILE_INFO_T)))) {stat = NC_ENOMEM; goto done;} - zinfo = file->format_file_info; + zfile = file->format_file_info; /* Fill in NCZ_FILE_INFO_T */ - zinfo->creating = 0; - zinfo->common.file = file; - zinfo->native_endianness = (NCZ_isLittleEndian() ? NC_ENDIAN_LITTLE : NC_ENDIAN_BIG); - if((zinfo->controllist=nclistclone(controls,1)) == NULL) + zfile->creating = 0; + zfile->common.file = file; + zfile->native_endianness = (NCZ_isLittleEndian() ? NC_ENDIAN_LITTLE : NC_ENDIAN_BIG); + if((zfile->urlcontrols = nclistclone(urlcontrols,1))==NULL) /*0=>envv style*/ {stat = NC_ENOMEM; goto done;} - zinfo->default_maxstrlen = NCZ_MAXSTR_DEFAULT; - + /* Add struct to hold NCZ-specific group info. */ if (!(root->format_grp_info = calloc(1, sizeof(NCZ_GRP_INFO_T)))) {stat = NC_ENOMEM; goto done;} ((NCZ_GRP_INFO_T*)root->format_grp_info)->common.file = file; /* Apply client controls */ - if((stat = applycontrols(zinfo))) goto done; - - /* initialize map handle*/ - if((stat = nczmap_open(zinfo->controls.mapimpl,nc->path,mode,zinfo->controls.flags,NULL,&zinfo->map))) - goto done; - - /* Ok, try to read superblock */ - if((stat = ncz_read_superblock(file,&nczarr_version,&zarr_format))) goto done; - - if(nczarr_version == NULL) /* default */ - nczarr_version = strdup(NCZARRVERSION); - if(zarr_format == NULL) /* default */ - zarr_format = strdup(ZARRVERSION); - /* Extract the information from it */ - if(sscanf(zarr_format,"%d",&zinfo->zarr.zarr_version)!=1) - {stat = NC_ENCZARR; goto done;} - if(sscanf(nczarr_version,"%lu.%lu.%lu", - &zinfo->zarr.nczarr_version.major, - &zinfo->zarr.nczarr_version.minor, - &zinfo->zarr.nczarr_version.release) == 0) - {stat = NC_ENCZARR; goto done;} + if((stat = applycontrols(zfile))) goto done; /* Load auth info from rc file */ if((stat = ncuriparse(nc->path,&uri))) goto done; if(uri) { - if((stat = NC_authsetup(&zinfo->auth, uri))) + if((stat = NC_authsetup(&zfile->auth, uri))) goto done; } + /* initialize map handle*/ + if((stat = NCZ_get_map(file,uri,(mode_t)nc->mode,zfile->flags,NULL,&zfile->map))) goto done; + + /* Get the zarr_format */ + if((stat = NCZ_infer_open_zarr_format(file))) goto done; + + /* And add the consolidated metadata manager to file */ + /* Must follow NCZ_infer_ope_zarr_format because it uses the discovered zarr format */ + if((stat = NCZMD_set_metadata_handler(file))) goto done; + + /* Set the nczarr format; must follow set_metadata_handler because it needs to read metadata */ + if((stat = NCZ_infer_open_nczarr_format(file))) goto done; + + /* And get the format dispatcher: uses discovered zarr and nczarr formats and the metadata handler */ + if((stat = NCZ_get_open_formatter(file, (const NCZ_Formatter**)&zfile->dispatcher))) goto done; + + /* Load the meta-data */ + if((stat = ncz_decode_file(file))) goto done; + done: - nullfree(zarr_format); - nullfree(nczarr_version); ncurifree(uri); nclistfreeall(modeargs); - if(json) NCJreclaim(json); - nullfree(content); return ZUNTRACE(stat); } @@ -210,7 +198,7 @@ int NCZ_get_libversion(unsigned long* majorp, unsigned long* minorp,unsigned long* releasep) { unsigned long m0,m1,m2; - sscanf(NCZARRVERSION,"%lu.%lu.%lu",&m0,&m1,&m2); + sscanf(NCZARR_PACKAGE_VERSION,"%lu.%lu.%lu",&m0,&m1,&m2); if(majorp) *majorp = m0; if(minorp) *minorp = m1; if(releasep) *releasep = m2; @@ -231,8 +219,8 @@ NCZ_get_libversion(unsigned long* majorp, unsigned long* minorp,unsigned long* r int NCZ_get_superblock(NC_FILE_INFO_T* file, int* superblockp) { - NCZ_FILE_INFO_T* zinfo = file->format_file_info; - if(superblockp) *superblockp = zinfo->zarr.nczarr_version.major; + NCZ_FILE_INFO_T* zfile = file->format_file_info; + if(superblockp) *superblockp = zfile->zarr.nczarr_format; return NC_NOERR; } @@ -252,7 +240,19 @@ controllookup(NClist* controls, const char* key) return NULL; } - +/** +Look to various sources to get control information +for a given dataset. Current sources: +1. From URL: + * "mode=..." + * "log" + * "show=..." +2. Environment variables: + +@param zinfo modified to add controls +@return NC_NOERR if success +@return NC_EXXX if failures +*/ static int applycontrols(NCZ_FILE_INFO_T* zinfo) { @@ -262,37 +262,55 @@ applycontrols(NCZ_FILE_INFO_T* zinfo) NClist* modelist = nclistnew(); size64_t noflags = 0; /* track non-default negative flags */ - if((value = controllookup(zinfo->controllist,"mode")) != NULL) { +#ifdef NETCDF_ENABLE_ZOH + /* Track http: vs https: */ + if(strncmp("http://",zinfo->common.file->hdr.name,7) == 0 || + strncmp("https://",zinfo->common.file->hdr.name,8) == 0 ){ + zinfo->mapimpl = NCZM_ZOH; + } +#endif + /* Apply controls from URL mode=... */ + if((value = controllookup(zinfo->urlcontrols,"mode")) != NULL) { if((stat = NCZ_comma_parse(value,modelist))) goto done; } - /* Process the modelist first */ - zinfo->controls.mapimpl = NCZM_DEFAULT; - zinfo->controls.flags |= FLAG_XARRAYDIMS; /* Always support XArray convention where possible */ + + /* Process the modelist */ + zinfo->mapimpl = NCZM_DEFAULT; + zinfo->flags |= FLAG_XARRAYDIMS; /* Always support XArray convention where possible */ for(i=0;icontrols.flags |= (FLAG_PUREZARR); + zinfo->flags |= FLAG_PUREZARR; else if(strcasecmp(p,XARRAYCONTROL)==0) - zinfo->controls.flags |= FLAG_PUREZARR; + zinfo->flags |= FLAG_XARRAYDIMS; else if(strcasecmp(p,NOXARRAYCONTROL)==0) noflags |= FLAG_XARRAYDIMS; - else if(strcasecmp(p,"zip")==0) zinfo->controls.mapimpl = NCZM_ZIP; - else if(strcasecmp(p,"file")==0) zinfo->controls.mapimpl = NCZM_FILE; - else if(strcasecmp(p,"s3")==0) zinfo->controls.mapimpl = NCZM_S3; + else if(strcasecmp(p,ZARRFORMAT2_STRING)==0) + zinfo->zarr.zarr_format = ZARRFORMAT2; + else if(strcasecmp(p,ZARRFORMAT3_STRING)==0) + zinfo->zarr.zarr_format = ZARRFORMAT3; + else if(strcasecmp(p,NOZMETADATACONTROL)==0) + zinfo->flags |= FLAG_NOCONSOLIDATED; + else if(strcasecmp(p,ZMETADATACONTROL)==0) + noflags |= FLAG_NOCONSOLIDATED; } + /* Apply negative controls by turning off negative flags */ /* This is necessary to avoid order dependence of mode flags when both positive and negative flags are defined */ - zinfo->controls.flags &= (~noflags); + zinfo->flags &= (~noflags); /* Process other controls */ - if((value = controllookup(zinfo->controllist,"log")) != NULL) { - zinfo->controls.flags |= FLAG_LOGGING; + if((value = controllookup(zinfo->urlcontrols,"log")) != NULL) { + zinfo->flags |= FLAG_LOGGING; ncsetloglevel(NCLOGNOTE); } - if((value = controllookup(zinfo->controllist,"show")) != NULL) { + if((value = controllookup(zinfo->urlcontrols,"show")) != NULL) { if(strcasecmp(value,"fetch")==0) - zinfo->controls.flags |= FLAG_SHOWFETCH; + zinfo->flags |= FLAG_SHOWFETCH; } + + /* Environment Variables */ + done: nclistfreeall(modelist); return stat; diff --git a/libnczarr/zarr.h b/libnczarr/zarr.h index 714fb3bcea..8c132c5db1 100644 --- a/libnczarr/zarr.h +++ b/libnczarr/zarr.h @@ -12,10 +12,12 @@ #ifndef ZARR_H #define ZARR_H +/* Opaque */ struct ChunkKey; struct S3credentials; +struct ZOBJ; -/* Intermediate results */ +/* Intermediate JSON results */ struct ZCVT { signed long long int64v; unsigned long long uint64v; @@ -28,7 +30,7 @@ struct ZCVT { /* zarr.c */ EXTERNL int ncz_create_dataset(NC_FILE_INFO_T*, NC_GRP_INFO_T*, NClist* controls); EXTERNL int ncz_open_dataset(NC_FILE_INFO_T*, NClist* controls); -EXTERNL int ncz_del_attr(NC_FILE_INFO_T* file, NC_OBJ* container, const char* name); + /* HDF5 Mimics */ EXTERNL int NCZ_isnetcdf4(struct NC_FILE_INFO*); @@ -47,15 +49,9 @@ EXTERNL int NCZ_stringconvert1(nc_type typid, char* src, NCjson* jvalue); EXTERNL int NCZ_stringconvert(nc_type typid, size_t len, void* data0, NCjson** jdatap); /* zsync.c */ -EXTERNL int ncz_sync_file(NC_FILE_INFO_T* file, int isclose); -EXTERNL int ncz_sync_grp(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, int isclose); -EXTERNL int ncz_sync_atts(NC_FILE_INFO_T*, NC_OBJ* container, NCindex* attlist, NCjson* jatts, NCjson* jtypes, int isclose); -EXTERNL int ncz_read_grp(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp); -EXTERNL int ncz_read_atts(NC_FILE_INFO_T* file, NC_OBJ* container); -EXTERNL int ncz_read_vars(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp); -EXTERNL int ncz_read_file(NC_FILE_INFO_T* file); -EXTERNL int ncz_write_var(NC_VAR_INFO_T* var); -EXTERNL int ncz_read_superblock(NC_FILE_INFO_T* zinfo, char** nczarrvp, char** zarrfp); +EXTERNL int ncz_insert_attr(NCjson* jatts, NCjson* jtypes, const char* aname, NCjson** javaluep, const char* atype); +EXTERNL int ncz_encode_file(NC_FILE_INFO_T* file, int isclose); +EXTERNL int ncz_decode_file(NC_FILE_INFO_T* file); /* zutil.c */ EXTERNL int NCZ_grpkey(const NC_GRP_INFO_T* grp, char** pathp); @@ -63,31 +59,51 @@ EXTERNL int NCZ_varkey(const NC_VAR_INFO_T* var, char** pathp); EXTERNL int NCZ_dimkey(const NC_DIM_INFO_T* dim, char** pathp); EXTERNL int ncz_splitkey(const char* path, NClist* segments); EXTERNL int ncz_nctypedecode(const char* snctype, nc_type* nctypep); -EXTERNL int ncz_nctype2dtype(nc_type nctype, int endianness, int purezarr,int len, char** dnamep); -EXTERNL int ncz_dtype2nctype(const char* dtype, nc_type typehint, int purezarr, nc_type* nctypep, int* endianp, int* typelenp); -EXTERNL int NCZ_inferattrtype(const NCjson* value, nc_type typehint, nc_type* typeidp); -EXTERNL int NCZ_inferinttype(unsigned long long u64, int negative); + +EXTERNL int NCZ_inferattrtype(const char* aname, nc_type typehint, const NCjson* value, nc_type* typeidp); EXTERNL int ncz_fill_value_sort(nc_type nctype, int*); EXTERNL int NCZ_createobject(NCZMAP* zmap, const char* key, size64_t size); -EXTERNL int NCZ_uploadjson(NCZMAP* zmap, const char* key, NCjson* json); +EXTERNL int NCZ_uploadjson(NCZMAP* zmap, const char* key, const NCjson* json); EXTERNL int NCZ_downloadjson(NCZMAP* zmap, const char* key, NCjson** jsonp); EXTERNL int NCZ_isLittleEndian(void); EXTERNL int NCZ_subobjects(NCZMAP* map, const char* prefix, const char* tag, char dimsep, NClist* objlist); EXTERNL int NCZ_grpname_full(int gid, char** pathp); -EXTERNL int ncz_get_var_meta(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var); EXTERNL int NCZ_comma_parse(const char* s, NClist* list); EXTERNL int NCZ_swapatomicdata(size_t datalen, void* data, int typesize); EXTERNL char** NCZ_clonestringvec(size_t len, const char** vec); EXTERNL void NCZ_freestringvec(size_t len, char** vec); +EXTERNL void NCZ_clearstringvec(size_t len, char** vec); +EXTERNL void NCZ_reclaim_diminfo_list(NClist* diminfo); +EXTERNL void NCZ_reclaim_diminfo(struct NCZ_DimInfo* diminfo); +EXTERNL void NCZ_reclaim_dimdecl_list(NClist* dimdecl); +EXTERNL void NCZ_reclaim_dimdecl(struct NCZ_DimDecl* dimdecl); EXTERNL int NCZ_ischunkname(const char* name,char dimsep); -EXTERNL char* NCZ_chunkpath(struct ChunkKey key); -EXTERNL int NCZ_reclaim_fill_value(NC_VAR_INFO_T* var); -EXTERNL int NCZ_copy_fill_value(NC_VAR_INFO_T* var, void** dstp); -EXTERNL int NCZ_get_maxstrlen(NC_OBJ* obj); -EXTERNL int NCZ_fixed2char(const void* fixed, char** charp, size_t count, int maxstrlen); -EXTERNL int NCZ_char2fixed(const char** charp, void* fixed, size_t count, int maxstrlen); +EXTERNL int NCZ_fixed2char(const void* fixed, char** charp, size_t count, size_t maxstrlen); +EXTERNL int NCZ_char2fixed(const char** charp, void* fixed, size_t count, size_t maxstrlen); EXTERNL int NCZ_copy_data(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, const void* memory, size_t count, int reading, void* copy); -EXTERNL int NCZ_iscomplexjson(const NCjson* value, nc_type typehint); +EXTERNL int NCZ_iscomplexjson(const char* aname, const NCjson* value); +EXTERNL int NCZ_iscomplexjsonstring(const char* aname, size_t textlen, const char* text, NCjson** jsonp); +EXTERNL int NCZ_makeFQN(NC_OBJ* obj, NCbytes* fqn); +EXTERNL int NCZ_locateFQN(NC_GRP_INFO_T* parent, const char* fqn, NC_SORT sort, NC_OBJ** objectp, char** basenamep); +EXTERNL int NCZ_search_name(NC_GRP_INFO_T* startgrp, const char* name, NC_SORT sort, NC_OBJ** objectp); +EXTERNL char* NCZ_deescape(const char* s); +EXTERNL char* NCZ_backslashescape(const char* s); +EXTERNL int NCZ_sortstringlist(void* vec, size_t count); +EXTERNL int NCZ_sortpairlist(void* vec, size_t count); +EXTERNL struct NCZ_AttrInfo NCZ_emptyAttrInfo(void); +EXTERNL void NCZ_clearAttrInfo(NC_FILE_INFO_T* file, struct NCZ_AttrInfo* ainfo); +EXTERNL void NCZ_setatts_read(NC_OBJ* container); +EXTERNL int NCZ_decodesizet64vec(const NCjson* jshape, size_t* rankp, size64_t* shapes); +EXTERNL int NCZ_decodesizetvec(const NCjson* jshape, size_t* rankp, size_t* shapes); +EXTERNL int NCZ_uniquedimname(NC_FILE_INFO_T* file, NC_GRP_INFO_T* parent, NCZ_DimInfo* dimdata, NC_DIM_INFO_T** dimp, NCbytes* dimname); +EXTERNL int NCZ_computeattrdata(NC_FILE_INFO_T* file, const NCjson* jdata, struct NCZ_AttrInfo* att); +EXTERNL int NCZ_dictgetalt(const NCjson* jdict, const NCjson** jvaluep, ...); +EXTERNL int NCZ_dictgetalt2(const NCjson* jdict, const NCjson** jvaluep, const char* name1, const char* name2); /* common case */ +EXTERNL int NCZ_getnczarrkey(NC_FILE_INFO_T* file, struct ZOBJ* jsonz, const char* name, const NCjson** jncxxxp); +EXTERNL char* NCZ_chunkpath(struct ChunkKey key); +EXTERNL size_t NCZ_get_maxstrlen(NC_OBJ* obj); +EXTERNL char NCZ_get_dimsep(NC_VAR_INFO_T* var); +EXTERNL double* NCZ_isnaninfstring(const char* val); /* zwalk.c */ EXTERNL int NCZ_read_chunk(int ncid, int varid, size64_t* zindices, void* chunkdata); diff --git a/libnczarr/zattr.c b/libnczarr/zattr.c index 7f3ef55454..9affa0df80 100644 --- a/libnczarr/zattr.c +++ b/libnczarr/zattr.c @@ -11,9 +11,75 @@ #include "zincludes.h" #include "zfilter.h" +#include "zfill.h" #undef ADEBUG +/* Build table of dual attributes; + build unsorted, but sort at first reference + so we can do binary search. +*/ +static struct DUALATT {const char* name; DualAtt dasort; NC_SORT container;} dualatts[] = { +{NC_FillValue, DA_FILLVALUE,NCVAR}, +{NC_NCZARR_MAXSTRLEN_ATTR, DA_MAXSTRLEN,NCVAR}, +{NC_NCZARR_DFALT_MAXSTRLEN_ATTR, DA_DFALTSTRLEN,NCFILE}, /* NCGRP refers to root group in file */ +{NC_NCZARR_SEPARATOR_ATTR, DA_SEP,NCVAR}, +{NC_NCZARR_DFALT_SEPARATOR_ATTR, DA_DFALTSEP,NCFILE}, +{NC_QUANTIZE_BITGROOM_ATT_NAME, DA_QUANTIZE,NCVAR}, +{NC_QUANTIZE_GRANULARBR_ATT_NAME, DA_QUANTIZE,NCVAR}, +{NC_QUANTIZE_BITROUND_ATT_NAME, DA_QUANTIZE,NCVAR} +}; +#define NDUALATTS (sizeof(dualatts)/sizeof(struct DUALATT)) +static int dualsorted = 0; + +/* Forward */ +static int charify(const NCjson* src, NCbytes* buf); +static int json_convention_read(const NCjson* json, NCjson** jtextp); +static int islegalatt(NC_FILE_INFO_T* file, NC_ATT_INFO_T* att, size_t alen); + +static int +dasort(const void* a, const void* b) +{ + const struct DUALATT *daa, *dab; + daa = a; dab = b; + return strcasecmp(daa->name,dab->name); +} + +static int +dacmp(const void* key, const void* elem) +{ + const struct DUALATT *delem = elem; + return strcasecmp(key,delem->name); +} + +/* Some attributes are reflected in var|file structure */ +DualAtt +NCZ_is_dual_att(const char* aname) +{ + void* match = NULL; + if(!dualsorted) { + qsort((void*)dualatts, NDUALATTS, sizeof(struct DUALATT),dasort); + dualsorted = 1; + } + /* Binary search the set of set of atomictypes */ + assert(dualsorted); + match = bsearch((void*)aname,(void*)dualatts,NDUALATTS,sizeof(struct DUALATT),dacmp); + if(match == NULL) return DA_NOT; + return ((struct DUALATT*)match)->dasort; +} + +/* For dual attributes, specify if they are associated with a var vs file */ +NC_SORT +NCZ_dual_att_container(DualAtt da) +{ + size_t i; + /* Since we cannot simultaneously binary search on two fields, do simple linear search */ + for(i=0;inc4_info; NCZ_FILE_INFO_T* zinfo = file->format_file_info; - assert(grp && attlist && file && zinfo); + assert(grp && file && zinfo); - if (varid == NC_GLOBAL) - { - /* Do we need to read the atts? */ - if (!grp->atts_read) - if ((retval = ncz_read_atts(file, (NC_OBJ*)grp))) - return retval; - - if (varp) - *varp = NULL; - *attlist = grp->att; - } - else - { + if (varid == NC_GLOBAL) { + if (varp) *varp = NULL; + if(attlistp) *attlistp = grp->att; + } else { NC_VAR_INFO_T *var; - - if (!(var = (NC_VAR_INFO_T *)ncindexith(grp->vars, (size_t)varid))) - return NC_ENOTVAR; + if (!(var = (NC_VAR_INFO_T *)ncindexith(grp->vars, (size_t)varid))) return NC_ENOTVAR; assert(var->hdr.id == varid); - - /* Do we need to read the atts? */ - if (!var->atts_read) - if ((retval = ncz_read_atts(file, (NC_OBJ*)var))) - return retval; - - if (varp) - *varp = var; - *attlist = var->att; + if (varp) *varp = var; + if(attlistp) *attlistp = var->att; } - return NC_NOERR; + return stat; } /** @@ -72,7 +120,8 @@ ncz_getattlist(NC_GRP_INFO_T *grp, int varid, NC_VAR_INFO_T **varp, NCindex **at * See the reserved attribute table in libsrc4/nc4internal.c. * The special attributes are the ones marked with NAMEONLYFLAG. * For example: NCPROPS, ISNETCDF4ATT, and SUPERBLOCKATT, and CODECS. - * These atts are not all really in the file, they are constructed on the fly. + * Some of these atts are stored in the file, but other are constructed on the fly. + * The later are distinguished by the VIRTUALFLAG. * * @param h5 Pointer to ZARR file info struct. * @param var Pointer to var info struct; NULL signals global. @@ -103,19 +152,19 @@ ncz_get_att_special(NC_FILE_INFO_T* h5, NC_VAR_INFO_T* var, const char* name, /* Handle the per-var case(s) first */ if(var != NULL) { #ifdef NETCDF_ENABLE_NCZARR_FILTERS - if(strcmp(name,NC_ATT_CODECS)==0) { + if(strcmp(name,NC_ATT_CODECS)==0) { NClist* filters = (NClist*)var->filters; if(mem_type == NC_NAT) mem_type = NC_CHAR; if(mem_type != NC_CHAR) {stat = NC_ECHAR; goto done;} if(filetypep) *filetypep = NC_CHAR; - if(lenp) *lenp = 0; - if(filters == NULL) goto done; - if((stat = NCZ_codec_attr(var,lenp,data))) goto done; - } + if(lenp) *lenp = 0; + if(filters == NULL) goto done; + if((stat = NCZ_codec_attr(var,lenp,data))) goto done; + } #endif - goto done; + goto done; } /* The global reserved attributes */ @@ -127,7 +176,7 @@ ncz_get_att_special(NC_FILE_INFO_T* h5, NC_VAR_INFO_T* var, const char* name, if(mem_type != NC_CHAR) {stat = NC_ECHAR; goto done;} if(filetypep) *filetypep = NC_CHAR; - len = strlen(h5->provenance.ncproperties); + len = strlen(h5->provenance.ncproperties); if(lenp) *lenp = len; if(data) strncpy((char*)data,h5->provenance.ncproperties,len+1); } else if(strcmp(name,ISNETCDF4ATT)==0 @@ -160,7 +209,7 @@ ncz_get_att_special(NC_FILE_INFO_T* h5, NC_VAR_INFO_T* var, const char* name, } /** - * @internal I think all atts should be named the exact same thing, to +* @internal I think all atts should be named the exact same thing, to * avoid confusion! * * @param ncid File and group ID. @@ -188,7 +237,7 @@ NCZ_rename_att(int ncid, int varid, const char *name, const char *newname) NC_ATT_INFO_T *att; NCindex *list; char norm_newname[NC_MAX_NAME + 1], norm_name[NC_MAX_NAME + 1]; - int retval = NC_NOERR; + int stat = NC_NOERR; if (!name || !newname) return NC_EINVAL; @@ -201,8 +250,8 @@ NCZ_rename_att(int ncid, int varid, const char *name, const char *newname) return NC_EMAXNAME; /* Find info for this file, group, and h5 info. */ - if ((retval = nc4_find_grp_h5(ncid, &grp, &h5))) - return retval; + if ((stat = nc4_find_grp_h5(ncid, &grp, &h5))) + return stat; assert(h5 && grp); /* If the file is read-only, return an error. */ @@ -210,21 +259,21 @@ NCZ_rename_att(int ncid, int varid, const char *name, const char *newname) return NC_EPERM; /* Check and normalize the name. */ - if ((retval = nc4_check_name(newname, norm_newname))) - return retval; + if ((stat = nc4_check_name(newname, norm_newname))) + return stat; /* Get the list of attributes. */ - if ((retval = ncz_getattlist(grp, varid, &var, &list))) - return retval; + if ((stat = ncz_getattlist(grp, varid, &var, &list))) + return stat; /* Is new name in use? */ att = (NC_ATT_INFO_T*)ncindexlookup(list,norm_newname); if(att != NULL) - return NC_ENAMEINUSE; + return THROW(NC_ENAMEINUSE); /* Normalize name and find the attribute. */ - if ((retval = nc4_normalize_name(name, norm_name))) - return retval; + if ((stat = nc4_normalize_name(name, norm_name))) + return stat; att = (NC_ATT_INFO_T*)ncindexlookup(list,norm_name); if (!att) @@ -250,7 +299,7 @@ NCZ_rename_att(int ncid, int varid, const char *name, const char *newname) /* Mark attributes on variable dirty, so they get written */ if(var) var->attr_dirty = NC_TRUE; - return retval; + return stat; } /** @@ -277,11 +326,8 @@ NCZ_del_att(int ncid, int varid, const char *name) NC_GRP_INFO_T *grp; NC_VAR_INFO_T *var; NC_FILE_INFO_T *h5; - NC_ATT_INFO_T *att; NCindex* attlist = NULL; - size_t i; - int deletedid; - int retval; + int stat; /* Name must be provided. */ if (!name) @@ -290,8 +336,8 @@ NCZ_del_att(int ncid, int varid, const char *name) LOG((2, "nc_del_att: ncid 0x%x varid %d name %s", ncid, varid, name)); /* Find info for this file, group, and h5 info. */ - if ((retval = nc4_find_grp_h5(ncid, &grp, &h5))) - return retval; + if ((stat = nc4_find_grp_h5(ncid, &grp, &h5))) + return stat; assert(h5 && grp); /* If the file is read-only, return an error. */ @@ -304,13 +350,13 @@ NCZ_del_att(int ncid, int varid, const char *name) { if (h5->cmode & NC_CLASSIC_MODEL) return NC_ENOTINDEFINE; - if ((retval = NCZ_redef(ncid))) - return retval; + if ((stat = NCZ_redef(ncid))) + return stat; } /* Get either the global or a variable attribute list. */ - if ((retval = ncz_getattlist(grp, varid, &var, &attlist))) - return retval; + if ((stat = ncz_getattlist(grp, varid, &var, &attlist))) + return stat; #ifdef LOOK /* Determine the location id in the ZARR file. */ @@ -320,42 +366,45 @@ NCZ_del_att(int ncid, int varid, const char *name) locid = ((NCZ_VAR_INFO_T *)(var->format_var_info))->hdf_datasetid; #endif + /* Defer to the internal version */ + stat = NCZ_attr_delete(h5,attlist,name); + + return stat; +} + +/* Internal version for deleting an attribute */ +int +NCZ_attr_delete(NC_FILE_INFO_T* file, NCindex* attlist, const char* name) +{ + int stat = NC_NOERR; + NC_ATT_INFO_T* att = NULL; + NCZ_ATT_INFO_T* za = NULL; + size_t i; + int deletedid; + /* Now find the attribute by name. */ if (!(att = (NC_ATT_INFO_T*)ncindexlookup(attlist, name))) return NC_ENOTATT; /* Reclaim the content of the attribute */ if(att->data) { - if((retval = NC_reclaim_data_all(h5->controller,att->nc_typeid,att->data,att->len))) return retval; + if((stat = NC_reclaim_data_all(file->controller,att->nc_typeid,att->data,(size_t)att->len))) return stat; } att->data = NULL; att->len = 0; - /* Delete it from the ZARR file, if it's been created. */ - if (att->created) - { -#ifdef LOOK - assert(locid); - if (H5Adelete(locid, att->hdr.name) < 0) - return NC_EATTMETA; -#endif - } - deletedid = att->hdr.id; /* reclaim associated NCZarr info */ - { - NCZ_ATT_INFO_T* za = (NCZ_ATT_INFO_T*)att->format_att_info; - nullfree(za); - } + za = (NCZ_ATT_INFO_T*)att->format_att_info; + nullfree(za); /* Remove this attribute in this list */ - if ((retval = nc4_att_list_del(attlist, att))) - return retval; + if ((stat = nc4_att_list_del(attlist, att))) + return stat; /* Renumber all attributes with higher indices. */ - for (i = 0; i < ncindexsize(attlist); i++) - { + for (i = 0; i < ncindexsize(attlist); i++) { NC_ATT_INFO_T *a; if (!(a = (NC_ATT_INFO_T *)ncindexith(attlist, i))) continue; @@ -376,10 +425,10 @@ NCZ_del_att(int ncid, int varid, const char *name) * * @param type A netcdf atomic type. * - * @return Type size in bytes, or -1 if type not found. + * @return Type size in bytes, or 0 if type not found. * @author Dennis Heimbigner, Ed Hartnett */ -static int +static size_t nc4typelen(nc_type type) { switch(type){ @@ -399,7 +448,7 @@ nc4typelen(nc_type type) case NC_UINT64: return 8; } - return -1; + return 0; } /** @@ -425,109 +474,44 @@ nc4typelen(nc_type type) * @author Dennis Heimbigner, Ed Hartnett */ int -ncz_put_att(NC_GRP_INFO_T* grp, int varid, const char *name, nc_type file_type, - size_t len, const void *data, nc_type mem_type, int force) +ncz_put_att(int ncid, int containerid, const char *name, nc_type file_type, + size_t len, const void *data, nc_type mem_type) { + int stat = NC_NOERR, range_error = 0; NC* nc; - NC_FILE_INFO_T *h5 = NULL; + NC_FILE_INFO_T *file = NULL; + NC_GRP_INFO_T *grp = NULL; NC_VAR_INFO_T *var = NULL; - NCindex* attlist = NULL; - NC_ATT_INFO_T* att; char norm_name[NC_MAX_NAME + 1]; - nc_bool_t new_att = NC_FALSE; - int retval = NC_NOERR, range_error = 0; - size_t type_size; - int ret; - int ncid; - void* copy = NULL; - /* Save the old att data and length and old fillvalue in case we need to rollback on error */ - struct Save { - size_t len; - void* data; - nc_type type; /* In case we change the type of the attribute */ - } attsave = {0,NULL,-1}; - struct Save fillsave = {0,NULL,-1}; - - h5 = grp->nc4_info; - nc = h5->controller; - assert(nc && grp && h5); - - ncid = nc->ext_ncid | grp->hdr.id; - - /* Find att, if it exists. (Must check varid first or nc_test will - * break.) This also does lazy att reads if needed. */ - if ((ret = ncz_getattlist(grp, varid, &var, &attlist))) - return ret; - - /* The length needs to be positive (cast needed for braindead - systems with signed size_t). */ - if((unsigned long) len > X_INT_MAX) - return NC_EINVAL; - - /* Check name before LOG statement. */ - if (!name || strlen(name) > NC_MAX_NAME) - return NC_EBADNAME; - - LOG((1, "%s: ncid 0x%x varid %d name %s file_type %d mem_type %d len %d", - __func__,ncid, varid, name, file_type, mem_type, len)); - - /* If len is not zero, then there must be some data. */ - if (len && !data) - return NC_EINVAL; - - /* If the file is read-only, return an error. */ - if (h5->no_write) - return NC_EPERM; - - /* Check and normalize the name. */ - if ((retval = nc4_check_name(name, norm_name))) - return retval; - - /* Check that a reserved att name is not being used improperly */ - const NC_reservedatt* ra = NC_findreserved(name); - if(ra != NULL && !force) { - /* case 1: grp=root, varid==NC_GLOBAL, flags & READONLYFLAG */ - if (nc->ext_ncid == ncid && varid == NC_GLOBAL && grp->parent == NULL - && (ra->flags & READONLYFLAG)) - return NC_ENAMEINUSE; - /* case 2: grp=NA, varid!=NC_GLOBAL, flags & HIDDENATTRFLAG */ - if (varid != NC_GLOBAL && (ra->flags & HIDDENATTRFLAG)) - return NC_ENAMEINUSE; + const NC_reservedatt* ra = NULL; + NC_ATT_INFO_T* att = NULL; + NCindex* attlist = NULL; + NC_OBJ* obj = NULL; + void* src = NULL; + int isnew = 0; + int isconverted = 0; + DualAtt dualatt; + + if(containerid == NC_GLOBAL) { + if((stat= nc4_find_grp_h5(ncid, &grp, &file))) goto done; + attlist = grp->att; + obj = (NC_OBJ*)grp; + } else { + if((stat= nc4_find_grp_h5_var(ncid, containerid, &file, &grp, &var))) goto done; + attlist = var->att; + obj = (NC_OBJ*)var; } + assert(file != NULL && grp != NULL && (containerid == NC_GLOBAL || var != NULL)); + assert(attlist != NULL); + + nc = file->controller; + assert(nc && grp && file); - /* See if there is already an attribute with this name. */ - att = (NC_ATT_INFO_T*)ncindexlookup(attlist,norm_name); - - if (!att) - { - /* If this is a new att, require define mode. */ - if (!(h5->flags & NC_INDEF)) - { - - if (h5->cmode & NC_CLASSIC_MODEL) - return NC_ENOTINDEFINE; - if ((retval = NCZ_redef(ncid))) - BAIL(retval); - } - new_att = NC_TRUE; - } - else - { - /* For an existing att, if we're not in define mode, the len - must not be greater than the existing len for classic model. */ - if (!(h5->flags & NC_INDEF) && - len * (size_t)nc4typelen(file_type) > (size_t)att->len * (size_t)nc4typelen(att->nc_typeid)) - { - if (h5->cmode & NC_CLASSIC_MODEL) - return NC_ENOTINDEFINE; - if ((retval = NCZ_redef(ncid))) - BAIL(retval); - } - } + LOG((1, "%s: ncid 0x%x containerid %d name %s file_type %d mem_type %d len %d", + __func__,ncid, containerid, name, file_type, mem_type, len)); /* We must have two valid types to continue. */ - if (file_type == NC_NAT || mem_type == NC_NAT) - return NC_EBADTYPE; + if (file_type == NC_NAT || mem_type == NC_NAT) return NC_EBADTYPE; /* No character conversions are allowed. */ if (file_type != mem_type && @@ -537,203 +521,81 @@ ncz_put_att(NC_GRP_INFO_T* grp, int varid, const char *name, nc_type file_type, /* For classic mode file, only allow atts with classic types to be * created. */ - if (h5->cmode & NC_CLASSIC_MODEL && file_type > NC_DOUBLE) + if (file->cmode & NC_CLASSIC_MODEL && file_type > NC_DOUBLE) return NC_ESTRICTNC3; - /* Add to the end of the attribute list, if this att doesn't - already exist. */ - if (new_att) - { - LOG((3, "adding attribute %s to the list...", norm_name)); - if ((ret = nc4_att_list_add(attlist, norm_name, &att))) - BAIL(ret); - - /* Allocate storage for the ZARR specific att info. */ - if (!(att->format_att_info = calloc(1, sizeof(NCZ_ATT_INFO_T)))) - BAIL(NC_ENOMEM); - - if(varid == NC_GLOBAL) - att->container = (NC_OBJ*)grp; - else - att->container = (NC_OBJ*)var; - } - - /* Now fill in the metadata. */ - att->dirty = NC_TRUE; + /* The length needs to be positive (cast needed for brain-dead systems with signed size_t). */ + if((unsigned long) len > X_INT_MAX) {stat = NC_EINVAL; goto done;} - /* When we reclaim existing data, make sure to use the right type */ - if(new_att) attsave.type = file_type; else attsave.type = att->nc_typeid; - att->nc_typeid = file_type; + /* If len is not zero, then there must be some data. */ + if (len > 0 && data == NULL) {stat = NC_EINVAL; goto done;} - /* Get information about this (possibly new) type. */ - if ((retval = nc4_get_typelen_mem(h5, file_type, &type_size))) - return retval; + /* If the file is read-only, return an error. */ + if (file->no_write) {stat = NC_EPERM; goto done;} - if (att->data) - { - assert(attsave.data == NULL); - attsave.data = att->data; - attsave.len = att->len; - att->data = NULL; + /* Check and normalize the name. */ + if (!name || strlen(name) > NC_MAX_NAME) {stat = NC_EBADNAME; goto done;} + if ((stat = nc4_check_name(name, norm_name))) goto done; + + /* Check that a reserved att name is not being used improperly, meaning: + 1. attr is in root group and is readonly + 2. attr is in var and is readonly + */ + ra = NC_findreserved(name); + if(ra != NULL) { + /* case 1: grp=root, containerid==NC_GLOBAL, flags & READONLYFLAG */ + if (grp != NULL && grp->parent == NULL && (ra->flags & READONLYFLAG)) + {stat = NC_ENAMEINUSE; goto done;} + /* case 2: grp=NA, objid!=NC_GLOBAL, flags & HIDDENATTRFLAG */ + if (grp != NULL && (ra->flags & HIDDENATTRFLAG)) + {stat = NC_ENAMEINUSE; goto done;} } - /* If this is the _FillValue attribute, then we will also have to - * copy the value to the fill_value pointer of the NC_VAR_INFO_T - * struct for this var. (But ignore a global _FillValue - * attribute). Also kill the cache fillchunk as no longer valid */ - if (!strcmp(att->hdr.name, NC_FillValue) && varid != NC_GLOBAL) - { - /* Fill value must have exactly one value */ - if (len != 1) - return NC_EINVAL; - - /* If we already wrote to the dataset, then return an error. */ - if (var->written_to) - return NC_ELATEFILL; - - /* Get the length of the veriable data type. */ - if ((retval = nc4_get_typelen_mem(grp->nc4_info, var->type_info->hdr.id, - &type_size))) - return retval; - - /* Already set a fill value? Now I'll have to free the old - * one. Make up your damn mind, would you? */ - if (var->fill_value) - { - /* reclaim later */ - fillsave.data = var->fill_value; - fillsave.type = var->type_info->hdr.id; - fillsave.len = 1; - var->fill_value = NULL; - } - - /* Determine the size of the fill value in bytes. */ - - { - nc_type var_type = var->type_info->hdr.id; - size_t var_type_size = var->type_info->size; - /* The old code used the var's type as opposed to the att's type; normally same, - but not required. Now we need to convert from the att's type to the var's type. - Note that we use mem_type rather than file_type because our data is in the form - of the memory data. When we later capture the memory data for the actual - attribute, we will use file_type as the target of the conversion. */ - if(mem_type != var_type && mem_type < NC_STRING && var_type < NC_STRING) { - /* Need to convert from memory data into copy buffer */ - if((copy = malloc(len*var_type_size))==NULL) BAIL(NC_ENOMEM); - if ((retval = nc4_convert_type(data, copy, mem_type, var_type, + /* copy and/or convert memory data to file format data */ + if(mem_type != file_type && mem_type < NC_STRING && mem_type < NC_STRING) { + size_t mem_type_len = 0; + size_t file_type_len = 0; + if ((stat = nc4_get_typelen_mem(file, mem_type, &mem_type_len))) return stat; + if ((stat = nc4_get_typelen_mem(file, file_type, &file_type_len))) return stat; + /* Need to convert from memory data into copy buffer */ + if((src = malloc(len*file_type_len))==NULL) {stat = NC_ENOMEM; goto done;} + if ((stat = nc4_convert_type(data, src, mem_type, file_type, len, &range_error, NULL, - (h5->cmode & NC_CLASSIC_MODEL), - NC_NOQUANTIZE, 0))) - BAIL(retval); - } else { /* no conversion */ - /* Still need a copy of the input data */ - copy = NULL; - if((retval = NC_copy_data_all(h5->controller, mem_type, data, 1, ©))) - BAIL(retval); - } - var->fill_value = copy; - copy = NULL; - } - - /* Indicate that the fill value was changed, if the variable has already - * been created in the file, so the dataset gets deleted and re-created. */ - if (var->created) - var->fill_val_changed = NC_TRUE; - /* Reclaim any existing fill_chunk */ - if((retval = NCZ_reclaim_fill_chunk(((NCZ_VAR_INFO_T*)var->format_var_info)->cache))) BAIL(retval); + (file->cmode & NC_CLASSIC_MODEL), + NC_NOQUANTIZE, 0))) + goto done; + isconverted = 1; + } else {/*no conversion */ + src = (void*)data; + isconverted = 0; } - - /* Copy the attribute data, if there is any. */ - if (len) - { - nc_type type_class; /* Class of attribute's type */ - - /* Get class for this type. */ - if ((retval = nc4_get_typeclass(h5, file_type, &type_class))) - return retval; - - assert(data); - { - /* Allocate top level of the copy */ - if (!(copy = malloc(len * type_size))) - BAIL(NC_ENOMEM); - /* Special case conversion from memory to file type */ - if(mem_type != file_type && mem_type < NC_STRING && file_type < NC_STRING) { - if ((retval = nc4_convert_type(data, copy, mem_type, file_type, - len, &range_error, NULL, - (h5->cmode & NC_CLASSIC_MODEL), - NC_NOQUANTIZE, 0))) - BAIL(retval); - } else if(mem_type == file_type) { /* General case: no conversion */ - if((retval = NC_copy_data(h5->controller,file_type,data,len,copy))) - BAIL(retval); - } else - BAIL(NC_EURL); - /* Store it */ - att->data = copy; copy = NULL; - } + + /* See if there is already an attribute with this name or create */ + if((stat = NCZ_getattr(file,obj,norm_name,file_type,&att,&isnew))) goto done; + /* insert/overwrite data */ + if(len > 0 && data != NULL) { /* overwrite old value */ + if((stat = NCZ_set_att_data(file,att,len,src))) goto done; } - /* If this is a maxstrlen attribute, then we will also have to - * sync the value to NCZ_VAR_INFO_T or NCZ_FILE_INFO_T structure */ - { - if(strcmp(att->hdr.name,NC_NCZARR_DEFAULT_MAXSTRLEN_ATTR)==0 && varid == NC_GLOBAL && len == 1) { - NCZ_FILE_INFO_T* zfile = (NCZ_FILE_INFO_T*)h5->format_file_info; - if((retval = nc4_convert_type(att->data, &zfile->default_maxstrlen, file_type, NC_INT, - len, &range_error, NULL, NC_CLASSIC_MODEL, NC_NOQUANTIZE, 0))) - BAIL(retval); - } else if(strcmp(att->hdr.name,NC_NCZARR_MAXSTRLEN_ATTR)==0 && varid != NC_GLOBAL && len == 1) { - NCZ_VAR_INFO_T* zvar = (NCZ_VAR_INFO_T*)var->format_var_info; - if((retval = nc4_convert_type(att->data, &zvar->maxstrlen, file_type, NC_INT, - len, &range_error, NULL, NC_CLASSIC_MODEL, NC_NOQUANTIZE, 0))) - BAIL(retval); - } + /* Some attributes are reflected in var|grp structure so must be sync'd */ + switch (NCZ_dual_att_container(dualatt=NCZ_is_dual_att(name))) { + case NCVAR: + assert(var != NULL); + if((stat = NCZ_sync_dual_att(file,(NC_OBJ*)var,name,dualatt,FIXOBJ))) goto done; + break; + case NCFILE: + if((stat = NCZ_sync_dual_att(file,(NC_OBJ*)file->root_grp,name,dualatt,FIXOBJ))) goto done; + break; + default: break; /* ignore */ } - - att->dirty = NC_TRUE; - att->created = NC_FALSE; - att->len = len; - /* Mark attributes on variable dirty, so they get written */ - if(var) - var->attr_dirty = NC_TRUE; - /* Reclaim saved data */ - if(attsave.data != NULL) { - assert(attsave.len > 0); - (void)NC_reclaim_data_all(h5->controller,attsave.type,attsave.data,attsave.len); - attsave.len = 0; attsave.data = NULL; - } - if(fillsave.data != NULL) { - assert(fillsave.len > 0); - (void)NC_reclaim_data_all(h5->controller,fillsave.type,fillsave.data,fillsave.len); - fillsave.len = 0; fillsave.data = NULL; - } - -exit: - if(copy) - (void)NC_reclaim_data_all(h5->controller,file_type,copy,len); - if(retval) { - /* Rollback */ - if(attsave.data != NULL) { - assert(attsave.len > 0); - if(att->data) - (void)NC_reclaim_data_all(h5->controller,attsave.type,att->data,att->len); - att->len = attsave.len; att->data = attsave.data; - } - if(fillsave.data != NULL) { - assert(fillsave.len > 0); - if(att->data) - (void)NC_reclaim_data_all(h5->controller,fillsave.type,var->fill_value,1); - var->fill_value = fillsave.data; - } - } +done: + if(isconverted) (void)NC_reclaim_data_all(file->controller,file_type,src,len); /* If there was an error return it, otherwise return any potential range error value. If none, return NC_NOERR as usual.*/ - if (range_error) - return NC_ERANGE; - if (retval) - return retval; - return NC_NOERR; + if (range_error) return THROW(NC_ERANGE); + if (stat) return THROW(stat); + return THROW(NC_NOERR); } /** @@ -760,16 +622,16 @@ int NCZ_put_att(int ncid, int varid, const char *name, nc_type file_type, size_t len, const void *data, nc_type mem_type) { - NC_FILE_INFO_T *h5; + NC_FILE_INFO_T *file; NC_GRP_INFO_T *grp; int ret; - /* Find info for this file, group, and h5 info. */ - if ((ret = nc4_find_grp_h5(ncid, &grp, &h5))) + /* Find info for this file, group, and file info. */ + if ((ret = nc4_find_grp_h5(ncid, &grp, &file))) return ret; - assert(grp && h5); + assert(grp && file); - return ncz_put_att(grp, varid, name, file_type, len, data, mem_type, 0); + return ncz_put_att(ncid, varid, name, file_type, len, data, mem_type); } /** @@ -790,29 +652,29 @@ int NCZ_inq_att(int ncid, int varid, const char *name, nc_type *xtypep, size_t *lenp) { - NC_FILE_INFO_T *h5; + NC_FILE_INFO_T *file; NC_GRP_INFO_T *grp; NC_VAR_INFO_T *var = NULL; char norm_name[NC_MAX_NAME + 1]; - int retval; + int stat; LOG((2, "%s: ncid 0x%x varid %d", __func__, ncid, varid)); /* Find the file, group, and var info, and do lazy att read if * needed. */ - if ((retval = ncz_find_grp_var_att(ncid, varid, name, 0, 1, norm_name, - &h5, &grp, &var, NULL))) - return retval; + if ((stat = ncz_find_grp_var_att(ncid, varid, name, 0, 1, norm_name, + &file, &grp, &var, NULL))) + return stat; /* If this is one of the reserved atts, use nc_get_att_special. */ { const NC_reservedatt *ra = NC_findreserved(norm_name); if (ra && ra->flags & NAMEONLYFLAG) - return ncz_get_att_special(h5, var, norm_name, xtypep, NC_NAT, lenp, NULL, + return ncz_get_att_special(file, var, norm_name, xtypep, NC_NAT, lenp, NULL, NULL); } - return nc4_get_att_ptrs(h5, grp, var, norm_name, xtypep, NC_NAT, + return nc4_get_att_ptrs(file, grp, var, norm_name, xtypep, NC_NAT, lenp, NULL, NULL); } @@ -830,29 +692,29 @@ NCZ_inq_att(int ncid, int varid, const char *name, nc_type *xtypep, int NCZ_inq_attid(int ncid, int varid, const char *name, int *attnump) { - NC_FILE_INFO_T *h5; + NC_FILE_INFO_T *file; NC_GRP_INFO_T *grp; NC_VAR_INFO_T *var = NULL; char norm_name[NC_MAX_NAME + 1]; - int retval; + int stat; LOG((2, "%s: ncid 0x%x varid %d", __func__, ncid, varid)); /* Find the file, group, and var info, and do lazy att read if * needed. */ - if ((retval = ncz_find_grp_var_att(ncid, varid, name, 0, 1, norm_name, - &h5, &grp, &var, NULL))) - return retval; + if ((stat = ncz_find_grp_var_att(ncid, varid, name, 0, 1, norm_name, + &file, &grp, &var, NULL))) + return stat; /* If this is one of the reserved atts, use nc_get_att_special. */ { const NC_reservedatt *ra = NC_findreserved(norm_name); if (ra && ra->flags & NAMEONLYFLAG) - return ncz_get_att_special(h5, var, norm_name, NULL, NC_NAT, NULL, attnump, + return ncz_get_att_special(file, var, norm_name, NULL, NC_NAT, NULL, attnump, NULL); } - return nc4_get_att_ptrs(h5, grp, var, norm_name, NULL, NC_NAT, + return nc4_get_att_ptrs(file, grp, var, norm_name, NULL, NC_NAT, NULL, attnump, NULL); } @@ -872,23 +734,23 @@ int NCZ_inq_attname(int ncid, int varid, int attnum, char *name) { NC_ATT_INFO_T *att; - int retval = NC_NOERR; + int stat = NC_NOERR; ZTRACE(1,"ncid=%d varid=%d attnum=%d",ncid,varid,attnum); LOG((2, "%s: ncid 0x%x varid %d", __func__, ncid, varid)); /* Find the file, group, and var info, and do lazy att read if * needed. */ - if ((retval = ncz_find_grp_var_att(ncid, varid, NULL, attnum, 0, NULL, + if ((stat = ncz_find_grp_var_att(ncid, varid, NULL, attnum, 0, NULL, NULL, NULL, NULL, &att))) - goto done; + goto done; assert(att); /* Get the name. */ if (name) strcpy(name, att->hdr.name); done: - return ZUNTRACEX(retval,"name=%s",(retval?"":name)); + return ZUNTRACEX(stat,"name=%s",(stat?"":name)); } /** @@ -906,147 +768,669 @@ NCZ_inq_attname(int ncid, int varid, int attnum, char *name) * @author Dennis Heimbigner, Ed Hartnett */ int -NCZ_get_att(int ncid, int varid, const char *name, void *value, - nc_type memtype) +NCZ_get_att(int ncid, int varid, const char *name, void *value, nc_type mem_type) { - NC_FILE_INFO_T *h5; + int stat = NC_NOERR; + NC_FILE_INFO_T *file; NC_GRP_INFO_T *grp; NC_VAR_INFO_T *var = NULL; + NC_ATT_INFO_T *att = NULL; char norm_name[NC_MAX_NAME + 1]; - int retval; + nc_type file_type = NC_NAT; + int range_error = 0; LOG((2, "%s: ncid 0x%x varid %d", __func__, ncid, varid)); - /* Find the file, group, and var info, and do lazy att read if - * needed. */ - if ((retval = ncz_find_grp_var_att(ncid, varid, name, 0, 1, norm_name, - &h5, &grp, &var, NULL))) - return retval; + /* Check and normalize the name. */ + if (!name || strlen(name) > NC_MAX_NAME) {stat = NC_EBADNAME; goto done;} + if ((stat = nc4_check_name(name, norm_name))) goto done; + + /* Find the file and group and (optionally) var info */ + if(varid != NC_GLOBAL) { + if ((stat = nc4_find_grp_h5_var(ncid, varid, &file, &grp, &var))) goto done; + } else { /* just find grp and file */ + if ((stat = nc4_find_grp_h5(ncid, &grp, &file))) goto done; + var = NULL; + } /* If this is one of the reserved global atts, use nc_get_att_special. */ { const NC_reservedatt *ra = NC_findreserved(norm_name); if (ra && ra->flags & NAMEONLYFLAG) - return ncz_get_att_special(h5, var, norm_name, NULL, NC_NAT, NULL, NULL, + return ncz_get_att_special(file, var, norm_name, NULL, NC_NAT, NULL, NULL, value); } - return nc4_get_att_ptrs(h5, grp, var, norm_name, NULL, memtype, - NULL, NULL, value); -} + /* See if the attribute exists */ + stat = nc4_find_grp_att(grp,varid,norm_name,0,&att); #if 0 -static int -ncz_del_attr(NC_FILE_INFO_T* file, NC_OBJ* container, const char* name) -{ - int i,stat = NC_NOERR; +???? /* If asking for _FillValue and it does not exist, build it using either var->fill_value or the default fill */ + if(stat == NC_ENOTATT && att == NULL && var != NULL && strcmp(norm_name,NC_FillValue)==0) { + int isnew = 0; + if((stat = NCZ_getattr(file,(NC_OBJ*)var,NC_FillValue,var->type_info->hdr.id,&att,&isnew))) goto done; + assert(isnew && att->data == NULL); + if((stat = NC_copy_data_all(file->controller,att->nc_typeid,NCZ_getdfaltfillvalue(att->nc_typeid),1,&att->data))) goto done; +assert(var->fill_value != att->data); + } +#endif - ZTRACE(); + /* stop if error */ + if(stat) goto done; + assert(att != NULL); + file_type = att->nc_typeid; + + /* We must have two valid types to continue. */ + if (file_type == NC_NAT || mem_type == NC_NAT) return NC_EBADTYPE; + + /* No character conversions are allowed. */ + if (file_type != mem_type && + (file_type == NC_CHAR || mem_type == NC_CHAR || + file_type == NC_STRING || mem_type == NC_STRING)) + return NC_ECHAR; + + /* For classic mode file, only allow atts with classic types to be + * created. */ + if (file->cmode & NC_CLASSIC_MODEL && file_type > NC_DOUBLE) + return NC_ESTRICTNC3; + + /* copy and/or convert memory data to file format data */ + if(mem_type != file_type && mem_type < NC_STRING && mem_type < NC_STRING) { + size_t mem_type_len = 0; + size_t file_type_len = 0; + if ((stat = nc4_get_typelen_mem(file, mem_type, &mem_type_len))) return stat; + if ((stat = nc4_get_typelen_mem(file, file_type, &file_type_len))) return stat; + /* Need to convert from file_type data into output buffer */ + if ((stat = nc4_convert_type(att->data, value, file_type, mem_type, + att->len, &range_error, NULL, + (file->cmode & NC_CLASSIC_MODEL), + NC_NOQUANTIZE, 0))) + goto done; +assert(var == NULL || (var->fill_value != att->data)); + } else { /* no conversion */ + /* Still need a copy of the input data */ + if((stat = NC_copy_data(file->controller, file_type, att->data, att->len, value))) goto done; +assert(var == NULL || (var->fill_value != att->data)); + } + +done: + /* If there was an error return it, otherwise return any potential + range error value. If none, return NC_NOERR as usual.*/ + if (range_error) return NC_ERANGE; + if (stat) return stat; + return THROW(stat); +} + + +/* +Create an attribute; This is the core of ncz_put_att above. +Caller must free values. +*/ +int +ncz_makeattr(NC_FILE_INFO_T* file, NC_OBJ* container, struct NCZ_AttrInfo* ainfo, NC_ATT_INFO_T** attp) +{ + int stat = NC_NOERR; + NC_ATT_INFO_T* att = NULL; + NCZ_ATT_INFO_T* zatt = NULL; + int new_att = 0; + NCindex* attlist = NULL; + if(container->sort == NCGRP) - stat = ncz_getattlist((NC_GRP_INFO_T*)container,NC_GLOBAL,NULL,&attlist); + attlist = ((NC_GRP_INFO_T*)container)->att; else - stat = ncz_getattlist((NC_VAR_INFO_T*)container,NC_GLOBAL,NULL,&attlist); - - goto done; - - /* Iterate over the attributes to locate the matching attribute */ - for(i=0;idict);i+=2) { - NCjson* key = nclistget(jattrs->dict,i); - assert(key->sort == NCJ_STRING); - if(strcmp(key->value,name)==0) { - /* Remove and reclaim */ - NCjson* value = nclistget(jattrs->dict,i+1); - nclistremove(jattrs->dict,i); - nclistremove(jattrs->dict,i+1); - NCJreclaim(key); - NCJreclaim(value); - break; - } + attlist = ((NC_VAR_INFO_T*)container)->att; + assert(attlist != NULL); + + if ((stat = nc4_get_typelen_mem(file, ainfo->nctype, &ainfo->typelen))) goto done; + + /* See if there is already an attribute with this name. */ + att = (NC_ATT_INFO_T*)ncindexlookup(attlist,ainfo->name); + new_att = (att == NULL?1:0); + + if(new_att) { + if((stat=nc4_att_list_add(attlist,ainfo->name,&att))) goto done; + if((zatt = calloc(1,sizeof(NCZ_ATT_INFO_T))) == NULL) {stat = NC_ENOMEM; goto done;} + zatt->common.file = file; + att->container = container; + att->format_att_info = zatt; + } + + att->nc_typeid = ainfo->nctype; + if(ainfo->datalen > 0 && ainfo->data) { + /* Fill in the attribute's type and value; ainfo->data is copied */ + if((stat = NCZ_set_att_data(file,att,ainfo->datalen,ainfo->data))) goto done; } - /* Write the json back out */ - if((stat = ncz_unload_jatts(zinfo, container, jattrs, jtypes))) - goto done; + att->dirty = NC_TRUE; + if(attp) {*attp = att; att = NULL;} + + if(container->sort != NCGRP) + ((NC_VAR_INFO_T*)container)->attr_dirty = NC_TRUE; done: - NCJreclaim(jattrs); - NCJreclaim(jtypes); - return stat; + if(stat) { + if(new_att && att) { + nc4_att_list_del(attlist,att); + nullfree(zatt); + } + } + return THROW(stat); +} + +/* Write/overwrite NC_XXX_INFO_T fields for fields that have a dual attribute. + Reclaim old data and replace with copy of data argument + Do not synchronize with the corresponding attribute. +*/ +int +NCZ_set_dual_obj_data(NC_FILE_INFO_T* file, NC_OBJ* object, const char* name, DualAtt which, size_t len, const void* data) +{ + int stat = NC_NOERR; + + NC_UNUSED(name); /* for now */ + if(object->sort == NCGRP) { + assert(file->root_grp == (NC_GRP_INFO_T*)object); + switch(which) { + case DA_DFALTSTRLEN: + assert(len == 1); + zsetdfaltstrlen((size_t)((int*)data)[0],file); + break; + default: stat = NC_EINVAL; break; + } + } else { + NC_VAR_INFO_T* var = NULL; + nc_type tid; + assert(object->sort == NCVAR); + var = (NC_VAR_INFO_T*)object; + tid = var->type_info->hdr.id; + switch(which) { + case DA_FILLVALUE: + assert(len == 1); + if((stat = NC_reclaim_data_all(file->controller,tid,var->fill_value,len))) goto done; + var->fill_value = NULL; + if((stat = NC_copy_data_all(file->controller,tid,data,len,&var->fill_value))) goto done; + break; + case DA_MAXSTRLEN: + assert(len == 1); + zsetmaxstrlen((size_t)((int*)data)[0],var); + break; + case DA_QUANTIZE: + assert(len == 1); + var->nsd = ((int*)data)[0]; + break; + case DA_SEP: + assert(len == 1); + zsetmaxstrlen((size_t)((int*)data)[0],var); + break; + default: assert(0); + } + } +done: + return THROW(stat); } -#endif -/* If we do not have a _FillValue, then go ahead and create it */ +/* Write/overwrite attribute data for any attribute. + Reclaim old data and replace with copy of data argument. + Do not synchronize with any corresponding NC_XXX_INFO_T field. +*/ int -ncz_create_fillvalue(NC_VAR_INFO_T* var) +NCZ_set_att_data(NC_FILE_INFO_T* file, NC_ATT_INFO_T* att, size_t len, const void* data) +{ + int stat = NC_NOERR; + void* copy = NULL; + + /* Consistency checks */ + assert((len == 0 && data == NULL) || (len > 0 && data != NULL)); + assert((att->len == 0 && att->data == NULL) || (att->len > 0 && att->data != NULL)); + + /* Reclaim any old att->data */ + if(att->data != NULL) { + /* remove old att data */ + (void)NC_reclaim_data_all(file->controller,att->nc_typeid,att->data,att->len); + att->data = NULL; + att->len = 0; + } + + /* set att->data with a copy of data */ + if(len > 0 && data != NULL) { + if((stat = NC_copy_data_all(file->controller, att->nc_typeid, data, len, ©))) goto done; + /* set the att data */ + att->len = len; + att->data = copy; copy = NULL; + } + att->dirty = NC_TRUE; + +done: + /* cleanup */ + if(copy != NULL) (void)NC_reclaim_data_all(file->controller,att->nc_typeid,copy,len); + return THROW(stat); +} + +/* +Extract data for an attribute +This is essentially Version 2|3 agnostic because the +data part of an attribute is (currently) the same for both versions. +*/ +int +NCZ_computeattrdata(NC_FILE_INFO_T* file, const NCjson* jdata, struct NCZ_AttrInfo* ainfo) +{ + int stat = NC_NOERR; + NCbytes* buf = ncbytesnew(); + NCjson* jtext = NULL; + int isjson = 0; /* 1 => attribute value is neither scalar nor array of scalars */ + int reclaimvalues = 0; + + NC_UNUSED(file); + + ZTRACE(3,"typeid=%d values=|%s|",ainfo->nctype,NCJtotext(jdata,0)); + + /* See if this is a simple vector (or scalar) of atomic types vs more complex json */ + isjson = (ainfo->nctype == NC_JSON || NCZ_iscomplexjson(ainfo->name,jdata)); + + /* Get assumed type */ + if(ainfo->nctype == NC_NAT && !isjson) { + if((stat = NCZ_inferattrtype(ainfo->name,NC_NAT,jdata,&ainfo->nctype))) goto done; + } + + if(isjson) { + /* Apply the JSON attribute convention and convert to JSON string */ + ainfo->nctype = NC_CHAR; + if((stat = json_convention_read(jdata,&jtext))) goto done; + jdata = jtext; jtext = NULL; + reclaimvalues = 1; + } + + if((stat = NC4_inq_atomic_type(ainfo->nctype, NULL, &ainfo->typelen))) goto done; + + /* Convert the JSON attribute values to the actual netcdf attribute bytes */ + assert(ainfo->data == NULL); + if((stat = NCZ_attr_convert(jdata,ainfo->nctype,&ainfo->datalen,buf))) goto done; + ainfo->data = ncbytesextract(buf); + +done: + ncbytesfree(buf); + if(reclaimvalues) {NCJreclaim((NCjson*)jdata); jdata = NULL; /* we created it */} + return ZUNTRACEX(THROW(stat),"typelen=%d count=%u",(ainfo->nctype?ainfo->nctype:0),(ainfo->datalen?ainfo->datalen:-1)); +} + +/* Convert a json value to actual data values of an attribute. +@param src - [in] src value +@param typeid - [in] dst type +@param countp - [out] dst length (if dict or array) +@param dst - [out] dst data +*/ + +int +NCZ_attr_convert(const NCjson* src, nc_type typeid, size_t* countp, NCbytes* dst) { int stat = NC_NOERR; size_t i; - NC_ATT_INFO_T* fv = NULL; - - /* Have the var's attributes been read? */ - if(!var->atts_read) goto done; /* above my pay grade */ - - /* Is FillValue warranted? */ - if(!var->no_fill && var->fill_value != NULL) { - /* Make sure _FillValue does not exist */ - for(i=0;iatt);i++) { - fv = (NC_ATT_INFO_T*)ncindexith(var->att,i); - if(strcmp(fv->hdr.name,NC_FillValue)==0) break; - fv = NULL; + size_t count = 0; + + ZTRACE(3,"src=%s typeid=%d",NCJtotext(src,0),typeid); + + /* 3 cases: + (1) singleton atomic value + (2) array of atomic values + (3) other JSON expression + */ + switch (NCJsort(src)) { + case NCJ_INT: case NCJ_DOUBLE: case NCJ_BOOLEAN: /* case 1 */ + count = 1; + if((stat = NCZ_convert1(src, typeid, dst))) + goto done; + break; + + case NCJ_STRING: + if(typeid == NC_CHAR) { + if((stat = charify(src,dst))) goto done; + count = ncbyteslength(dst); + /* Special case for "" */ + if(count == 0) { + ncbytesappend(dst,'\0'); + count = 1; + } + } else { + if((stat = NCZ_convert1(src, typeid, dst))) goto done; + count = 1; + } + break; + + case NCJ_ARRAY: + if(typeid == NC_CHAR) { + if((stat = charify(src,dst))) goto done; + count = ncbyteslength(dst); + } else { + count = NCJarraylength(src); + for(i=0;iatt,NC_FillValue,var->type_info->hdr.id,1,var->fill_value,&fv))) - goto done; - } + break; + default: stat = (THROW(NC_ENCZARR)); goto done; } + if(countp) *countp = count; + +done: + return ZUNTRACE(THROW(stat)); +} + +/* Convert a JSON singleton or array of strings to a single string */ +static int +charify(const NCjson* src, NCbytes* buf) +{ + int stat = NC_NOERR; + size_t i; + struct NCJconst jstr; + + memset(&jstr,0,sizeof(jstr)); + + if(NCJsort(src) != NCJ_ARRAY) { /* singleton */ + NCJcvt(src, NCJ_STRING, &jstr); + ncbytescat(buf,jstr.sval); + } else for(i=0;iformat_file_info; + NCZ_VAR_INFO_T* zvar = NULL; + + if(container->sort == NCVAR) { + var = (NC_VAR_INFO_T*)container; + zvar = (NCZ_VAR_INFO_T*)var->format_var_info; + } + + if(direction == FIXATT) { /* transfer from NC_XXX_INFO_T* to attribute */ + switch (which) { + case DA_FILLVALUE: + if(var->no_fill) { + stat=NCZ_disable_fill(file,var); + goto done; + } + att = (NC_ATT_INFO_T*)ncindexlookup(var->att,NC_FillValue); + /* If _FillValue is new and the proposed value is the dfalt value, then suppress the attribute + and force user to build default fill values; otherwise set the attribute value */ + if(att == NULL && (var->fill_value == NULL || !NCZ_isdfaltfillvalue(var->type_info->hdr.id,var->fill_value))) { + /* Not suppressing _FillValue attribute, so go ahead and create it and set value */ + if((stat = NCZ_getattr(file,container,aname,var->type_info->hdr.id,&att,&isnew))) goto done; + assert(isnew); + if((stat = NCZ_set_att_data(file,att,1,var->fill_value))) goto done; + } + break; + case DA_MAXSTRLEN: + assert(zvar != NULL); + assert(zinfo != NULL); + if(zvar->maxstrlen > 0) { + /* If the value is the current max strlen default, then suppress it */ + if(zvar->maxstrlen != zinfo->default_maxstrlen) { + if((stat = NCZ_getattr(file,container,aname,NC_INT,&att,&isnew))) goto done; + if((stat = NCZ_set_att_data(file,att,1,&zvar->maxstrlen))) goto done; + } + } break; + case DA_DFALTSTRLEN: + assert(zinfo != NULL); + if(zinfo->default_maxstrlen > 0) { + /* If the value is the global max strlen default, then suppress it */ + if(zinfo->default_maxstrlen != NCZ_MAXSTR_DFALT) { + if((stat = NCZ_getattr(file,container,aname,NC_INT,&att,&isnew))) goto done; + if((stat = NCZ_set_att_data(file,att,1,&zinfo->default_maxstrlen))) goto done; + } + } break; + case DA_QUANTIZE: + if(var->quantize_mode > 0) { + if((stat = NCZ_getattr(file,container,aname,NC_INT,&att,&isnew))) goto done; + if((stat = NCZ_set_att_data(file,att,1,&var->nsd))) goto done; + } break; + case DA_SEP: { + NCglobalstate* gs = NC_getglobalstate(); + assert(gs != NULL); + assert(gs->zarr.dimension_separator != 0); + /* If separator is not new and the proposed value is the dfalt value, then suppress the attribute value */ + if(att == NULL && zvar->dimension_separator != '\0' + && zvar->dimension_separator != gs->zarr.dimension_separator) { + if((stat = NCZ_getattr(file,container,aname,NC_CHAR,&att,&isnew))) goto done; + if((stat = NCZ_set_att_data(file,att,1,&zvar->dimension_separator))) goto done; + } + } break; + case DA_DFALTSEP: { + NCglobalstate* gs = NC_getglobalstate(); + assert(gs != NULL); + assert(gs->zarr.dimension_separator != 0); + /* If separator is not new and the proposed value is the dfalt value, then suppress the attribute value */ + if(att == NULL && gs->zarr.dimension_separator != '\0' + && gs->zarr.dimension_separator != NCZF_default_dimension_separator(file)) { + if((stat = NCZ_getattr(file,container,aname,NC_CHAR,&att,&isnew))) goto done; + if((stat = NCZ_set_att_data(file,att,1,&gs->zarr.dimension_separator))) goto done; + } + } break; + default: + stat = NC_ENOTATT; + goto done; + } + } else if(direction == FIXOBJ) {/* Transfer value from att to NC_XXX_INFO_T* */ + switch(which) { + case DA_FILLVALUE: + if((stat = NCZ_getattr(file,container,aname,var->type_info->hdr.id,&att,&isnew))) goto done; + assert(!isnew); + if(att->len != 1) goto done; /* some other fill value attribute */ + if((stat = NC_reclaim_data_all(file->controller,var->type_info->hdr.id,var->fill_value,1))) goto done; + var->fill_value = NULL; + if((stat = NC_copy_data_all(file->controller,att->nc_typeid,att->data,att->len,&var->fill_value))) goto done; + break; + case DA_MAXSTRLEN: + if((stat = NCZ_getattr(file,container,aname,NC_INT,&att,&isnew))) goto done; + assert(!isnew); + if(att->len != 1 || att->nc_typeid != NC_INT) goto done; /* some other _nczarr_maxstrlen */ + zsetmaxstrlen((size_t)((int*)att->data)[0],var); + break; + case DA_DFALTSTRLEN: + if((stat = NCZ_getattr(file,container,aname,NC_INT,&att,&isnew))) goto done; + assert(!isnew); + if(att->len != 1 || att->nc_typeid != NC_INT) goto done; /* some other _nczarr_default_maxstrlen */ + zsetdfaltstrlen((size_t)((int*)att->data)[0],file); + break; + case DA_QUANTIZE: + if((stat = NCZ_getattr(file,container,aname,NC_INT,&att,&isnew))) goto done; + assert(!isnew); + if(att->len != 1 || att->nc_typeid != NC_INT) goto done; /* some other _QuantXXX */ + var->nsd = ((int*)att->data)[0]; + break; + case DA_SEP: + if((stat = NCZ_getattr(file,container,aname,NC_CHAR,&att,&isnew))) goto done; + assert(!isnew); + if(att->len != 1 || att->nc_typeid != NC_CHAR) goto done; /* some other _nczarr_dimension_separator */ + zsetdimsep(((char*)att->data)[0],var); + break; + case DA_DFALTSEP: + if((stat = NCZ_getattr(file,container,aname,NC_CHAR,&att,&isnew))) goto done; + assert(!isnew); + if(att->len != 1 || att->nc_typeid != NC_CHAR) goto done; /* some other _nczarr_default_dimension_separator */ + zsetdfaltdimsep(((char*)att->data)[0],file); + break; + default: assert(0); + } + } + +done: + return THROW(stat); +} + +/* For those attributes that reflect fields in NC_VAR/GRP_INFO_T objects, + * ensure that those attributes are up-to-date (creating if necessary). + */ +int +NCZ_ensure_dual_attributes(NC_FILE_INFO_T* file, NC_OBJ* container) +{ + int stat = NC_NOERR; + NC_VAR_INFO_T* var = NULL; + + if(container->sort == NCVAR) + var = (NC_VAR_INFO_T*)container; + + /* Some attributes are reflected in var|grp structure so must be sync'd to the attribute */ + if(var != NULL) { + const char* qname = NULL; + + /* _FillValue */ + /* If var->no_fill == NC_NOFILL, then suppress the _FillValue attribute and + optionally set the _NoFill attribute */ + if(var->no_fill == NC_NOFILL) { + if((stat = NCZ_disable_fill(file,var))) goto done; + } else { /* var->no_fill == NC_FILL*/ + if(var->fill_value != NULL) { /* only update if a fill value is defined */ + if((stat = NCZ_sync_dual_att(file,(NC_OBJ*)var,NC_FillValue,DA_FILLVALUE,FIXATT))) goto done; + } + } + + /* _nczarr_maxstrlen */ + { + NCZ_VAR_INFO_T* vinfo = (NCZ_VAR_INFO_T*)var->format_var_info; + if(vinfo->maxstrlen > 0) { + if((stat = NCZ_sync_dual_att(file,(NC_OBJ*)var,NC_NCZARR_MAXSTRLEN_ATTR,DA_MAXSTRLEN,FIXATT))) goto done; + } + } + + /* __Quantizexxx */ + if(var->quantize_mode > 0) { + qname = NC_findquantizeattname(var->quantize_mode); + if(qname != NULL) {/* quantize_mode was set */ + if((stat = NCZ_sync_dual_att(file,(NC_OBJ*)var,qname,DA_QUANTIZE,FIXATT))) goto done; + } + } + + /* _nczarr_dimension_separator */ + { + NCZ_VAR_INFO_T* vinfo = (NCZ_VAR_INFO_T*)var->format_var_info; + if(vinfo->dimension_separator != '\0') { + if((stat = NCZ_sync_dual_att(file,(NC_OBJ*)var,NC_NCZARR_SEPARATOR_ATTR,DA_SEP,FIXATT))) goto done; + } + } + + } else { + NCZ_FILE_INFO_T* zinfo = (NCZ_FILE_INFO_T*)file->format_file_info; + NCglobalstate* gs = NC_getglobalstate(); + + assert(file != NULL); + if(zinfo->default_maxstrlen > 0) { + if((stat = NCZ_sync_dual_att(file,(NC_OBJ*)file->root_grp,NC_NCZARR_DFALT_MAXSTRLEN_ATTR,DA_DFALTSTRLEN,FIXATT))) goto done; + } + + assert(gs != NULL); + assert(gs->zarr.dimension_separator != 0); + if((stat = NCZ_sync_dual_att(file,(NC_OBJ*)file->root_grp,NC_NCZARR_DFALT_SEPARATOR_ATTR,DA_DFALTSEP,FIXATT))) goto done; + + } + done: return THROW(stat); } -/* Create an attribute; This is an abbreviated form - of ncz_put_att above */ +/* Get/create attribute */ int -ncz_makeattr(NC_OBJ* container, NCindex* attlist, const char* name, nc_type typeid, size_t len, void* values, NC_ATT_INFO_T** attp) +NCZ_getattr(NC_FILE_INFO_T* file, NC_OBJ* container, const char* aname, nc_type nctype, NC_ATT_INFO_T** attp, int* isnewp) { int stat = NC_NOERR; + struct NCZ_AttrInfo ainfo = NCZ_emptyAttrInfo(); + NC_GRP_INFO_T* grp = NULL; + NC_VAR_INFO_T* var = NULL; NC_ATT_INFO_T* att = NULL; - NCZ_ATT_INFO_T* zatt = NULL; - void* clone = NULL; - size_t typesize, clonesize; - NC_GRP_INFO_T* grp = (container->sort == NCGRP ? (NC_GRP_INFO_T*)container - : ((NC_VAR_INFO_T*)container)->container); - - /* Duplicate the values */ - if ((stat = nc4_get_typelen_mem(grp->nc4_info, typeid, &typesize))) goto done; - clonesize = len*typesize; - if((clone = malloc(clonesize))==NULL) {stat = NC_ENOMEM; goto done;} - if((stat = NC_copy_data(grp->nc4_info->controller, typeid, values, len, clone))) goto done; - if((stat=nc4_att_list_add(attlist,name,&att))) - goto done; - if((zatt = calloc(1,sizeof(NCZ_ATT_INFO_T))) == NULL) - {stat = NC_ENOMEM; goto done;} + NCindex* attlist = NULL; + if(container->sort == NCGRP) { - zatt->common.file = ((NC_GRP_INFO_T*)container)->nc4_info; - } else if(container->sort == NCVAR) { - zatt->common.file = ((NC_VAR_INFO_T*)container)->container->nc4_info; - } else - abort(); - att->container = container; - att->format_att_info = zatt; - /* Fill in the attribute's type and value */ - att->nc_typeid = typeid; - att->len = len; - att->data = clone; clone = NULL; - att->dirty = NC_TRUE; - if(attp) {*attp = att; att = NULL;} + grp = (NC_GRP_INFO_T*)container; + attlist = grp->att; + } else { /*container->sort == NCVAR*/ + var = (NC_VAR_INFO_T*)container; + attlist = var->att; + } + + att = (NC_ATT_INFO_T*)ncindexlookup(attlist,aname); + if(att == NULL) { /* create it */ + NCZ_clearAttrInfo(file,&ainfo); + ainfo.name = aname; + ainfo.nctype = nctype; + if((stat = ncz_makeattr(file,container,&ainfo,&att))) goto done; + if(isnewp) *isnewp = 1; + } else {if(isnewp) *isnewp = 0;} + + if((stat = islegalatt(file,att,att->len))) goto done; /* verify legality */ + if(attp) *attp = att; done: - nullfree(clone); - if(stat) { - if(att) nc4_att_list_del(attlist,att); - nullfree(zatt); + NCZ_clearAttrInfo(file,&ainfo); + return THROW(stat); +} + +static int +islegalatt(NC_FILE_INFO_T* file, NC_ATT_INFO_T* att, size_t alen) +{ + int stat = NC_NOERR; + if (!att) { + if (!(file->flags & NC_INDEF)) { /* if this is a new att, require define mode. */ + if (file->cmode & NC_CLASSIC_MODEL) {stat = NC_ENOTINDEFINE; goto done;} + file->flags |= NC_INDEF;/* set define mode. */ + file->redef = NC_TRUE; /* for nc_abort, we need to remember if we're in define mode as a redef. */ + } + } else { + /* for an existing att, if we're not in define mode, the len + must not be greater than the existing len for classic model. */ + if(!(file->flags & NC_INDEF) + && alen * nc4typelen(att->nc_typeid) > (size_t)att->len * nc4typelen(att->nc_typeid)) { + if (file->cmode & NC_CLASSIC_MODEL) {stat = NC_ENOTINDEFINE; goto done;} + file->flags |= NC_INDEF;/* set define mode. */ + file->redef = NC_TRUE; /* for nc_abort, we need to remember if we're in define mode as a redef. */ + } } +done: return THROW(stat); } +int +NCZ_reclaim_att_data(NC_FILE_INFO_T* file, NC_ATT_INFO_T* att) +{ + int stat = NC_NOERR; + int tid = att->nc_typeid; + + if(att->data != NULL) { + stat = NC_reclaim_data_all(file->controller,tid,att->data,att->len); + att->data = NULL; + att->len = 0; + } + return stat; +} diff --git a/libnczarr/zchunking.c b/libnczarr/zchunking.c index 442f53e0c7..b1cba19787 100644 --- a/libnczarr/zchunking.c +++ b/libnczarr/zchunking.c @@ -10,7 +10,7 @@ static int pcounter = 0; /* Forward */ -static int compute_intersection(const NCZSlice* slice, size64_t chunklen, unsigned char isunlimited, NCZChunkRange* range); +static int compute_intersection(const NCZSlice* slice, size64_t chunklen, NCZChunkRange* range); static void skipchunk(const NCZSlice* slice, NCZProjection* projection); static int verifyslice(const NCZSlice* slice); @@ -34,11 +34,11 @@ NCZ_compute_chunk_ranges( NCZChunkRange* ncr) { int stat = NC_NOERR; - int i; - int rank = common->rank; + size_t i; + size_t rank = common->rank; for(i=0;ichunklens[i],common->isunlimited[i],&ncr[i]))) + if((stat = compute_intersection(&slices[i],common->chunklens[i],&ncr[i]))) goto done; } @@ -57,7 +57,6 @@ static int compute_intersection( const NCZSlice* slice, size64_t chunklen, - unsigned char isunlimited, NCZChunkRange* range) { range->start = floordiv(slice->start, chunklen); @@ -84,7 +83,7 @@ This is somewhat complex because: */ int -NCZ_compute_projections(struct Common* common, int r, size64_t chunkindex, const NCZSlice* slice, size_t n, NCZProjection* projections) +NCZ_compute_projections(struct Common* common, size_t r, size64_t chunkindex, const NCZSlice* slice, size_t n, NCZProjection* projections) { int stat = NC_NOERR; NCZProjection* projection = NULL; @@ -96,7 +95,8 @@ NCZ_compute_projections(struct Common* common, int r, size64_t chunkindex, cons projection = &projections[n]; if(n > 0) { /* Find last non-skipped projection */ - for(size_t i=n;i-->0;) { /* walk backward */ + size_t i; + for(i=n;--i>=0;) { /* walk backward */ if(!projections[i].skip) { prev = &projections[i]; break; @@ -214,7 +214,7 @@ Create a vector of projections wrt a slice and a sequence of chunks. int NCZ_compute_per_slice_projections( struct Common* common, - int r, /* which dimension are we projecting? */ + size_t r, /* which dimension are we projecting? */ const NCZSlice* slice, /* the slice for which projections are computed */ const NCZChunkRange* range, /* range */ NCZSliceProjections* slp) @@ -258,7 +258,7 @@ NCZ_compute_all_slice_projections( NCZSliceProjections* results) { int stat = NC_NOERR; - int r; + size_t r; for(r=0;rrank;r++) { /* Compute each of the rank SliceProjections instances */ @@ -289,25 +289,13 @@ verifyslice(const NCZSlice* slice) } void -NCZ_clearsliceprojections(int count, NCZSliceProjections* slpv) +NCZ_clearsliceprojections(size_t count, NCZSliceProjections* slpv) { if(slpv != NULL) { - int i; + size_t i; for(i=0;iprojections); } } } - -#if 0 -static void -clearallprojections(NCZAllProjections* nap) -{ - if(nap != NULL) { - int i; - for(i=0;irank;i++) - nclistfreeall(&nap->allprojections[i].projections); - } -} -#endif diff --git a/libnczarr/zchunking.h b/libnczarr/zchunking.h index f3a14703eb..711c9c3a3f 100644 --- a/libnczarr/zchunking.h +++ b/libnczarr/zchunking.h @@ -48,7 +48,7 @@ typedef struct NCProjection { /* Set of Projections for a slice */ typedef struct NCZSliceProjections { - int r; /* 0<=r read, 0 => write */ - int rank; int scalar; /* 1 => scalar variable */ + size_t rank; size64_t dimlens[NC_MAX_VAR_DIMS]; unsigned char isunlimited[NC_MAX_VAR_DIMS]; size64_t chunklens[NC_MAX_VAR_DIMS]; @@ -81,8 +81,8 @@ struct Common { /**************************************************/ /* From zchunking.c */ EXTERNL int NCZ_compute_chunk_ranges(struct Common*, const NCZSlice*, NCZChunkRange* ncr); -EXTERNL int NCZ_compute_projections(struct Common*, int r, size64_t chunkindex, const NCZSlice* slice, size_t n, NCZProjection* projections); -EXTERNL int NCZ_compute_per_slice_projections(struct Common*, int rank, const NCZSlice*, const NCZChunkRange*, NCZSliceProjections* slp); +EXTERNL int NCZ_compute_projections(struct Common*, size_t r, size64_t chunkindex, const NCZSlice* slice, size_t n, NCZProjection* projections); +EXTERNL int NCZ_compute_per_slice_projections(struct Common*, size_t rank, const NCZSlice*, const NCZChunkRange*, NCZSliceProjections* slp); EXTERNL int NCZ_compute_all_slice_projections(struct Common*, const NCZSlice* slices, const NCZChunkRange*, NCZSliceProjections*); /* From zwalk.c */ @@ -98,8 +98,8 @@ EXTERNL size64_t NCZ_computelinearoffset(size_t, const size64_t*, const size64_t struct Common; struct NCZOdometer; EXTERNL int NCZ_projectslices(struct Common*, NCZSlice* slices, struct NCZOdometer**); -EXTERNL int NCZ_chunkindexodom(int rank, const NCZChunkRange* ranges, size64_t*, struct NCZOdometer** odom); -EXTERNL void NCZ_clearsliceprojections(int count, NCZSliceProjections* slpv); +EXTERNL int NCZ_chunkindexodom(size_t rank, const NCZChunkRange* ranges, size64_t*, struct NCZOdometer** odom); +EXTERNL void NCZ_clearsliceprojections(size_t count, NCZSliceProjections* slpv); EXTERNL void NCZ_clearcommon(struct Common* common); #define floordiv(x,y) ((x) / (y)) diff --git a/libnczarr/zclose.c b/libnczarr/zclose.c index 3dbba0d6be..b418235fcc 100644 --- a/libnczarr/zclose.c +++ b/libnczarr/zclose.c @@ -38,7 +38,7 @@ ncz_close_file(NC_FILE_INFO_T* file, int abort) if(!abort) { /* Flush | create all chunks for all vars */ - if((stat=zwrite_vars(file->root_grp))) goto done; + if((stat=zwrite_vars(file->root_grp))) goto done; } /* Internal close to reclaim zarr annotations */ @@ -47,10 +47,16 @@ ncz_close_file(NC_FILE_INFO_T* file, int abort) zinfo = file->format_file_info; - if((stat = nczmap_close(zinfo->map,(abort && zinfo->creating)?1:0))) - goto done; - nclistfreeall(zinfo->controllist); + /* Reclaim the metadata handler contents */ + if(NCZMD_is_metadata_consolidated(file) == NC_NOERR) + NCZMD_close(file); + + /* Release the zmap handler */ + if((stat = nczmap_close(zinfo->map,(abort && zinfo->creating)?1:0))) goto done; + + nclistfreeall(zinfo->urlcontrols); NC_authfree(zinfo->auth); + nullfree(zinfo); done: @@ -100,12 +106,8 @@ zclose_group(NC_GRP_INFO_T *grp) if ((stat = zclose_types(grp))) goto done; - /* Close the zgroup. */ + /* Close the zarr.json. */ zgrp = grp->format_grp_info; - LOG((4, "%s: closing group %s", __func__, grp->hdr.name)); - nullfree(zgrp->zgroup.prefix); - NCJreclaim(zgrp->zgroup.obj); - NCJreclaim(zgrp->zgroup.atts); nullfree(zgrp); grp->format_grp_info = NULL; /* avoid memory errors */ @@ -127,6 +129,7 @@ zclose_gatts(NC_GRP_INFO_T* grp) int stat = NC_NOERR; NC_ATT_INFO_T *att; size_t a; + for(a = 0; a < ncindexsize(grp->att); a++) { NCZ_ATT_INFO_T* zatt = NULL; att = (NC_ATT_INFO_T* )ncindexith(grp->att, a); @@ -168,18 +171,17 @@ NCZ_zclose_var1(NC_VAR_INFO_T* var) if(var->filters != NULL) { (void)NCZ_filter_freelists(var); } - var->filters = NULL; #endif + var->filters = NULL; /* Reclaim the type */ if(var->type_info) (void)zclose_type(var->type_info); /* reclaim dispatch info */ zvar = var->format_var_info;; if(zvar->cache) NCZ_free_chunk_cache(zvar->cache); /* reclaim xarray */ - if(zvar->xarray) nclistfreeall(zvar->xarray); - nullfree(zvar->zarray.prefix); - NCJreclaim(zvar->zarray.obj); - NCJreclaim(zvar->zarray.atts); + if(zvar->dimension_names) nclistfreeall(zvar->dimension_names); + + /* Reclaim the zvar object */ nullfree(zvar); var->format_var_info = NULL; /* avoid memory errors */ return stat; @@ -304,7 +306,7 @@ zwrite_vars(NC_GRP_INFO_T *grp) /* Write all vars for this group breadth first */ for(i = 0; i < ncindexsize(grp->vars); i++) { NC_VAR_INFO_T* var = (NC_VAR_INFO_T*)ncindexith(grp->vars, i); - if((stat = ncz_write_var(var))) goto done; + if((stat = NCZ_write_var_data(grp->nc4_info, var))) goto done; } /* Recursively call this function for each child group, if any, stopping diff --git a/libnczarr/zcreate.c b/libnczarr/zcreate.c index bf7c1db3e5..f1a1b8fff9 100644 --- a/libnczarr/zcreate.c +++ b/libnczarr/zcreate.c @@ -30,7 +30,7 @@ static const int ILLEGAL_CREATE_FLAGS = (NC_NOWRITE|NC_MMAP|NC_DISKLESS|NC_64BIT * @author Dennis Heimbigner, Ed Hartnett */ static int -ncz_create_file(const char *path, int cmode, size_t initialsz, NClist* controls, int ncid) +ncz_create_file(const char *path, int cmode, NClist* controls, int ncid) { int retval = NC_NOERR; NC_FILE_INFO_T* h5 = NULL; @@ -42,7 +42,7 @@ ncz_create_file(const char *path, int cmode, size_t initialsz, NClist* controls, if ((retval = nc4_file_list_add(ncid, path, cmode, (void**)&h5))) BAIL(retval); assert(h5 && h5->root_grp); - h5->root_grp->atts_read = 1; + NCZ_setatts_read((NC_OBJ*)h5->root_grp); h5->mem.inmemory = ((cmode & NC_INMEMORY) == NC_INMEMORY); h5->mem.diskless = ((cmode & NC_DISKLESS) == NC_DISKLESS); @@ -94,6 +94,11 @@ NCZ_create(const char* path, int cmode, size_t initialsz, int basepe, int stat = NC_NOERR; NCURI* uri = NULL; + NC_UNUSED(initialsz); + NC_UNUSED(basepe); + NC_UNUSED(chunksizehintp); + NC_UNUSED(dispatch); + ZTRACE(0,"path=%s,cmode=%d,initialsz=%ld,ncid=%d)",path,cmode,initialsz,ncid); NC_UNUSED(parameters); @@ -124,7 +129,7 @@ NCZ_create(const char* path, int cmode, size_t initialsz, int basepe, if(uri == NULL) goto done; /* Create the file */ - stat = ncz_create_file(path, cmode, initialsz, ncurifragmentparams(uri), ncid); + stat = ncz_create_file(path, cmode, ncurifragmentparams(uri), ncid); done: ncurifree(uri); diff --git a/libnczarr/zcvt.c b/libnczarr/zcvt.c index 879c5e8c20..2151759a46 100644 --- a/libnczarr/zcvt.c +++ b/libnczarr/zcvt.c @@ -35,71 +35,7 @@ sizeof(char *), /*NC_STRING*/ static int typeid2jtype(nc_type typeid); static int naninftest(const char* s, double* dcase, float* fcase); -#if 0 -/* Convert a JSON value to a struct ZCVT value and also return the type */ -int -NCZ_string2cvt(char* src, nc_type srctype, struct ZCVT* zcvt, nc_type* typeidp) -{ - int stat = NC_NOERR; - nc_type dsttype = NC_NAT; - - assert(zcvt); - - /* Convert to a restricted set of values */ - switch (srctype) { - case NC_BYTE: { - zcvt->int64v = (signed long long)(*((signed char*)src)); - dsttype = NC_INT64; - } break; - case NC_UBYTE: { - zcvt->uint64v = (unsigned long long)(*((unsigned char*)src)); - dsttype = NC_UINT64; - } break; - case NC_SHORT: { - zcvt->int64v = (signed long long)(*((signed short*)src)); - dsttype = NC_INT64; - } break; - case NC_USHORT: { - zcvt->uint64v = (unsigned long long)(*((unsigned short*)src)); - dsttype = NC_UINT64; - } break; - case NC_INT: { - zcvt->int64v = (signed long long)(*((signed int*)src)); - dsttype = NC_INT64; - } break; - case NC_UINT: { - zcvt->uint64v = (unsigned long long)(*((unsigned int*)src)); - dsttype = NC_UINT64; - } break; - case NC_INT64: { - zcvt->int64v = (signed long long)(*((signed long long*)src)); - dsttype = NC_INT64; - } break; - case NC_UINT64: { - zcvt->uint64v = (unsigned long long)(*((unsigned long long*)src)); - dsttype = NC_UINT64; - } break; - case NC_FLOAT: { - zcvt->float64v = (double)(*((float*)src)); - dsttype = NC_DOUBLE; - } break; - case NC_DOUBLE: { - dsttype = NC_DOUBLE; - zcvt->float64v= (double)(*((double*)src)); - } break; - case NC_STRING: { - dsttype = NC_STRING; - zcvt->strv= *((char**)src); - } break; - default: stat = NC_EINTERNAL; goto done; - } - if(typeidp) *typeidp = dsttype; -done: - return stat; -} -#endif - -/* Warning: not free returned zcvt.strv; it may point into a string in jsrc */ +/* Warning: do not free returned zcvt.strv; it may point into a string in jsrc */ int NCZ_json2cvt(const NCjson* jsrc, struct ZCVT* zcvt, nc_type* typeidp) { @@ -193,6 +129,11 @@ NCZ_convert1(const NCjson* jsrc, nc_type dsttype, NCbytes* buf) c = (signed char)zcvt.uint64v; ncbytesappend(buf,(char)c); break; + case NC_STRING: + if(strlen(zcvt.strv) > 1) outofrange = 1; + c = zcvt.strv[0]; + ncbytesappend(buf,(char)c); + break; default: abort(); } } break; @@ -212,6 +153,11 @@ NCZ_convert1(const NCjson* jsrc, nc_type dsttype, NCbytes* buf) c = (unsigned char)zcvt.uint64v; ncbytesappend(buf,(char)c); break; + case NC_STRING: + if(strlen(zcvt.strv) > 1) outofrange = 1; + c = (unsigned char)zcvt.strv[0]; + ncbytesappend(buf,(char)c); + break; default: abort(); } } break; @@ -382,6 +328,7 @@ NCZ_convert1(const NCjson* jsrc, nc_type dsttype, NCbytes* buf) if(srctype != NC_STRING) {stat = NC_EINVAL; goto done;} /* Need to append the pointer and not what it points to */ scopy = nulldup(zcvt.strv); + /* Note we are appending the pointer not the scopy string */ ncbytesappendn(buf,(void*)&scopy,sizeof(scopy)); scopy = NULL; } break; @@ -421,7 +368,6 @@ NCZ_stringconvert1(nc_type srctype, char* src, NCjson* jvalue) struct ZCVT zcvt; nc_type dsttype = NC_NAT; char s[1024]; - char sq[1024+2+1]; char* p = NULL; int isnanorinf = 0; @@ -509,12 +455,8 @@ NCZ_stringconvert1(nc_type srctype, char* src, NCjson* jvalue) #endif /* Quote the nan/inf constant */ if(isnanorinf) { - size_t l = strlen(s); - memcpy(sq,s,l+1); - s[0] = '"'; - memcpy(s+1,sq,l); - s[l+1] = '"'; - s[l+2] = '\0'; + /* Change type to NCJ_STRING */ + jvalue->sort = NCJ_STRING; } } break; case NC_STRING: { @@ -551,18 +493,16 @@ NCZ_stringconvert(nc_type typeid, size_t len, void* data0, NCjson** jdatap) /* Handle char type specially */ if(typeid == NC_CHAR) { - /* Apply the JSON write convention */ - if((stat = NCJparsen(len,src,0,&jdata))) { /* !parseable */ - /* Create a string valued json object */ - if((stat = NCJnewstringn(NCJ_STRING,len,src,&jdata))) goto done; - } + /* Not complex json */ + /* Create a string valued json object */ + NCJnewstringn(NCJ_STRING,len,src,&jdata); } else if(len == 1) { /* create singleton */ - if((stat = NCJnew(jtype,&jdata))) goto done; + NCJnew(jtype,&jdata); if((stat = NCZ_stringconvert1(typeid, src, jdata))) goto done; } else { /* len > 1 create array of values */ - if((stat = NCJnew(NCJ_ARRAY,&jdata))) goto done; + NCJnew(NCJ_ARRAY,&jdata); for(i=0;irank); + snprintf(value,sizeof(value),"Odometer{rank=%zu ",odom->rank); ncbytescat(buf,value); ncbytescat(buf," start="); - txt = nczprint_vector(odom->rank,odom->start); + txt = nczprint_vector((size_t)odom->rank,odom->start); ncbytescat(buf,txt); ncbytescat(buf," stop="); - txt = nczprint_vector(odom->rank,odom->stop); + txt = nczprint_vector((size_t)odom->rank,odom->stop); ncbytescat(buf,txt); ncbytescat(buf," len="); - txt = nczprint_vector(odom->rank,odom->len); + txt = nczprint_vector((size_t)odom->rank,odom->len); ncbytescat(buf,txt); ncbytescat(buf," stride="); - txt = nczprint_vector(odom->rank,odom->stride); + txt = nczprint_vector((size_t)odom->rank,odom->stride); ncbytescat(buf,txt); ncbytescat(buf," index="); - txt = nczprint_vector(odom->rank,odom->index); + txt = nczprint_vector((size_t)odom->rank,odom->index); ncbytescat(buf,txt); ncbytescat(buf," offset="); snprintf(value,sizeof(value),"%llu",nczodom_offset(odom)); @@ -249,9 +252,9 @@ nczprint_sliceprojectionsx(const NCZSliceProjections slp, int raw) char* result = NULL; NCbytes* buf = ncbytesnew(); char tmp[4096]; - int i; + size_t i; - snprintf(tmp,sizeof(tmp),"SliceProjection{r=%d range=%s count=%ld", + snprintf(tmp,sizeof(tmp),"SliceProjection{r=%zu range=%s count=%ld", slp.r,nczprint_chunkrange(slp.range),(long)slp.count); ncbytescat(buf,tmp); ncbytescat(buf,",projections=[\n"); @@ -294,7 +297,7 @@ nczprint_idvector(size_t len, const int* ids) { size64_t v[4096]; size_t i; - for(i=0;i 0) ncbytescat(buf,","); + ncbytescat(buf,"'"); + ncbytescat(buf,e); + ncbytescat(buf,"'"); + } + } + ncbytescat(buf,")"); + result = ncbytesextract(buf); + ncbytesfree(buf); + return capture(result); +} + void zdumpcommon(const struct Common* c) { - int r; + size_t r; fprintf(stderr,"Common:\n"); #if 0 fprintf(stderr,"\tfile: %s\n",c->file->controller->path); fprintf(stderr,"\tvar: %s\n",c->var->hdr.name); fprintf(stderr,"\treading=%d\n",c->reading); #endif - fprintf(stderr,"\trank=%d",c->rank); + fprintf(stderr,"\trank=%zd",c->rank); fprintf(stderr," dimlens=%s",nczprint_vector(c->rank,c->dimlens)); fprintf(stderr," chunklens=%s",nczprint_vector(c->rank,c->chunklens)); #if 0 @@ -380,6 +406,6 @@ zdumpcommon(const struct Common* c) fprintf(stderr," shape=%s\n",nczprint_vector(c->rank,c->shape)); fprintf(stderr,"\tallprojections:\n"); for(r=0;rrank;r++) - fprintf(stderr,"\t\t[%d] %s\n",r,nczprint_sliceprojectionsx(c->allprojections[r],RAW)); + fprintf(stderr,"\t\t[%zd] %s\n",r,nczprint_sliceprojectionsx(c->allprojections[r],RAW)); fflush(stderr); } diff --git a/libnczarr/zdebug.h b/libnczarr/zdebug.h index 83f45cdef3..3c853d57b9 100644 --- a/libnczarr/zdebug.h +++ b/libnczarr/zdebug.h @@ -62,6 +62,7 @@ EXTERNL char* nczprint_idvector(size_t,const int*); EXTERNL char* nczprint_paramvector(size_t,const unsigned*); EXTERNL char* nczprint_sizevector(size_t,const size_t*); EXTERNL char* nczprint_envv(const char** envv); +EXTERNL char* nczprint_envlist(const NClist* l); EXTERNL void zdumpcommon(const struct Common*); diff --git a/libnczarr/zdim.c b/libnczarr/zdim.c index 352d4a2e25..16ad429064 100644 --- a/libnczarr/zdim.c +++ b/libnczarr/zdim.c @@ -96,7 +96,7 @@ NCZ_def_dim(int ncid, const char *name, size_t len, int *idp) /* Make sure the name is not already in use. */ dim = (NC_DIM_INFO_T*)ncindexlookup(grp->dim,norm_name); if(dim != NULL) - return NC_ENAMEINUSE; + return THROW(NC_ENAMEINUSE); /* If it's not in define mode, enter define mode. Do this only * after checking all input data, so we only enter define mode if @@ -107,8 +107,7 @@ NCZ_def_dim(int ncid, const char *name, size_t len, int *idp) /* Add a dimension to the list. The ID must come from the file * information, since dimids are visible in more than one group. */ - if ((stat = nc4_dim_list_add(grp, norm_name, len, -1, &dim))) - return stat; + if ((stat = nc4_dim_list_add(grp, norm_name, len, -1, &dim))) return stat; { NCZ_DIM_INFO_T* diminfo = NULL; @@ -257,7 +256,7 @@ NCZ_rename_dim(int ncid, int dimid, const char *name) /* Check if new name is in use. */ if (ncindexlookup(grp->dim, norm_name)) - return NC_ENAMEINUSE; + return THROW(NC_ENAMEINUSE); /* Give the dimension its new name in metadata. UTF8 normalization * has been done. */ @@ -273,3 +272,14 @@ NCZ_rename_dim(int ncid, int dimid, const char *name) return NC_NOERR; } + +int +NCZ_reclaim_dim(NC_DIM_INFO_T* dim) +{ + int stat = NC_NOERR; + if(dim != NULL) { + nullfree(dim->format_dim_info); + stat = nc4_dim_list_del(dim->container,dim); + } + return stat; +} diff --git a/libnczarr/zdispatch.c b/libnczarr/zdispatch.c index 80f9c6e27b..561abfe637 100644 --- a/libnczarr/zdispatch.c +++ b/libnczarr/zdispatch.c @@ -128,6 +128,7 @@ NCZ_initialize(void) if (!ncz_initialized) NCZ_initialize_internal(); stat = NCZ_provenance_init(); + if(!stat) NCZF_initialize(); if(stat) ncz_initialized = 1; return stat; } @@ -141,6 +142,7 @@ NCZ_initialize(void) int NCZ_finalize(void) { + NCZF_finalize(); NCZ_finalize_internal(); NCZ_provenance_finalize(); return NC_NOERR; @@ -149,12 +151,16 @@ NCZ_finalize(void) static int NCZ_var_par_access(int ncid, int varid, int par_access) { + NC_UNUSED(ncid); + NC_UNUSED(varid); + NC_UNUSED(par_access); return NC_NOERR; /* no-op */ } static int NCZ_show_metadata(int ncid) { + NC_UNUSED(ncid); return NC_NOERR; } diff --git a/libnczarr/zfile.c b/libnczarr/zfile.c index 343960da1f..07163b76c4 100644 --- a/libnczarr/zfile.c +++ b/libnczarr/zfile.c @@ -32,30 +32,30 @@ static int ncz_sync_netcdf4_file(NC_FILE_INFO_T* file, int isclose); int NCZ_redef(int ncid) { - NC_FILE_INFO_T* zinfo = NULL; + NC_FILE_INFO_T* file = NULL; int stat = NC_NOERR; ZTRACE(0,"NCZ_redef(ncid)"); /* Find this file's metadata. */ - if ((stat = nc4_find_grp_h5(ncid, NULL, &zinfo))) + if ((stat = nc4_find_grp_h5(ncid, NULL, &file))) goto done; - assert(zinfo); + assert(file); /* If we're already in define mode, return an error. */ - if (zinfo->flags & NC_INDEF) + if (file->flags & NC_INDEF) {stat = NC_EINDEFINE; goto done;} /* If the file is read-only, return an error. */ - if (zinfo->no_write) + if (file->no_write) {stat = NC_EPERM; goto done;} /* Set define mode. */ - zinfo->flags |= NC_INDEF; + file->flags |= NC_INDEF; /* For nc_abort, we need to remember if we're in define mode as a redef. */ - zinfo->redef = NC_TRUE; + file->redef = NC_TRUE; done: return ZUNTRACE(stat); @@ -75,12 +75,17 @@ NCZ_redef(int ncid) * @author Dennis Heimbigner, Ed Hartnett */ int -NCZ__enddef(int ncid, size_t h_minfree, size_t v_align, - size_t v_minfree, size_t r_align) +NCZ__enddef(int ncid, size_t h_minfree, size_t v_align, size_t v_minfree, size_t r_align) { int stat = NC_NOERR; NC_FILE_INFO_T* h5 = NULL; NC_GRP_INFO_T* grp = NULL; + + NC_UNUSED(h_minfree); + NC_UNUSED(v_align); + NC_UNUSED(v_minfree); + NC_UNUSED(r_align); + ZTRACE(0,"ncid=%d",ncid); if ((stat = nc4_find_grp_h5(ncid, &grp, &h5))) goto done; @@ -160,7 +165,7 @@ NCZ_sync(int ncid) /* do not do this if file is writeonce */ stat = ncz_sync_netcdf4_file(file,!ZCLOSE); - return stat; + return ZUNTRACE(stat); } /** @@ -212,7 +217,7 @@ NCZ_close(int ncid, void* params) if ((stat = nc4_find_grp_h5(ncid, NULL, &h5))) return stat; assert(h5); - return ncz_closeorabort(h5, params, 0); + return ZUNTRACE(ncz_closeorabort(h5, params, 0)); } /** @@ -316,11 +321,6 @@ NCZ_inq(int ncid, int *ndimsp, int *nvarsp, int *nattsp, int *unlimdimidp) } if (nattsp) { - /* Do we need to read the atts? */ - if (!grp->atts_read) - if ((stat = ncz_read_atts(file,(NC_OBJ*)grp))) - return stat; - *nattsp = ncindexcount(grp->att); } @@ -364,6 +364,7 @@ ncz_sync_netcdf4_file(NC_FILE_INFO_T* file, int isclose) int stat = NC_NOERR; assert(file && file->format_file_info); + LOG((3, "%s", __func__)); ZTRACE(2,"file=%s",file->hdr.name); @@ -391,9 +392,9 @@ ncz_sync_netcdf4_file(NC_FILE_INFO_T* file, int isclose) if((stat = NCZ_write_provenance(file))) goto done; - /* Write all the metadata. */ - if((stat = ncz_sync_file(file,isclose))) - goto done; + /* Write out meta-data if we are closing as opposed to enddef() */ + if(isclose) + {if((stat = ncz_encode_file(file,1))) goto done;} } done: return ZUNTRACE(stat); @@ -447,8 +448,7 @@ NCZ_set_fill(int ncid, int fillmode, int *old_modep) ZTRACE(0,"NCZ_set_fill(ncid,fillmode,old)"); /* Get pointer to file info. */ - if ((stat = nc4_find_grp_h5(ncid, NULL, &h5))) - goto done; + if ((stat = nc4_find_grp_h5(ncid, NULL, &h5))) goto done; assert(h5); /* Trying to set fill on a read-only file? You sicken me! */ diff --git a/libnczarr/zfill.c b/libnczarr/zfill.c new file mode 100644 index 0000000000..a18541d3fd --- /dev/null +++ b/libnczarr/zfill.c @@ -0,0 +1,131 @@ +/********************************************************************* + * Copyright 2018, UCAR/Unidata + * See netcdf/COPYRIGHT file for copying and redistribution conditions. + *********************************************************************/ + +#include "zincludes.h" +#include "zfill.h" + +/**************************************************/ +/* Make a strut of the possible fill values indexed by NC_NAT..NC_STRING. + Note that we do not use a union because it cannot be statically initialized. +*/ +static struct DFALTFILL { + int natv; + char bytev; + char charv; + short shortv; + int intv; + float floatv; + double doublev; + unsigned char ubytev; + unsigned short ushortv; + unsigned int uintv; + long long int64v; + unsigned long long uint64v; + char* stringv; +} dfaltfill = { + 0, /*NC_NAT*/ + NC_FILL_BYTE, /*NC_BYTE*/ + NC_FILL_CHAR, /*NC_CHAR*/ + NC_FILL_SHORT, /*NC_SHORT*/ + NC_FILL_INT, /*NC_INT*/ + NC_FILL_FLOAT, /*NC_FLOAT*/ + NC_FILL_DOUBLE, /*NC_DOUBLE*/ + NC_FILL_UBYTE, /*NC_UBYTE*/ + NC_FILL_USHORT, /*NC_USHORT*/ + NC_FILL_UINT, /*NC_UINT*/ + NC_FILL_INT64, /*NC_INT64*/ + NC_FILL_UINT64, /*NC_UINT64*/ + NC_FILL_STRING /*NC_STRING*/ +}; + +/**************************************************/ +/**************************************************/ + + +/* Turn off var.no_fill and var.fill_value. Sync with attribute */ +int +NCZ_disable_fill(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var) +{ + int stat = NC_NOERR; + nc_type tid = var->type_info->hdr.id; + NC_ATT_INFO_T* att = NULL; + int isnew = 0; + + /* Reclaim the fill value, if any */ + if(var->fill_value != NULL) { + if((stat = NC_reclaim_data_all(file->controller,tid,var->fill_value,1))) goto done; + var->fill_value = NULL; + } + + /* And the fill_chunk */ + if((stat = NCZ_reclaim_fill_chunk(((NCZ_VAR_INFO_T*)var->format_var_info)->cache))); /* Reclaim any existing fill_chunk */ + + /* And kill off the _FillValue attribute */ + if((stat = NCZ_attr_delete(file,var->att,NC_FillValue))); + if (stat && stat != NC_ENOTATT) goto done; + stat = NC_NOERR; + + /* set the _NoFill attribute + iff var->no_fill was NC_FILL and file->fill_mode == NC_NOFILL */ + if(var->no_fill == NC_FILL && file->fill_mode == NC_NOFILL) { + int nofill = 1; + if((stat = NCZ_getattr(file,(NC_OBJ*)var, "_NoFill", NC_INT,&att,&isnew))) goto done; + if((stat = NCZ_set_att_data(file,att,1,&nofill))) goto done; + } + + if(var->no_fill == NC_FILL) var->fill_val_changed = 1; + var->no_fill = NC_NOFILL; + +done: + return THROW(stat); +} + +/**************************************************/ + +/* get the default fillvalue */ +void* +NCZ_getdfaltfillvalue(nc_type nctype) +{ + void* fillval = NULL; + switch (nctype) { + case NC_BYTE: fillval = (void*)&dfaltfill.bytev; break; + case NC_CHAR: fillval = (void*)&dfaltfill.charv; break; + case NC_SHORT: fillval = (void*)&dfaltfill.shortv; break; + case NC_INT: fillval = (void*)&dfaltfill.intv; break; + case NC_FLOAT: fillval = (void*)&dfaltfill.floatv; break; + case NC_DOUBLE: fillval = (void*)&dfaltfill.doublev; break; + case NC_UBYTE: fillval = (void*)&dfaltfill.ubytev; break; + case NC_USHORT: fillval = (void*)&dfaltfill.ushortv; break; + case NC_UINT: fillval = (void*)&dfaltfill.uintv; break; + case NC_INT64: fillval = (void*)&dfaltfill.int64v; break; + case NC_UINT64: fillval = (void*)&dfaltfill.uint64v; break; + case NC_STRING: fillval = (void*)&dfaltfill.stringv; break; + default: break; + } + return fillval; +} + +/* Test if fillvalue is default */ +int +NCZ_isdfaltfillvalue(nc_type nctype, void* fillval) +{ + switch (nctype) { + case NC_BYTE: if(NC_FILL_BYTE == *((signed char*)fillval)) return 1; break; + case NC_CHAR: if(NC_FILL_CHAR == *((char*)fillval)) return 1; break; + case NC_SHORT: if(NC_FILL_SHORT == *((short*)fillval)) return 1; break; + case NC_INT: if(NC_FILL_INT == *((int*)fillval)) return 1; break; + case NC_FLOAT: if(NC_FILL_FLOAT == *((float*)fillval)) return 1; break; + case NC_DOUBLE: if(NC_FILL_DOUBLE == *((double*)fillval)) return 1; break; + case NC_UBYTE: if(NC_FILL_UBYTE == *((unsigned char*)fillval)) return 1; break; + case NC_USHORT: if(NC_FILL_USHORT == *((unsigned short*)fillval)) return 1; break; + case NC_UINT: if(NC_FILL_UINT == *((unsigned int*)fillval)) return 1; break; + case NC_INT64: if(NC_FILL_INT64 == *((long long int*)fillval)) return 1; break; + case NC_UINT64: if(NC_FILL_UINT64 == *((unsigned long long int*)fillval)) return 1; break; + case NC_STRING: if(strcmp(NC_FILL_STRING,*((char**)fillval))) return 1; break; + default: break; + } + return 0; +} + diff --git a/libnczarr/zfill.h b/libnczarr/zfill.h new file mode 100644 index 0000000000..c9bf7ca217 --- /dev/null +++ b/libnczarr/zfill.h @@ -0,0 +1,86 @@ + +/********************************************************************* + * Copyright 2018, UCAR/Unidata + * See netcdf/COPYRIGHT file for copying and redistribution conditions. + *********************************************************************/ + +/** +Variable fill_values require some special effort. +Specifically, a variable's fill_value occurs in three places. +1. The NC_VAR_INFO_T object's "fill_value" field containing + the netcdf-4 meta-data for a variable. +2. The _FillValue attribute +3. The "fill_value" key in the (NC)Zarr metadata for an array. + +It is necessary to ensure that all three values are properly synchronized. +The fill_value is read/set in the following ways: +1. The nc_def_var_fill API function. +2. The nc_put_att API function when the attibute is set on a variable + and the attribute name is "_FillValue". +3. Reading the fill_value key when opening an (NC)Zarr file. +4. Writing the fill_value key when creating an (NC)Zarr file. + +The rules used here are as follows. +## Using nc_def_var_fill +1. If the no_fill argument is set, then + 1. reclaim the NC_VAR_INFO_T.fill_value_field + 2. set NC_VAR_INFO_T.fill_val_changed field. +2. If no_fill is not set, then + 1. Convert the data value argument to the same type as the variable's type. + 2. store the fill value argument into the NC_VAR_INFO_T.fill_value field (overwriting any existing value) + 3. set NC_VAR_INFO_T.fill_val_changed. +3. Synchronize + 1. Create or overwrite the _FillValue attribute to have same value +as NC_VAR_INFO_T.fill_value. + +## Using nc_put_att when att is _FillValue +1. unset NC_VAR_INFO_T.no_fill field +2. create/overwrite the attribute + 1. Convert the attribute data to the same type as the variable's type. + 2. Create or overwrite the _FillValue attribute to the converted value +and type. +3. Synchronize + 1. store the attribute data (and type) into the NC_VAR_INFO_T.fill_value field (overwriting any existing value) + 2. set NC_VAR_INFO_T.fill_val_changed. + +## Reading (NC)Zarr metadata +1. If the _FillValue attribute is encountered, then ignore it in favor + of whatever the "fill_value" key dictates. +2. If the "fill_value" key in the variable's metadata does not exist +or is NULL, then + 1. set the NC_VAR_INFO_T.no_fill field + 2. unset the NC_VAR_INFO_T.fill_val_changed field. +3. If the "fill_value" key is defined, then + 1. compile the key's value to void* + 2. convert the compiled value to the variable's type + 3. store the attribute data (and type) into the NC_VAR_INFO_T.fill_value field (overwriting any existing value) + 4. set NC_VAR_INFO_T.fill_val_changed. +4. Syncronize + 1. Create or overwrite the _FillValue attribute to have same value +as NC_VAR_INFO_T.fill_value. + +## Writing (NC)Zarr metadata +1. If the _FillValue attribute is defined, then ignore it in favor + of whatever the NC_VAR_INFO_T.fill_value field dictates. +2. If NC_VAR_INFO_T.no_fill is set, then write the "fill_value" +key with a NULL value. +3. If NC_VAR_INFO_T.no_fill is not set, then + 1. convert the NC_VAR_INFO_T.fill_value field to the corresponding +JSON value + 2. write fill_value to the (NC)Zarr array metadata. +*/ + +#ifndef ZFILL_H +#define ZFILL_H + +EXTERNL int NCZ_disable_fill(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var); +EXTERNL void* NCZ_getdfaltfillvalue(nc_type nctype); +EXTERNL int NCZ_isdfaltfillvalue(nc_type nctype, void* value); + +#if 0 +EXTERNL int NCZ_set_fill_value(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, int no_fill, const void* fillvalue); +EXTERNL int NCZ_set_fill_att(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, NC_ATT_INFO_T* att, int no_fill, const void* fillvalue); +#endif + +#endif /*ZFILL_H*/ + diff --git a/libnczarr/zfilter.c b/libnczarr/zfilter.c index 86e2a9dd95..d142e45d55 100644 --- a/libnczarr/zfilter.c +++ b/libnczarr/zfilter.c @@ -49,6 +49,7 @@ #include "ncpathmgr.h" #include "ncpoco.h" #include "netcdf_filter.h" +#include "netcdf_aux.h" #include "netcdf_filter_build.h" #include "zfilter.h" #include "zplugins.h" @@ -64,78 +65,31 @@ #define NULLIFY(x) ((x)?(x):"NULL") -/* The NC_VAR_INFO_T->filters field is an NClist of this struct */ -/* -Each filter can have two parts: HDF5 and Codec. -The NC_VAR_INFO_T.filters list only holds entries where both the HDF5 info -and the codec info are defined. -The NCZ_VAR_INFO_T.codecs list holds the codec info when reading a Zarr file. -Note that it is not possible to have an entry on the filters list that does not -have both HDF5 and codec. This is because nc_def_var_filter will fail if the codec -part is not available. If a codec is read from a file and there is no available -corresponding HDF5 implementation, then that codec will not appear in the filters list. -It is possible that some subset of the codecs do have a corresponding HDF5, but we -enforce the rule that no entries go into the filters list unless all are defined. -It is still desirable for a user to be able to see what filters and codecs are defined -for a variable. This is accommodated by providing two special attributes: -1, "_Filters" attribute shows the HDF5 filters defined on the variable, if any. -2, "_Codecs" attribute shows the codecs defined on the variable; for zarr, this list - should always be defined. -*/ - -/* Codec Info */ -typedef struct NCZ_Codec { - char* id; /**< The NumCodecs ID */ - char* codec; /**< The Codec from the file; NULL if creating */ -} NCZ_Codec; - -static NCZ_Codec codec_empty = {NULL, NULL}; - -static void -ncz_codec_clear(NCZ_Codec* codec) { - nullfree(codec->id); nullfree(codec->codec); - *codec = codec_empty; +NCZ_Codec +NCZ_codec_empty(void) +{ + NCZ_Codec empty = {NULL, NULL, 0}; + return empty; } -typedef struct NCZ_Params {size_t nparams; unsigned* params;} NCZ_Params; - -/* HDF5 Info */ -typedef struct NCZ_HDF5 { - unsigned id; /**< HDF5 id corresponding to filterid. */ - NCZ_Params visible; - NCZ_Params working; -} NCZ_HDF5; - -static NCZ_HDF5 hdf5_empty = {0, {0,NULL}, {0,NULL}}; - -static void -ncz_hdf5_clear(NCZ_HDF5* h) { - nullfree(h->visible.params); - nullfree(h->working.params); - *h = hdf5_empty; +NCZ_HDF5 +NCZ_hdf5_empty(void) +{ + NCZ_HDF5 empty = {0, {0,NULL}, {0,NULL}}; + return empty; } -typedef struct NCZ_Filter { - int flags; /**< Flags describing state of this filter. */ -# define FLAG_VISIBLE 1 /* If set, then visible parameters are defined */ -# define FLAG_WORKING 2 /* If set, then WORKING parameters are defined */ -# define FLAG_CODEC 4 /* If set, then visbile parameters come from an existing codec string */ -# define FLAG_HDF5 8 /* If set, => visible parameters came from nc_def_var_filter */ -# define FLAG_NEWVISIBLE 16 /* If set, => visible parameters were modified */ -# define FLAG_INCOMPLETE 32 /* If set, => filter has no complete matching plugin */ -# define FLAG_SUPPRESS 64 /* If set, => filter should not be used (probably because variable is not fixed size */ - NCZ_HDF5 hdf5; - NCZ_Codec codec; - struct NCZ_Plugin* plugin; /**< Implementation of this filter. */ - int chainindex; /* Position in original chain */ -} NCZ_Filter; +/**************************************************/ #define FILTERINCOMPLETE(f) ((f)->flags & FLAG_INCOMPLETE?1:0) +#define FILTERSUPPRESSED(f) ((f)->flags & FLAG_SUPPRESS?1:0) /* WARNING: GLOBAL DATA */ /* TODO: move to common global state */ +#ifdef NETCDF_ENABLE_NCZARR_FILTERS static int NCZ_filter_initialized = 0; +#endif /**************************************************/ @@ -159,6 +113,42 @@ NCJtrace(const NCjson* j) #endif #if defined(DEBUGF) || defined(DEBUGL) + +static char* +printparams(size_t nparams, const unsigned* params) +{ + static char ppbuf[4096]; + if(nparams == 0) + snprintf(ppbuf,4096,"{0,%p}",params); + else + snprintf(ppbuf,4096,"{%u %s}",(unsigned)nparams,nczprint_paramvector(nparams,params)); + return ppbuf; +} + +static char* +printnczparams(const NCZ_Params p) +{ + return printparams(p.nparams,p.params); +} + +static const char* +printcodec(const NCZ_Codec c) +{ + static char pcbuf[4096]; + snprintf(pcbuf,sizeof(pcbuf),"{id=%s codec=%s}", + c.id,NULLIFY(c.codec)); + return pcbuf; +} + +static const char* +printhdf5(const NCZ_HDF5 h) +{ + static char phbuf[4096]; + snprintf(phbuf,sizeof(phbuf),"{id=%u visible=%s working=%s}", + h.id, printnczparams(h.visible), printnczparams(h.working)); + return phbuf; +} + static const char* printfilter(const NCZ_Filter* f) { @@ -173,18 +163,19 @@ printfilter(const NCZ_Filter* f) #endif +#ifdef NETCDF_ENABLE_NCZARR_FILTERS /* Forward */ -static int NCZ_filter_free(NCZ_Filter* spec); -static int NCZ_filter_hdf5_clear(NCZ_HDF5* spec); -static int NCZ_filter_codec_clear(NCZ_Codec* spec); -static int NCZ_filter_lookup(NC_VAR_INFO_T* var, unsigned int id, struct NCZ_Filter** specp); - -static int ensure_working(const NC_VAR_INFO_T* var, NCZ_Filter* filter); - +static int NCZ_filter_lookup(NC_VAR_INFO_T* var, unsigned int id, NCZ_Filter** specp); +static int ensure_working(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, NCZ_Filter* filter); +static int paramclone(unsigned** dstp, const unsigned* src, size_t nparams); static int paramnczclone(NCZ_Params* dst, const NCZ_Params* src); -static int paramclone(size_t nparams, unsigned** dstp, const unsigned* src); +static int NCZ_filter_freelist1(NClist* filters); +static int NCZ_overwrite_filter(NC_FILE_INFO_T* file, NCZ_Filter* src, NCZ_Filter* dst); +static int checkfilterconflicts(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, unsigned id, size_t nparams, const unsigned int* params); +#endif /**************************************************/ +#ifdef NETCDF_ENABLE_NCZARR_FILTERS /** * @file * @internal @@ -200,137 +191,44 @@ static int paramclone(size_t nparams, unsigned** dstp, const unsigned* src); int NCZ_filter_freelists(NC_VAR_INFO_T* var) { - size_t i; int stat=NC_NOERR; - NClist* filters = NULL; - NCZ_VAR_INFO_T* zvar = (NCZ_VAR_INFO_T*)var->format_var_info; ZTRACE(6,"var=%s",var->hdr.name); - filters = (NClist*)var->filters; - if(filters == NULL) goto done; - /* Free the filter list elements */ - for(i=0;ifilters); var->filters = NULL; - /* Free the incomplete filters */ - filters = (NClist*)zvar->incompletefilters; + return ZUNTRACE(stat); +} + +static int +NCZ_filter_freelist1(NClist* filters) +{ + int stat=NC_NOERR; + size_t i; + NCZ_Filter* spec = NULL; + + ZTRACE(6,"|filters|=%zu",nclistlength(filters)); if(filters == NULL) goto done; /* Free the filter list elements */ for(i=0;iincompletefilters = NULL; done: return ZUNTRACE(stat); } -static int +int NCZ_filter_free(NCZ_Filter* spec) { if(spec == NULL) return NC_NOERR; - NCZ_filter_hdf5_clear(&spec->hdf5); - NCZ_filter_codec_clear(&spec->codec); + ncz_hdf5_clear(&spec->hdf5); + ncz_codec_clear(&spec->codec); free(spec); return NC_NOERR; } -static int -NCZ_filter_hdf5_clear(NCZ_HDF5* spec) -{ - ZTRACE(6,"spec=%d",spec->id); - if(spec == NULL) goto done; - nullfree(spec->visible.params); - nullfree(spec->working.params); -done: - return ZUNTRACE(NC_NOERR); -} - -static int -NCZ_filter_codec_clear(NCZ_Codec* spec) -{ - ZTRACE(6,"spec=%d",(spec?spec->id:"null")); - if(spec == NULL) goto done; - nullfree(spec->id); - nullfree(spec->codec); -done: - return ZUNTRACE(NC_NOERR); -} - -/* From NCZ_def_var_filter */ -int -NCZ_addfilter(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, unsigned int id, size_t nparams, const unsigned int* params) -{ - int stat = NC_NOERR; - struct NCZ_Filter* fi = NULL; - NCZ_Plugin* plugin = NULL; - NCZ_VAR_INFO_T* zvar = (NCZ_VAR_INFO_T*)var->format_var_info; - - ZTRACE(6,"file=%s var=%s id=%u nparams=%u params=%p",file->hdr.name,var->hdr.name,id,nparams,params); - - if(nparams > 0 && params == NULL) - {stat = NC_EINVAL; goto done;} - - if(var->filters == NULL) var->filters = (void*)nclistnew(); - if(zvar->incompletefilters == NULL) zvar->incompletefilters = (void*)nclistnew(); - - /* Before anything else, find the matching plugin */ - if((stat = NCZ_plugin_loaded((size_t)id,&plugin))) goto done; - if(plugin == NULL) { - stat = THROW(NC_ENOFILTER); - goto done; - } - - /* Find the NCZ_Filter */ - if((stat=NCZ_filter_lookup(var,id,&fi))) goto done; - if(fi != NULL) { - if(fi->plugin != plugin) - {stat = NC_EINTERNAL; goto done;} - } else { - stat = NC_NOERR; - if((fi = calloc(1,sizeof(struct NCZ_Filter))) == NULL) - {stat = NC_ENOMEM; goto done;} - fi->plugin = plugin; - if(plugin->incomplete) { - fi->flags |= (FLAG_INCOMPLETE); - nclistpush((NClist*)zvar->incompletefilters, fi); - } else - nclistpush((NClist*)var->filters, fi); - } - - /* If this variable is not fixed size, mark filter as suppressed */ - if(var->type_info->varsized) { - fi->flags |= FLAG_SUPPRESS; - nclog(NCLOGWARN,"Filters cannot be applied to variable length data types; ignored"); - } - - if(!FILTERINCOMPLETE(fi)) { - /* (over)write the HDF5 parameters */ - nullfree(fi->hdf5.visible.params); - nullfree(fi->hdf5.working.params); - /* Fill in the hdf5 */ - fi->hdf5 = hdf5_empty; /* struct copy */ - fi->hdf5.id = id; - /* Capture the visible parameters */ - fi->hdf5.visible.nparams = nparams; - if(nparams > 0) { - if((stat = paramclone(nparams,&fi->hdf5.visible.params,params))) goto done; - } - fi->hdf5.working.nparams = 0; - fi->hdf5.working.params = NULL; - fi->flags |= FLAG_VISIBLE; - } - - fi = NULL; /* either way,its in a filters list */ - -done: - if(fi) NCZ_filter_free(fi); - return ZUNTRACE(stat); -} +/**************************************************/ int NCZ_filter_remove(NC_VAR_INFO_T* var, unsigned int id) @@ -341,12 +239,12 @@ NCZ_filter_remove(NC_VAR_INFO_T* var, unsigned int id) ZTRACE(6,"var=%s id=%u",var->hdr.name,id); /* Walk backwards */ for(size_t k = nclistlength(flist); k-->0;) { - struct NCZ_Filter* f = (struct NCZ_Filter*)nclistget(flist,k); + NCZ_Filter* f = (NCZ_Filter*)nclistget(flist,k); if(f->hdf5.id == id) { /* Remove from variable */ nclistremove(flist,k); /* Reclaim */ - NCZ_filter_free(f); + NCZ_filter_free(f); f = NULL; goto done; } } @@ -356,7 +254,7 @@ NCZ_filter_remove(NC_VAR_INFO_T* var, unsigned int id) } static int -NCZ_filter_lookup(NC_VAR_INFO_T* var, unsigned int id, struct NCZ_Filter** specp) +NCZ_filter_lookup(NC_VAR_INFO_T* var, unsigned int id, NCZ_Filter** specp) { size_t i; NClist* flist = (NClist*)var->filters; @@ -380,60 +278,37 @@ NCZ_filter_lookup(NC_VAR_INFO_T* var, unsigned int id, struct NCZ_Filter** specp return ZUNTRACEX(NC_NOERR,"spec=%d",IEXISTS(specp,hdf5.id)); } -#if 0 -static int -NCZ_codec_lookup(NClist* codecs, const char* id, NCZ_Codec** codecp) +int +NCZ_plugin_lookup(const char* codecid, NCZ_Plugin** pluginp) { - int i; + int stat = NC_NOERR; + size_t i; + NCZ_Plugin* plugin = NULL; + char digits[64]; + const char* trueid = NULL; + struct NCglobalstate* gs = NC_getglobalstate(); - ZTRACE(6,"|codecs|=%u id=%u", (unsigned)nclistlength(codecs), id); - if(codecp) *codecp = NULL; - - if(codecs == NULL) return NC_NOERR; - for(i=0;iid,id)==0) { - if(codecp) *codecp = spec; - break; + if(pluginp == NULL) return NC_NOERR; + + /* Find the plugin for this codecid */ + for(i=1;i<=gs->zarr.loaded_plugins_max;i++) { + NCZ_Plugin* p = gs->zarr.loaded_plugins[i]; + if(p == NULL) continue; + if(p == NULL|| p->codec.codec == NULL) continue; /* no plugin or no codec */ + if((p->codec.ishdf5raw)) { + /* get true id */ + snprintf(digits,sizeof(digits),"%d",p->hdf5.filter->id); + trueid = digits; + } else { + trueid = p->codec.codec->codecid; } + if(strcmp(codecid, trueid) == 0) + {plugin = p; break;} } - return ZUNTRACEX(NC_NOERR,"codec=%s",SEXISTS(codecp,id)); -} - -/** - * @internal Remove a filter from filter list for a variable - * - * @param ncid File ID. - * @param varid Variable ID. - * @param id filter id to remove - * - * @returns ::NC_NOERR No error. - * @returns ::NC_EBADID Bad ncid. - * @returns ::NC_ENOTVAR Invalid variable ID. - * @returns ::NC_ENOTNC4 Attempting netcdf-4 operation on file that is - * not netCDF-4/NCZARR. - * @returns ::NC_ELATEDEF Too late to change settings for this variable. - * @returns ::NC_ENOTINDEFINE Not in define mode. - * @returns ::NC_EINVAL Invalid input - * @author Dennis Heimbigner - */ -int -nc_var_filter_remove(int ncid, int varid, unsigned int filterid) -{ - NC_VAR_INFO_T *var = NULL; - int stat; - - /* Get pointer to the var. */ - if ((stat = ncz_find_grp_file_var(ncid, varid, NULL, NULL, &var))) - return stat; - assert(var); - - stat = NC4_nczarr_filter_remove(var,filterid); - + if(pluginp) *pluginp = plugin; return stat; } -#endif +#endif /*NETCDF_ENABLE_NCZARR_FILTERS*/ #ifdef NETCDF_ENABLE_NCZARR_FILTERS int @@ -441,28 +316,23 @@ NCZ_def_var_filter(int ncid, int varid, unsigned int id, size_t nparams, const unsigned int* params) { int stat = NC_NOERR; - NC *nc; NC_FILE_INFO_T* h5 = NULL; NC_GRP_INFO_T* grp = NULL; NC_VAR_INFO_T* var = NULL; - NCZ_Filter* oldspec = NULL; - NCZ_Filter* tmp = NULL; - int havedeflate = 0; - int haveszip = 0; + NCZ_Filter* spec = NULL; ZTRACE(1,"ncid=%d varid=%d id=%u nparams=%u params=%s",ncid,varid,id,(unsigned)nparams,nczprint_paramvector(nparams,params)); if((stat = NCZ_filter_initialize())) goto done; - if((stat = NC_check_id(ncid,&nc))) return stat; - assert(nc); - /* Find info for this file and group and var, and set pointer to each. */ - if ((stat = ncz_find_grp_file_var(ncid, varid, &h5, &grp, &var))) + if ((stat = nc4_find_grp_h5_var(ncid, varid, &h5, &grp, &var))) {stat = THROW(stat); goto done;} assert(h5 && var && var->hdr.id == varid); + if (h5->parallel) {stat = THROW(NC_EINVAL); goto done;} + /* If the NCZARR dataset has already been created, then it is too * late to set all the extra stuff. */ if (!(h5->flags & NC_INDEF)) @@ -472,66 +342,97 @@ NCZ_def_var_filter(int ncid, int varid, unsigned int id, size_t nparams, if (var->created) {stat = THROW(NC_ELATEDEF); goto done;} - /* Lookup incoming id to see if already defined */ - if((stat=NCZ_filter_lookup(var,id,&oldspec))) goto done; + if((stat = checkfilterconflicts(h5,var,id,nparams,params))) goto done; + if((spec = (NCZ_Filter*)calloc(1,sizeof(NCZ_Filter)))==NULL) {stat = NC_ENOMEM; goto done;} - /* See if deflate &/or szip is defined */ - if((stat = NCZ_filter_lookup(var,H5Z_FILTER_DEFLATE,&tmp))) goto done; - havedeflate = (tmp == NULL ? 0 : 1); - stat = NC_NOERR; /* reset */ + /* Filter => chunking */ + var->storage = NC_CHUNKED; + /* Determine default chunksizes for this variable unless already specified */ + if(var->chunksizes && !var->chunksizes[0]) { + /* Should this throw error? */ + if((stat = nc4_find_default_chunksizes2(grp, var))) goto done; + } - if((stat = NCZ_filter_lookup(var,H5Z_FILTER_SZIP,&tmp))) goto done; - haveszip = (tmp == NULL ? 0 : 1); - stat = NC_NOERR; /* reset */ + /* addfilter can handle case where filter is already defined, and will just replace parameters */ + spec->hdf5.id = id; + spec->hdf5.visible.nparams = nparams; + if((stat = paramclone(&spec->hdf5.visible.params,params,nparams))) goto done; + spec->flags |= FLAG_VISIBLE; + if((stat = NCZ_addfilter(h5,var,&spec))) goto done; /* addfilter will control spec memory */ + +done: + NCZ_filter_free(spec); + return ZUNTRACE(stat); +} + +int +NCZ_addfilter(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, NCZ_Filter** fip) +{ + int stat = NC_NOERR; + NCZ_Filter* fi = NULL; + NCZ_Filter* oldfi = NULL; + int exists = 0; + + ZTRACE(6,"file=%s var=%s",file->hdr.name,var->hdr.name); + + assert(fip != NULL && *fip != NULL); + fi = *fip; + + if(fi->hdf5.visible.nparams > 0 && fi->hdf5.visible.params == NULL) {stat = NC_EINVAL; goto done;} + + /* Lookup incoming id to see if already defined */ + if((stat=NCZ_filter_lookup(var,fi->hdf5.id,&oldfi))) goto done; + /* If incoming filter not already defined, then check for conflicts */ - if(oldspec == NULL) { - if(id == H5Z_FILTER_DEFLATE) { - int level; - if(nparams != 1) - {stat = THROW(NC_EFILTER); goto done;}/* incorrect no. of parameters */ - level = (int)params[0]; - if (level < NC_MIN_DEFLATE_LEVEL || level > NC_MAX_DEFLATE_LEVEL) - {stat = THROW(NC_EINVAL); goto done;} - /* If szip compression is already applied, return error. */ - if(haveszip) {stat = THROW(NC_EINVAL); goto done;} - } - if(id == H5Z_FILTER_SZIP) { /* Do error checking */ - if(nparams != 2) - {stat = THROW(NC_EFILTER); goto done;}/* incorrect no. of parameters */ - /* Pixels per block must be an even number, < 32. */ - if (params[1] % 2 || params[1] > NC_MAX_PIXELS_PER_BLOCK) - {stat = THROW(NC_EINVAL); goto done;} - /* If zlib compression is already applied, return error. */ - if(havedeflate) {stat = THROW(NC_EINVAL); goto done;} - } - /* Filter => chunking */ - var->storage = NC_CHUNKED; - /* Determine default chunksizes for this variable unless already specified */ - if(var->chunksizes && !var->chunksizes[0]) { - /* Should this throw error? */ - if((stat = nc4_find_default_chunksizes2(grp, var))) - goto done; - } - } - /* More error checking */ - if(id == H5Z_FILTER_SZIP) { /* szip X chunking error checking */ - /* For szip, the pixels_per_block parameter must not be greater - * than the number of elements in a chunk of data. */ - size_t num_elem = 1; - size_t d; - for (d = 0; d < var->ndims; d++) - if (var->dim[d]->len) - num_elem *= var->dim[d]->len; - /* Pixels per block must be <= number of elements. */ - if (params[1] > num_elem) - {stat = THROW(NC_EINVAL); goto done;} - } - /* addfilter can handle case where filter is already defined, and will just replace parameters */ - if((stat = NCZ_addfilter(h5,var,id,nparams,params))) - goto done; - if (h5->parallel) - {stat = THROW(NC_EINVAL); goto done;} + if(oldfi == NULL) { + if((stat = checkfilterconflicts(file,var,fi->hdf5.id,fi->hdf5.visible.nparams,fi->hdf5.visible.params))) goto done; + exists = 0; + } else + exists = 1; + + /* Warning if filter already exists, fi will be changed to be that filter and old fi will be reclaimed */ + /* If it already exists, then overwrite the parameters */ + if(exists) { + /* Overwrite old filter and let caller reclaim *fip */ + if((stat = NCZ_overwrite_filter(file,fi,oldfi))) goto done; + } else { /*!exists*/ + NCZ_Plugin* plugin = NULL; + /* Find the matching plugin, if any */ + if((stat = NCZ_plugin_loaded(fi->hdf5.id,&plugin))) goto done; + assert(fi->plugin == NULL || fi->plugin == plugin); + fi->plugin = plugin; + if(fi->plugin == NULL || plugin->incomplete) fi->incomplete = 1; + /* Add to filters list */ + if(var->filters == NULL) var->filters = nclistnew(); + nclistpush((NClist*)var->filters, fi); + fi = NULL; + } + if(fip) *fip = fi; /* assert control */ + +done: + return ZUNTRACE(stat); +} + +static int +NCZ_overwrite_filter(NC_FILE_INFO_T* file, NCZ_Filter* src, NCZ_Filter* dst) +{ + int stat = NC_NOERR; + + NC_UNUSED(file); + + /* (over)write the HDF5 parameters */ + ncz_hdf5_clear(&dst->hdf5); + ncz_codec_clear(&dst->codec); + /* Fill in the hdf5 and codec*/ + dst->hdf5 = src->hdf5; /* get non-pointer fields */ + /* Avoid taking control of params */ + if((stat = paramclone(&dst->hdf5.visible.params,src->hdf5.visible.params,src->hdf5.visible.nparams))) goto done; + assert(src->hdf5.working.nparams == 0 && src->hdf5.working.params == NULL); + dst->codec = src->codec; /* get non-pointer fields */ + /* Avoid taking control of fields */ + dst->codec.id = nulldup(src->codec.id); + dst->codec.codec = nulldup(src->codec.codec); done: return ZUNTRACE(stat); @@ -554,9 +455,7 @@ NCZ_inq_var_filter_ids(int ncid, int varid, size_t* nfiltersp, unsigned int* ids assert(nc); /* Find info for this file and group and var, and set pointer to each. */ - if ((stat = ncz_find_grp_file_var(ncid, varid, &h5, &grp, &var))) - {stat = THROW(stat); goto done;} - + if ((stat = nc4_find_grp_h5_var(ncid, varid, &h5, &grp, &var))) {stat = THROW(stat); goto done;} assert(h5 && var && var->hdr.id == varid); /* Make sure all the filters are defined */ @@ -568,7 +467,7 @@ NCZ_inq_var_filter_ids(int ncid, int varid, size_t* nfiltersp, unsigned int* ids if(nfilters > 0 && ids != NULL) { size_t k; for(k=0;khdf5.id; } } @@ -586,7 +485,7 @@ NCZ_inq_var_filter_info(int ncid, int varid, unsigned int id, size_t* nparamsp, NC_FILE_INFO_T* h5 = NULL; NC_GRP_INFO_T* grp = NULL; NC_VAR_INFO_T* var = NULL; - struct NCZ_Filter* spec = NULL; + NCZ_Filter* spec = NULL; ZTRACE(1,"ncid=%d varid=%d id=%u",ncid,varid,id); @@ -594,9 +493,7 @@ NCZ_inq_var_filter_info(int ncid, int varid, unsigned int id, size_t* nparamsp, assert(nc); /* Find info for this file and group and var, and set pointer to each. */ - if ((stat = ncz_find_grp_file_var(ncid, varid, &h5, &grp, &var))) - {stat = THROW(stat); goto done;} - + if ((stat = nc4_find_grp_h5_var(ncid, varid, &h5, &grp, &var))) {stat = THROW(stat); goto done;} assert(h5 && var && var->hdr.id == varid); /* Make sure all the plugins are defined */ @@ -604,21 +501,6 @@ NCZ_inq_var_filter_info(int ncid, int varid, unsigned int id, size_t* nparamsp, if((stat = NCZ_filter_lookup(var,id,&spec))) goto done; if(spec != NULL) { -#if 0 - if(spec->flags & FLAG_WORKING) {/* working params are available */ - if(spec->plugin->codec.codec->NCZ_visible_parameters) { - stat = spec->plugin->codec.codec->NCZ_visible_parameters(ncid,varid, - spec->hdf5.working.nparams,spec->hdf5.working.params, - &spec->hdf5.visible.nparams,&spec->hdf5.visible.params); -#ifdef DEBUGF - fprintf(stderr,">>> DEBUGF: NCZ_visible_parameters: ncid=%d varid=%d working=%s visible=%s\n",ncid,varid, - printnczparams(spec->hdf5.visible),printnczparams(spec->hdf5.working)); -#endif - if(stat) goto done; - } - spec->flags |= FLAG_VISIBLE; - } -#endif /* return the current visible parameters */ if(nparamsp) *nparamsp = spec->hdf5.visible.nparams; if(params && spec->hdf5.visible.nparams > 0) @@ -655,11 +537,63 @@ NCZ_inq_filter_avail(int ncid, unsigned id) return ZUNTRACE(stat); } +static int +checkfilterconflicts(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, unsigned id, size_t nparams, const unsigned int* params) +{ + int stat = NC_NOERR; + NCZ_Filter* deflate = NULL; + NCZ_Filter* szip = NULL; + int havedeflate, haveszip; + + NC_UNUSED(file); + + /* See if deflate &/or szip is defined */ + if((stat = NCZ_filter_lookup(var,H5Z_FILTER_DEFLATE,&deflate))) goto done; + havedeflate = (deflate == NULL ? 0 : 1); + stat = NC_NOERR; /* reset */ + + if((stat = NCZ_filter_lookup(var,H5Z_FILTER_SZIP,&szip))) goto done; + haveszip = (szip == NULL ? 0 : 1); + stat = NC_NOERR; /* reset */ + + if(id == H5Z_FILTER_DEFLATE) { + int level; + if(nparams != 1) {stat = THROW(NC_EFILTER); goto done;}/* incorrect no. of parameters */ + level = (int)params[0]; + if (level < NC_MIN_DEFLATE_LEVEL || level > NC_MAX_DEFLATE_LEVEL) + {stat = THROW(NC_EINVAL); goto done;} + /* If szip compression is already applied, return error. */ + if(haveszip) {stat = THROW(NC_EINVAL); goto done;} + } + if(id == H5Z_FILTER_SZIP) { /* Do error checking */ + if(nparams != 2) {stat = THROW(NC_EFILTER); goto done;}/* incorrect no. of parameters */ + /* Pixels per block must be an even number, < 32. */ + if (params[1] % 2 || params[1] > NC_MAX_PIXELS_PER_BLOCK) {stat = THROW(NC_EINVAL); goto done;} + /* If zlib compression is already applied, return error. */ + if(havedeflate) {stat = THROW(NC_EINVAL); goto done;} + } + /* More error checking */ + if(id == H5Z_FILTER_SZIP) { /* szip X chunking error checking */ + /* For szip, the pixels_per_block parameter must not be greater + * than the number of elements in a chunk of data. */ + size_t num_elem = 1; + size_t d; + for (d = 0; d < var->ndims; d++) { + if (var->dim[d]->len) num_elem *= var->dim[d]->len; + } + /* Pixels per block must be <= number of elements. */ + if (params[1] > num_elem) {stat = THROW(NC_EINVAL); goto done;} + } +done: + return THROW(stat); +} + #endif /*NETCDF_ENABLE_NCZARR_FILTERS*/ /**************************************************/ /* Filter application functions */ +#ifdef NETCDF_ENABLE_NCZARR_FILTERS int NCZ_filter_initialize(void) { @@ -670,9 +604,7 @@ NCZ_filter_initialize(void) NCZ_filter_initialized = 1; -#ifdef NETCDF_ENABLE_NCZARR_FILTERS if((stat = NCZ_load_all_plugins())) goto done; -#endif done: return ZUNTRACE(stat); } @@ -681,34 +613,37 @@ int NCZ_filter_finalize(void) { int stat = NC_NOERR; + ZTRACE(6,""); if(!NCZ_filter_initialized) goto done; NCZ_filter_initialized = 0; done: return ZUNTRACE(stat); } + int -NCZ_applyfilterchain(const NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, NClist* chain, size_t inlen, void* indata, size_t* outlenp, void** outdatap, int encode) +NCZ_applyfilterchain(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, NClist* chain, size_t inlen, void* indata, size_t* outlenp, void** outdatap, int encode) { - size_t i; int stat = NC_NOERR; + size_t i; void* lastbuffer = NULL; /* if not null, then last allocated buffer */ ZTRACE(6,"|chain|=%u inlen=%u indata=%p encode=%d", (unsigned)nclistlength(chain), (unsigned)inlen, indata, encode); /* Make sure all the filters are loaded && setup */ for(i=0;ihdf5.id > 0 && f->plugin != NULL); if(!(f->flags & FLAG_WORKING)) {/* working not yet available */ - if((stat = ensure_working(var,f))) goto done; + if((stat = ensure_working(file,var,f))) goto done; } } { - struct NCZ_Filter* f = NULL; + NCZ_Filter* f = NULL; const H5Z_class2_t* ff = NULL; size_t current_alloc = inlen; void* current_buf = indata; @@ -720,45 +655,45 @@ NCZ_applyfilterchain(const NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, NClist* cha #ifdef DEBUG fprintf(stderr,">>> current: alloc=%u used=%u buf=%p\n",(unsigned)current_alloc,(unsigned)current_used,current_buf); #endif - /* Apply in proper order */ - if(encode) { - for(i=0;iflags & FLAG_SUPPRESS) continue; /* this filter should not be applied */ - ff = f->plugin->hdf5.filter; - /* code can be simplified */ - next_alloc = current_alloc; - next_buf = current_buf; - next_used = 0; - next_used = ff->filter(0,f->hdf5.working.nparams,f->hdf5.working.params,current_used,&next_alloc,&next_buf); + ff = f->plugin->hdf5.filter; + /* code can be simplified */ + next_alloc = current_alloc; + next_buf = current_buf; + next_used = 0; + next_used = ff->filter(0,f->hdf5.working.nparams,f->hdf5.working.params,current_used,&next_alloc,&next_buf); #ifdef DEBUG fprintf(stderr,">>> next: alloc=%u used=%u buf=%p\n",(unsigned)next_alloc,(unsigned)next_used,next_buf); #endif if(next_used == 0) {stat = NC_EFILTER; lastbuffer = next_buf; goto done; } /* If the filter did not need to create a new buffer, then next == current else current was reclaimed */ - current_buf = next_buf; - current_alloc = next_alloc; - current_used = next_used; + current_buf = next_buf; + current_alloc = next_alloc; + current_used = next_used; } } else { /* Apply in reverse order */ - for(size_t k=nclistlength(chain); k-->0;) { - f = (struct NCZ_Filter*)nclistget(chain, k); + for(size_t k=nclistlength(chain); k-->0;) { + f = (NCZ_Filter*)nclistget(chain, k); if(f->flags & FLAG_SUPPRESS) continue; /* this filter should not be applied */ - ff = f->plugin->hdf5.filter; - /* code can be simplified */ - next_alloc = current_alloc; - next_buf = current_buf; - next_used = 0; - next_used = ff->filter(H5Z_FLAG_REVERSE,f->hdf5.working.nparams,f->hdf5.working.params,current_used,&next_alloc,&next_buf); + ff = f->plugin->hdf5.filter; + /* code can be simplified */ + next_alloc = current_alloc; + next_buf = current_buf; + next_used = 0; + next_used = ff->filter(H5Z_FLAG_REVERSE,f->hdf5.working.nparams,f->hdf5.working.params,current_used,&next_alloc,&next_buf); #ifdef DEBUG fprintf(stderr,">>> next: alloc=%u used=%u buf=%p\n",(unsigned)next_alloc,(unsigned)next_used,next_buf); #endif if(next_used == 0) {stat = NC_EFILTER; lastbuffer = next_buf; goto done;} /* If the filter did not need to create a new buffer, then next == current else current was reclaimed */ - current_buf = next_buf; - current_alloc = next_alloc; - current_used = next_used; + current_buf = next_buf; + current_alloc = next_alloc; + current_used = next_used; } } #ifdef DEBUG @@ -774,199 +709,31 @@ fprintf(stderr,">>> current: alloc=%u used=%u buf=%p\n",(unsigned)current_alloc, return ZUNTRACEX(stat,"outlen=%u outdata=%p",(unsigned)*outlenp,*outdatap); } -/**************************************************/ -/* JSON Parse/unparse of filters */ -int -NCZ_filter_jsonize(const NC_FILE_INFO_T* file, const NC_VAR_INFO_T* var, NCZ_Filter* filter, NCjson** jfilterp) -{ - int stat = NC_NOERR; - NCjson* jfilter = NULL; - - ZTRACE(6,"var=%s filter=%s",var->hdr.name,(filter != NULL && filter->codec.id != NULL?filter->codec.id:"null")); - - /* assumptions */ - assert(filter->flags & FLAG_WORKING); - - /* Convert the HDF5 id + parameters to the codec form */ - - /* We need to ensure the the current visible parameters are defined and had the opportunity to come - from the working parameters */ - assert((filter->flags & (FLAG_VISIBLE | FLAG_WORKING)) == (FLAG_VISIBLE | FLAG_WORKING)); -#if 0 - if((stat = rebuild_visible(var,filter))) goto done; -#endif - - /* Convert the visible parameters back to codec */ - /* Clear any previous codec */ - nullfree(filter->codec.id); filter->codec.id = NULL; - nullfree(filter->codec.codec); filter->codec.codec = NULL; - filter->codec.id = strdup(filter->plugin->codec.codec->codecid); - if(filter->plugin->codec.codec->NCZ_hdf5_to_codec) { - stat = filter->plugin->codec.codec->NCZ_hdf5_to_codec(filter->hdf5.visible.nparams,filter->hdf5.visible.params,&filter->codec.codec); -#ifdef DEBUGF - fprintf(stderr,">>> DEBUGF: NCZ_hdf5_to_codec: visible=%s codec=%s\n",printnczparams(filter->hdf5.visible),filter->codec.codec); -#endif - if(stat) goto done; - } else - {stat = NC_EFILTER; goto done;} - - /* Parse the codec as the return */ - if(NCJparse(filter->codec.codec,0,&jfilter) < 0) {stat = NC_EFILTER; goto done;} - if(jfilterp) {*jfilterp = jfilter; jfilter = NULL;} - -done: - NCJreclaim(jfilter); - return ZUNTRACEX(stat,"codec=%s",NULLIFY(filter->codec.codec)); -} - -/* Build filter from parsed Zarr metadata */ -int -NCZ_filter_build(const NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, const NCjson* jfilter, int chainindex) -{ - int stat = NC_NOERR; - NCZ_Filter* filter = NULL; - const NCjson* jvalue = NULL; - NCZ_Plugin* plugin = NULL; - NCZ_Codec codec = codec_empty; - NCZ_HDF5 hdf5 = hdf5_empty; - NCZ_VAR_INFO_T* zvar = (NCZ_VAR_INFO_T*)var->format_var_info; - - ZTRACE(6,"file=%s var=%s jfilter=%s",file->hdr.name,var->hdr.name,NCJtrace(jfilter)); - - if(var->filters == NULL) var->filters = nclistnew(); - if(zvar->incompletefilters == NULL) zvar->incompletefilters = nclistnew(); - - /* Get the id of this codec filter */ - if(NCJdictget(jfilter,"id",&jvalue)<0) {stat = NC_EFILTER; goto done;} - if(NCJsort(jvalue) != NCJ_STRING) { - stat = THROW(NC_ENOFILTER); goto done; - } - - /* Build the codec */ - if((codec.id = strdup(NCJstring(jvalue)))==NULL) - {stat = NC_ENOMEM; goto done;} - if(NCJunparse(jfilter,0,&codec.codec)<0) {stat = NC_EFILTER; goto done;} - - /* Find the plugin for this filter */ - if((stat = NCZ_plugin_loaded_byname(NCJstring(jvalue),&plugin))) goto done; - /* Will always have a filter; possibly unknown */ - if((filter = calloc(1,sizeof(NCZ_Filter)))==NULL) {stat = NC_ENOMEM; goto done;} - filter->chainindex = chainindex; - - if(plugin != NULL) { - /* Save the hdf5 id */ - hdf5.id = plugin->codec.codec->hdf5id; - /* Convert the codec to hdf5 form visible parameters */ - if(plugin->codec.codec->NCZ_codec_to_hdf5) { - stat = plugin->codec.codec->NCZ_codec_to_hdf5(codec.codec,&hdf5.visible.nparams,&hdf5.visible.params); -#ifdef DEBUGF - fprintf(stderr,">>> DEBUGF: NCZ_codec_to_hdf5: codec=%s, hdf5=%s\n",printcodec(codec),printhdf5(hdf5)); -#endif - if(stat) goto done; - } - filter->flags |= FLAG_VISIBLE; - filter->hdf5 = hdf5; hdf5 = hdf5_empty; - filter->codec = codec; codec = codec_empty; - filter->flags |= FLAG_CODEC; - filter->plugin = plugin; plugin = NULL; - } else { - /* Create a fake filter so we do not forget about this codec */ - filter->hdf5 = hdf5_empty; - filter->codec = codec; codec = codec_empty; - filter->flags |= (FLAG_INCOMPLETE|FLAG_CODEC); - } - - if(filter != NULL) { - NClist* filterlist = (NClist*)var->filters; - nclistpush(filterlist,filter); - filter = NULL; - } - -done: - ncz_hdf5_clear(&hdf5); - ncz_codec_clear(&codec); - NCZ_filter_free(filter); - return ZUNTRACE(stat); -} - -/**************************************************/ -/* _Codecs attribute */ - -int -NCZ_codec_attr(const NC_VAR_INFO_T* var, size_t* lenp, void* data) -{ - size_t i; - int stat = NC_NOERR; - size_t len; - char* contents = NULL; - NCbytes* buf = NULL; - NCZ_VAR_INFO_T* zvar = (NCZ_VAR_INFO_T*)var->format_var_info; - NClist* filters = (NClist*)var->filters; - NClist* incfilters = (NClist*)zvar->incompletefilters; - NCZ_Filter** chain = NULL; - size_t nfilters; - - ZTRACE(6,"var=%s",var->hdr.name); - - nfilters = nclistlength(filters) + nclistlength(incfilters); - - if(nfilters == 0) - {stat = NC_ENOTATT; goto done;} - - /* Collect all filters in chain order */ - chain = (NCZ_Filter**)calloc(sizeof(NCZ_Filter*),nfilters); - if(chain == NULL) {stat = NC_ENOMEM; goto done;} - - /* Sort the complete and incomplete filters in order in the chain */ - for(i=0;ichainindex] == NULL); - chain[f->chainindex] = f; - } - for(i=0;ichainindex] == NULL); - chain[f->chainindex] = f; - } - - /* Now construct the attribute */ - buf = ncbytesnew(); ncbytessetalloc(buf,1024); - ncbytescat(buf,"["); - for(i=0;i 0) ncbytescat(buf,","); - ncbytescat(buf,spec->codec.codec); - } - ncbytescat(buf,"]"); - - len = ncbyteslength(buf); - contents = nclistcontents(buf); - if(lenp) *lenp = len; - if(data) strncpy((char*)data,contents,len+1); -done: - nullfree(chain); - ncbytesfree(buf); - return ZUNTRACEX(stat,"len=%u data=%p",(unsigned)len,data); -} - static int -ensure_working(const NC_VAR_INFO_T* var, NCZ_Filter* filter) +ensure_working(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, NCZ_Filter* filter) { int stat = NC_NOERR; + NCZ_FILE_INFO_T* zfile = (NCZ_FILE_INFO_T*)file->format_file_info; + NCproplist* props = NULL; + if(FILTERINCOMPLETE(filter)) {stat = THROW(NC_ENOFILTER); goto done;} if(!(filter->flags & FLAG_WORKING)) { const size_t oldnparams = filter->hdf5.visible.nparams; const unsigned* oldparams = filter->hdf5.visible.params; assert(filter->flags & FLAG_VISIBLE); - /* Convert the visible parameters to working parameters; may also modify the visible params */ + /* Convert the visible parameters to working parameters; may also modify the visible params */ #ifdef DEBUGF - fprintf(stderr,">>> DEBUGF: NCZ_modify_parameters: before: visible=%s working=%s\n", - printparams(filter->hdf5.visible.nparams,filter->hdf5.visible.params), - printparams(filter->hdf5.working.nparams,filter->hdf5.working.params)); + fprintf(stderr,">>> DEBUGF: NCZ_modify_parameters: before: visible=%s working=%s\n", + printparams(filter->hdf5.visible.nparams,filter->hdf5.visible.params), + printparams(filter->hdf5.working.nparams,filter->hdf5.working.params)); #endif - if(filter->plugin && filter->plugin->codec.codec->NCZ_modify_parameters) { - stat = filter->plugin->codec.codec->NCZ_modify_parameters(ncidfor(var),var->hdr.id, + if(filter->plugin && filter->plugin->codec.codec->NCZ_modify_parameters) { + props = ncproplistnew(); + if((stat=ncproplistclone(zfile->zarr.zarr_format==2?NCplistzarrv2:NCplistzarrv3,props))) goto done; + ncproplistadd(props,"fileid",(size_t)ncidfor(var)); + ncproplistadd(props,"varid",(uintptr_t)var->hdr.id); + stat = filter->plugin->codec.codec->NCZ_modify_parameters(props,&filter->hdf5.id, &filter->hdf5.visible.nparams, &filter->hdf5.visible.params, &filter->hdf5.working.nparams, &filter->hdf5.working.params); #ifdef DEBUGF @@ -974,20 +741,20 @@ ensure_working(const NC_VAR_INFO_T* var, NCZ_Filter* filter) printfilter(filter)); #endif if(stat) goto done; - /* See if the visible parameters were changed */ + /* See if the visible parameters were changed */ if(oldnparams != filter->hdf5.visible.nparams || oldparams != filter->hdf5.visible.params) - filter->flags |= FLAG_NEWVISIBLE; + filter->flags |= FLAG_NEWVISIBLE; } else { /* assume visible are unchanged */ assert(oldnparams == filter->hdf5.visible.nparams && oldparams == filter->hdf5.visible.params); /* unchanged */ /* Just copy the visible parameters */ - nullfree(filter->hdf5.working.params); + nullfree(filter->hdf5.working.params); filter->hdf5.working.params = NULL; if((stat = paramnczclone(&filter->hdf5.working,&filter->hdf5.visible))) goto done; } #ifdef DEBUGF - fprintf(stderr,">>> DEBUGF: NCZ_modify_parameters: after: visible=%s working=%s\n", - printparams(filter->hdf5.visible.nparams,filter->hdf5.visible.params), - printparams(filter->hdf5.working.nparams,filter->hdf5.working.params)); + fprintf(stderr,">>> DEBUGF: NCZ_modify_parameters: after: visible=%s working=%s\n", + printparams(filter->hdf5.visible.nparams,filter->hdf5.visible.params), + printparams(filter->hdf5.working.nparams,filter->hdf5.working.params)); #endif filter->flags |= FLAG_WORKING; } @@ -995,62 +762,13 @@ ensure_working(const NC_VAR_INFO_T* var, NCZ_Filter* filter) fprintf(stderr,">>> DEBUGF: ensure_working_parameters: ncid=%lu varid=%u filter=%s\n", ncidfor(var), (unsigned)var->hdr.id,printfilter(filter)); #endif done: + ncproplistfree(props); return THROW(stat); } -#if 0 -static int -rebuild_visible(const NC_VAR_INFO_T* var, NCZ_Filter* filter) -{ - int stat = NC_NOERR; - int nvisible0; - unsigned* visible0 = NULL; - - assert(filter->flags & FLAG_WORKING); - /* If the visible parameters are previously defined, save them */ - if(filter->flags & FLAG_VISIBLE) { - nvisible0 = filter->hdf5.visible.nparams; - visible0 = filter->hdf5.visible.params; - filter->hdf5.visible.nparams = 0; - filter->hdf5.visible.params = NULL; /* temporary */ - } - /* Cases to consider: - 1. visible already defined && NCZ_visible_parameters defined => apply - 2. visible not defined && NCZ_visible_parameters defined defined => apply - 3. visible already defined && NCZ_visible_parameters not defined => keep originals - 4. visible not defined && NCZ_visible_parameters not defined => use working parameters - */ - - /* Cases 1 and 2 */ - /* Convert the working parameters to visibleparameters, overwriting any existing visibles */ - if(filter->plugin->codec.codec->NCZ_visible_parameters) { - stat = filter->plugin->codec.codec->NCZ_visible_parameters(ncidfor(var),var->hdr.id, - filter->hdf5.working.nparams, filter->hdf5.working.params, - &filter->hdf5.visible.nparams, &filter->hdf5.visible.params); - if(stat) goto done; - } else if(filter->flags & FLAG_CODEC) {/* Case 3 */ - filter->hdf5.visible.nparams = nvisible0; - filter->hdf5.visible.params = visible0; visible0 = NULL; - } else {/* Case 4 */ - /* Use the working parameters as the visible parameters */ - filter->hdf5.visible.nparams = filter->hdf5.working.nparams; - if(filter->hdf5.working.nparams > 0) { - if((stat = paramnczclone(&filter->hdf5.visible,&filter->hdf5.working))) goto done; - } - } - filter->flags |= FLAG_VISIBLE; -#ifdef DEBUGF - fprintf(stderr,">>> DEBUGF: rebuild_visible_parameters: ncid=%lu varid=%u filter=%s\n", ncidfor(var), (unsigned)var->hdr.id,printfilter(filter)); -#endif -done: - nullfree(visible0); - return THROW(stat); -} -#endif - /* Called by NCZ_enddef to ensure that the working parameters are defined */ int -NCZ_filter_setup(NC_VAR_INFO_T* var) +NCZ_filter_setup(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var) { size_t i; int stat = NC_NOERR; @@ -1061,14 +779,14 @@ NCZ_filter_setup(NC_VAR_INFO_T* var) filters = (NClist*)var->filters; for(i=0;iplugin != NULL); - assert((filter->flags & FLAG_VISIBLE)); /* Assume visible params are defined */ - /* verify */ + assert((filter->flags & FLAG_VISIBLE)); /* Assume visible params are defined */ + /* verify */ assert(filter->hdf5.id > 0 && (filter->hdf5.visible.nparams == 0 || filter->hdf5.visible.params != NULL)); /* Initialize the working parameters */ - if((stat = ensure_working(var,filter))) goto done; + if((stat = ensure_working(file,var,filter))) goto done; #ifdef DEBUGF fprintf(stderr,">>> DEBUGF: NCZ_filter_setup: ncid=%d varid=%d filter=%s\n", (int)ncidfor(var),(int)var->hdr.id, printfilter(filter)); @@ -1079,12 +797,48 @@ NCZ_filter_setup(NC_VAR_INFO_T* var) return ZUNTRACE(stat); } +/* _Codecs attribute */ +int +NCZ_codec_attr(NC_VAR_INFO_T* var, size_t* lenp, void* data) +{ + size_t i; + int stat = NC_NOERR; + size_t len; + char* contents = NULL; + NCbytes* buf = NULL; + NClist* filters = (NClist*)var->filters; + size_t nfilters; + + ZTRACE(6,"var=%s",var->hdr.name); + + nfilters = nclistlength(filters); + + if(nfilters == 0) + {stat = NC_ENOTATT; goto done;} + + buf = ncbytesnew(); ncbytessetalloc(buf,1024); + ncbytescat(buf,"["); + for(i=0;i 0) ncbytescat(buf,","); + ncbytescat(buf,spec->codec.codec); + } + ncbytescat(buf,"]"); + + len = ncbyteslength(buf); + contents = nclistcontents(buf); + if(lenp) *lenp = len; + if(data) strncpy((char*)data,contents,len+1); +done: + ncbytesfree(buf); + return ZUNTRACEX(stat,"len=%u data=%p",(unsigned)len,data); +} /**************************************************/ /* Clone an hdf5 parameter set */ static int -paramclone(size_t nparams, unsigned** dstp, const unsigned* src) +paramclone(unsigned** dstp, const unsigned* src, size_t nparams) { unsigned* dst = NULL; if(nparams > 0) { @@ -1102,5 +856,24 @@ paramnczclone(NCZ_Params* dst, const NCZ_Params* src) { assert(src != NULL && dst != NULL && dst->params == NULL); *dst = *src; - return paramclone(src->nparams,&dst->params,src->params); + return paramclone(&dst->params,src->params,src->nparams); } + +void +ncz_hdf5_clear(NCZ_HDF5* h) +{ + nullfree(h->visible.params); + nullfree(h->working.params); + *h = NCZ_hdf5_empty(); +} + +void +ncz_codec_clear(NCZ_Codec* c) +{ + nullfree(c->id); + nullfree(c->codec); + *c = NCZ_codec_empty(); +} + +#else +#endif diff --git a/libnczarr/zfilter.h b/libnczarr/zfilter.h index 2ef5c8fd13..1d147a8777 100644 --- a/libnczarr/zfilter.h +++ b/libnczarr/zfilter.h @@ -19,18 +19,98 @@ /*Mnemonic*/ #define ENCODING 1 +/* The NC_VAR_INFO_T->filters field is an NClist of this struct. +Each filter can have two parts: HDF5 and Codec. +The NC_VAR_INFO_T.filters list only holds entries where both the HDF5 info +and the codec info are defined. +The NCZ_VAR_INFO_T.codecs list holds the codec info when reading a Zarr file. +Note that it is not possible to have an entry on the filters list that does not +have both HDF5 and codec. This is because nc_def_var_filter will fail if the codec +part is not available. If a codec is read from a file and there is no available +corresponding HDF5 implementation, then that codec will not appear in the filters list. +It is possible that some subset of the codecs do have a corresponding HDF5, but we +enforce the rule that no entries go into the filters list unless all are defined. +It is still desirable for a user to be able to see what filters and codecs are defined +for a variable. This is accommodated by providing two special attributes: +1, "_Filters" attribute shows the HDF5 filters defined on the variable, if any. +2, "_Codecs" attribute shows the codecs defined on the variable; for zarr, this list + should always be defined. + +For Zarr V3, we add a notion of "pseudo" filters. These are filters +that place-holders to satisfy the Zarr V3 spec, but whose action is +actually handled elsewhere in the nczarr code. +There is currently one one such pseudo filter: "bytes". +This filter is expected to be the first filter in the filter chain. +Semantically, "bytes" handles data type endianness. + +Such filters do not appear in the _Codecs attribute or the _Filters attribute, +but are in the Zarr metadata. + +When reading a V3 Zarr dataset, the "bytes" codec must occur first in the filter chain, +else the array is marked as unreadable. +If it the first codec, then it is parsed to find out the endianness of the array. +*/ + /* Opaque */ -struct NCZ_Filter; +struct H5Z_class2_t; +struct NCZ_codec_t; +struct NCPSharedLib; +struct NCZ_Plugin; +struct NCZ_Codec; +struct NCZ_HDF5; + +typedef struct NCZ_Params { + size_t nparams; + unsigned* params; +} NCZ_Params; + +/* HDF5 Info */ +typedef struct NCZ_HDF5 { + unsigned id; /**< HDF5 id corresponding to filterid. */ + NCZ_Params visible; + NCZ_Params working; +} NCZ_HDF5; +extern struct NCZ_HDF5 NCZ_hdf5_empty(void); + +/* Codec Info */ +typedef struct NCZ_Codec { + char* id; /**< The NumCodecs ID */ + char* codec; /**< The codec string from the file; NULL if creating */ + int pseudo; /**< If the codec action is handled by non-codec code in netcdf-c */ +} NCZ_Codec; +extern struct NCZ_Codec NCZ_codec_empty(); + +typedef struct NCZ_Filter { + NCZ_HDF5 hdf5; + NCZ_Codec codec; + struct NCZ_Plugin* plugin; /**< Implementation of this filter. */ + int incomplete; /* If set, => filter has no complete matching plugin */ + int flags; /**< Flags describing state of this filter. */ +# define FLAG_VISIBLE 1 /* If set, then visible parameters are defined */ +# define FLAG_WORKING 2 /* If set, then WORKING parameters are defined */ +# define FLAG_CODEC 4 /* If set, then visbile parameters come from an existing codec string */ +# define FLAG_HDF5 8 /* If set, => visible parameters came from nc_def_var_filter */ +# define FLAG_NEWVISIBLE 16 /* If set, => visible parameters were modified */ +# define FLAG_INCOMPLETE 32 /* If set, => filter has no complete matching plugin */ +# define FLAG_SUPPRESS 64 /* If set, => filter should not be used (probably because variable is not fixed size */ +} NCZ_Filter; int NCZ_filter_initialize(void); int NCZ_filter_finalize(void); -int NCZ_addfilter(NC_FILE_INFO_T*, NC_VAR_INFO_T* var, unsigned int id, size_t nparams, const unsigned int* params); -int NCZ_filter_setup(NC_VAR_INFO_T* var); + +int NCZ_addfilter(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, NCZ_Filter** filterp); +int NCZ_plugin_lookup(const char* codecid, struct NCZ_Plugin** pluginp); + +int NCZ_filter_verify(NCZ_Filter* filter, int varsized); +int NCZ_filter_setup(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var); int NCZ_filter_freelists(NC_VAR_INFO_T* var); int NCZ_codec_freelist(NCZ_VAR_INFO_T* zvar); -int NCZ_applyfilterchain(const NC_FILE_INFO_T*, NC_VAR_INFO_T*, NClist* chain, size_t insize, void* indata, size_t* outlen, void** outdata, int encode); -int NCZ_filter_jsonize(const NC_FILE_INFO_T*, const NC_VAR_INFO_T*, struct NCZ_Filter* filter, struct NCjson**); -int NCZ_filter_build(const NC_FILE_INFO_T*, NC_VAR_INFO_T* var, const NCjson* jfilter, int chainindex); -int NCZ_codec_attr(const NC_VAR_INFO_T* var, size_t* lenp, void* data); +int NCZ_filter_free(NCZ_Filter* spec); +int NCZ_applyfilterchain(NC_FILE_INFO_T*, NC_VAR_INFO_T*, NClist* chain, size_t insize, void* indata, size_t* outlen, void** outdata, int encode); +int NCZ_filter_jsonize(NC_FILE_INFO_T*, NC_VAR_INFO_T*, struct NCZ_Filter* filter, struct NCjson**); +int NCZ_filter_build(NC_FILE_INFO_T*, NC_VAR_INFO_T* var, const NCjson* jfilter, int chainindex); +int NCZ_codec_attr(NC_VAR_INFO_T* var, size_t* lenp, void* data); +void ncz_hdf5_clear(NCZ_HDF5* h); +void ncz_codec_clear(NCZ_Codec* c); #endif /*ZFILTER_H*/ diff --git a/libnczarr/zformat.c b/libnczarr/zformat.c new file mode 100644 index 0000000000..d118a57d23 --- /dev/null +++ b/libnczarr/zformat.c @@ -0,0 +1,407 @@ +/********************************************************************* + * Copyright 2018, UCAR/Unidata + * See netcdf/COPYRIGHT file for copying and redistribution conditions. + *********************************************************************/ + +#include "zincludes.h" +#include "zformat.h" +#ifdef NETCDF_ENABLE_NCZARR_FILTERS +#include "zfilter.h" +#endif + +/**************************************************/ + +struct ZOBJ NCZ_emptyzobj(void) +{ + static struct ZOBJ empty = {NULL,NULL,0}; + return empty; +} + +/**************************************************/ + +extern int NCZF2_initialize(void); +extern int NCZF2_finalize(void); +extern int NCZF3_initialize(void); +extern int NCZF3_finalize(void); + +/**************************************************/ + +int +NCZF_initialize(void) +{ + int stat = NC_NOERR; + if((stat=NCZF2_initialize())) goto done; + if((stat=NCZF3_initialize())) goto done; +done: + return THROW(stat); +} + +int +NCZF_finalize(void) +{ + int stat = NC_NOERR; + if((stat=NCZF2_finalize())) goto done; + if((stat=NCZF3_finalize())) goto done; +done: + return THROW(stat); +} + + +/**************************************************/ +/*File-Level Operations*/ +int +NCZF_create(NC_FILE_INFO_T* file, NCURI* uri, NCZMAP* map) +{ + int stat = NC_NOERR; + NCZ_FILE_INFO_T* zfile = NULL; + + zfile = (NCZ_FILE_INFO_T*)file->format_file_info; + assert(zfile != NULL); + stat = zfile->dispatcher->create(file,uri,map); + return THROW(stat); +} + +int +NCZF_open(NC_FILE_INFO_T* file, NCURI* uri, NCZMAP* map) +{ + int stat = NC_NOERR; + NCZ_FILE_INFO_T* zfile = NULL; + + zfile = (NCZ_FILE_INFO_T*)file->format_file_info; + assert(zfile != NULL); + stat = zfile->dispatcher->open(file,uri,map); + return THROW(stat); +} + +int +NCZF_close(NC_FILE_INFO_T* file) +{ + int stat = NC_NOERR; + NCZ_FILE_INFO_T* zfile = NULL; + + zfile = (NCZ_FILE_INFO_T*)file->format_file_info; + assert(zfile != NULL); + stat = zfile->dispatcher->close(file); + return THROW(stat); +} + +/*Read JSON Metadata*/ +int +NCZF_download_grp(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, struct ZOBJ* zobj) +{ + int stat = NC_NOERR; + NCZ_FILE_INFO_T* zfile = NULL; + + zfile = (NCZ_FILE_INFO_T*)file->format_file_info; + assert(zfile != NULL); + stat = zfile->dispatcher->download_grp(file,grp,zobj); + return THROW(stat); +} + +int +NCZF_download_var(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, struct ZOBJ* zobj) +{ + int stat = NC_NOERR; + NCZ_FILE_INFO_T* zfile = NULL; + + zfile = (NCZ_FILE_INFO_T*)file->format_file_info; + assert(zfile != NULL); + stat = zfile->dispatcher->download_var(file,var,zobj); + return THROW(stat); +} + +int +NCZF_decode_group(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, struct ZOBJ* zgroup, NCjson** jzgrpp, NCjson** jzsuperp) +{ + int stat = NC_NOERR; + NCZ_FILE_INFO_T* zfile = NULL; + + zfile = (NCZ_FILE_INFO_T*)file->format_file_info; + assert(zfile != NULL); + stat = zfile->dispatcher->decode_group(file,grp,zgroup,jzgrpp,jzsuperp); + return THROW(stat); +} + +int +NCZF_decode_superblock(NC_FILE_INFO_T* file, const NCjson* jsuper, int* zformat, int* nczformat) +{ + int stat = NC_NOERR; + NCZ_FILE_INFO_T* zfile = NULL; + zfile = (NCZ_FILE_INFO_T*)file->format_file_info; + assert(zfile != NULL); + stat = zfile->dispatcher->decode_superblock(file,jsuper,zformat,nczformat); + return THROW(stat); +} + +int +NCZF_decode_nczarr_group(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, const NCjson* jnczgrp, NClist* vars, NClist* subgrps, NClist* dimrefs) +{ + int stat = NC_NOERR; + NCZ_FILE_INFO_T* zfile = NULL; + + zfile = (NCZ_FILE_INFO_T*)file->format_file_info; + assert(zfile != NULL); + stat = zfile->dispatcher->decode_nczarr_group(file,grp,jnczgrp,vars,subgrps,dimrefs); + return THROW(stat); +} + +int +NCZF_decode_var(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, struct ZOBJ* zobj, NClist* jfilters, size64_t** shapesp, size64_t** chunksp, NClist* dimrefs) +{ + int stat = NC_NOERR; + NCZ_FILE_INFO_T* zfile = NULL; + + zfile = (NCZ_FILE_INFO_T*)file->format_file_info; + assert(zfile != NULL); + stat = zfile->dispatcher->decode_var(file,var,zobj,jfilters,shapesp,chunksp,dimrefs); + return THROW(stat); +} + +int +NCZF_decode_attributes(NC_FILE_INFO_T* file, NC_OBJ* container, const NCjson* jatts) +{ + int stat = NC_NOERR; + NCZ_FILE_INFO_T* zfile = NULL; + + zfile = (NCZ_FILE_INFO_T*)file->format_file_info; + assert(zfile != NULL); + stat = zfile->dispatcher->decode_attributes(file,container,jatts); + return THROW(stat); +} + +int +NCZF_upload_grp(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, struct ZOBJ* zobj) +{ + int stat = NC_NOERR; + NCZ_FILE_INFO_T* zfile = NULL; + + zfile = (NCZ_FILE_INFO_T*)file->format_file_info; + assert(zfile != NULL); + stat = zfile->dispatcher->upload_grp(file,grp,zobj); + return THROW(stat); +} + +int +NCZF_upload_var(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, struct ZOBJ* zobj) +{ + int stat = NC_NOERR; + NCZ_FILE_INFO_T* zfile = NULL; + + zfile = (NCZ_FILE_INFO_T*)file->format_file_info; + assert(zfile != NULL); + stat = zfile->dispatcher->upload_var(file,var,zobj); + return THROW(stat); +} + +/*Write JSON Metadata*/ +int +NCZF_encode_superblock(NC_FILE_INFO_T* file, NCjson** jsuperp) +{ + int stat = NC_NOERR; + NCZ_FILE_INFO_T* zfile = NULL; + + zfile = (NCZ_FILE_INFO_T*)file->format_file_info; + assert(zfile != NULL); + stat = zfile->dispatcher->encode_superblock(file,jsuperp); + return THROW(stat); +} + +int +NCZF_encode_nczarr_group(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NCjson** jzgroupp) +{ + int stat = NC_NOERR; + NCZ_FILE_INFO_T* zfile = NULL; + + zfile = (NCZ_FILE_INFO_T*)file->format_file_info; + assert(zfile != NULL); + stat = zfile->dispatcher->encode_nczarr_group(file,grp,jzgroupp); + return THROW(stat); +} + +int +NCZF_encode_group(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NCjson** jgroupp) +{ + int stat = NC_NOERR; + NCZ_FILE_INFO_T* zfile = NULL; + + zfile = (NCZ_FILE_INFO_T*)file->format_file_info; + assert(zfile != NULL); + stat = zfile->dispatcher->encode_group(file,grp,jgroupp); + return THROW(stat); +} + +int +NCZF_encode_nczarr_array(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, NCjson** jzvarp) +{ + int stat = NC_NOERR; + NCZ_FILE_INFO_T* zfile = NULL; + + zfile = (NCZ_FILE_INFO_T*)file->format_file_info; + assert(zfile != NULL); + stat = zfile->dispatcher->encode_nczarr_array(file,var,jzvarp); + return THROW(stat); +} + +int +NCZF_encode_var(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, NClist* filtersj, NCjson** jvarp) +{ + int stat = NC_NOERR; + NCZ_FILE_INFO_T* zfile = NULL; + + zfile = (NCZ_FILE_INFO_T*)file->format_file_info; + assert(zfile != NULL); + stat = zfile->dispatcher->encode_var(file,var,filtersj,jvarp); + return THROW(stat); +} + +int +NCZF_encode_attributes(NC_FILE_INFO_T* file, NC_OBJ* container, NCjson** jnczconp, NCjson** jsuperp, NCjson** jattsp) +{ + int stat = NC_NOERR; + NCZ_FILE_INFO_T* zfile = NULL; + + zfile = (NCZ_FILE_INFO_T*)file->format_file_info; + assert(zfile != NULL); + stat = zfile->dispatcher->encode_attributes(file,container,jnczconp,jsuperp,jattsp); + return THROW(stat); +} + + +/*Filter Processing*/ +int +NCZF_encode_filter(NC_FILE_INFO_T* file, NCZ_Filter* filter, NCjson** jfilterp) +{ + int stat = NC_NOERR; + NCZ_FILE_INFO_T* zfile = NULL; + + zfile = (NCZ_FILE_INFO_T*)file->format_file_info; + assert(zfile != NULL); + stat = zfile->dispatcher->encode_filter(file,filter,jfilterp); + return THROW(stat); +} + +int +NCZF_decode_filter(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, NCjson* jfilter, NCZ_Filter* filter) +{ + int stat = NC_NOERR; + NCZ_FILE_INFO_T* zfile = NULL; + + zfile = (NCZ_FILE_INFO_T*)file->format_file_info; + assert(zfile != NULL); + stat = zfile->dispatcher->decode_filter(file,var,jfilter,filter); + return THROW(stat); +} + +/*Search*/ +int +NCZF_searchobjects(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NClist* varnames, NClist* subgrpnames) +{ + int stat = NC_NOERR; + NCZ_FILE_INFO_T* zfile = NULL; + + zfile = (NCZ_FILE_INFO_T*)file->format_file_info; + assert(zfile != NULL); + stat = zfile->dispatcher->searchobjects(file,grp,varnames,subgrpnames); + return THROW(stat); +} + +/*Chunkkeys*/ + +/* +From Zarr Specification: +"The compressed sequence of bytes for each chunk is stored under +a key formed from the index of the chunk within the grid of +chunks representing the array. To form a string key for a +chunk, the indices are converted to strings and concatenated +with the dimension_separator character ('.' or '/') separating +each index. For example, given an array with shape (10000, +10000) and chunk shape (1000, 1000) there will be 100 chunks +laid out in a 10 by 10 grid. The chunk with indices (0, 0) +provides data for rows 0-1000 and columns 0-1000 and is stored +under the key "0.0"; the chunk with indices (2, 4) provides data +for rows 2000-3000 and columns 4000-5000 and is stored under the +key "2.4"; etc." +*/ + +int +NCZF_encode_chunkkey(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, size_t rank, const size64_t* chunkindices, char dimsep, char** keyp) +{ + int stat = NC_NOERR; + NCZ_FILE_INFO_T* zfile = NULL; + + zfile = (NCZ_FILE_INFO_T*)file->format_file_info; + assert(zfile != NULL); + stat = zfile->dispatcher->encode_chunkkey(file,var,rank,chunkindices,dimsep,keyp); + return THROW(stat); +} + +int +NCZF_decode_chunkkey(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, const char* chunkname, size_t* rankp, size64_t** chunkindicesp) +{ + int stat = NC_NOERR; + NCZ_FILE_INFO_T* zfile = NULL; + + zfile = (NCZ_FILE_INFO_T*)file->format_file_info; + assert(zfile != NULL); + stat = zfile->dispatcher->decode_chunkkey(file,var,chunkname,rankp,chunkindicesp); + return THROW(stat); +} + +/* _ARRAY_DIMENSIONS (xarray) encode/decode */ +int +NCZF_encode_xarray(NC_FILE_INFO_T* file, size_t rank, NC_DIM_INFO_T** dims, char** xarraydims, size_t* zarr_rank) +{ + int stat = NC_NOERR; + NCZ_FILE_INFO_T* zfile = NULL; + + zfile = (NCZ_FILE_INFO_T*)file->format_file_info; + assert(zfile != NULL); + stat = zfile->dispatcher->encode_xarray(file,rank,dims,xarraydims,zarr_rank); + return THROW(stat); +} + +/* Format specific default dimension separator */ +char +NCZF_default_dimension_separator(NC_FILE_INFO_T* file) +{ + NCZ_FILE_INFO_T* zfile = NULL; + + zfile = (NCZ_FILE_INFO_T*)file->format_file_info; + assert(zfile != NULL); + return zfile->dispatcher->default_dimension_separator(file); +} + +/**************************************************/ +/* Misc. */ + +void +NCZ_clear_zobj(struct ZOBJ* zobj) +{ + if(zobj != NULL) { + NCZ_reclaim_json(zobj->jobj); + if(!zobj->constjatts) NCZ_reclaim_json(zobj->jatts); + memset(zobj,0,sizeof(struct ZOBJ)); + } +} + +void +NCZ_reclaim_zobj(struct ZOBJ* zobj) +{ + NCZ_clear_zobj(zobj); + nullfree(zobj); +} + +void +NCZ_reclaim_json(NCjson* json) +{ + NCJreclaim(json); +} + +void +NCZ_reclaim_json_list(NClist* listj) +{ + size_t i; + for(i=0;i need to reclaim jatts field */ +}; + +extern struct ZOBJ NCZ_emptyzobj(void); + +/* Hold attribute info */ +struct Ainfo { + nc_type type; + int endianness; + size_t typelen; +}; + +/* Capture arguments for ncz4_create_var */ +struct CVARGS { + const char* varname; + nc_type vtype; + int storage; + int scalar; + int endianness; + size_t maxstrlen; + char dimension_separator; + char order; + size_t rank; + size64_t shapes[NC_MAX_VAR_DIMS]; + size64_t chunks[NC_MAX_VAR_DIMS]; + int dimids[NC_MAX_VAR_DIMS]; + NClist* filterlist; + int no_fill; + void* fill_value; +}; + + +/* This is the dispatch table, with a pointer to each netCDF + * function. */ +typedef struct NCZ_Formatter { + int nczarr_format; + int zarr_format; + int dispatch_version; /* Version of the dispatch table */ + +/**************************************************/ + +/*File-Level Operations*/ +int (*create)(NC_FILE_INFO_T* file, NCURI* uri, NCZMAP* map); +int (*open)(NC_FILE_INFO_T* file, NCURI* uri, NCZMAP* map); +int (*close)(NC_FILE_INFO_T* file); + +/*Read JSON Metadata*/ +int (*download_grp)(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, struct ZOBJ* zobj); +int (*download_var)(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, struct ZOBJ* zobj); + +int (*decode_group)(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, struct ZOBJ* zobj, NCjson** jzgrpp, NCjson** jzsuperp); +int (*decode_superblock)(NC_FILE_INFO_T* file, const NCjson* jsuper, int* zarrformat, int* nczarrformat); +int (*decode_nczarr_group)(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, const NCjson* jnczgrp, NClist* vars, NClist* subgrps, NClist* dimdefs); +int (*decode_var)(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, struct ZOBJ* zobj, NClist* jfilters, size64_t** shapesp, size64_t** chunksp, NClist* dimrefs); +int (*decode_attributes)(NC_FILE_INFO_T* file, NC_OBJ* container, const NCjson* jatts); + +/*Write JSON Metadata*/ +int (*upload_grp)(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, struct ZOBJ* zobj); +int (*upload_var)(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, struct ZOBJ* zobj); + +int (*encode_superblock)(NC_FILE_INFO_T* file, NCjson** jsuperp); +int (*encode_nczarr_group)(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NCjson** jzgroupp); +int (*encode_group)(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NCjson** jgroupp); + +int (*encode_nczarr_array)(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, NCjson** jzvarp); +int (*encode_var)(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, NClist* filtersj, NCjson** jvarp); + +int (*encode_attributes)(NC_FILE_INFO_T* file, NC_OBJ* container, NCjson** jnczconp, NCjson** jsuperp, NCjson** jattsp); + +/*Filter Processing*/ +int (*encode_filter)(NC_FILE_INFO_T* file, NCZ_Filter* filter, NCjson** jfilterp); +int (*decode_filter)(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, NCjson* jfilter, NCZ_Filter* filter); + +/*Search*/ +int (*searchobjects)(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NClist* varnames, NClist* subgrpnames); + +/*Chunkkeys*/ +int (*encode_chunkkey)(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, size_t rank, const size64_t* chunkindices, char dimsep, char** keyp); +int (*decode_chunkkey)(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, const char* chunkname, size_t* rankp, size64_t** chunkindicesp); + +/* _ARRAY_DIMENSIONS encode/decode */ +int (*encode_xarray) (NC_FILE_INFO_T* file, size_t rank, NC_DIM_INFO_T** dims, char** xarraydims, size_t* zarr_rankp); + +/* Misc. */ +char (*default_dimension_separator) (NC_FILE_INFO_T* file); + +} NCZ_Formatter; + +#if defined(__cplusplus) +extern "C" { +#endif + +/* Called by nc_initialize and nc_finalize respectively */ +extern int NCZF_initialize(void); +extern int NCZF_finalize(void); + +/* Wrappers for the formatter functions */ + +/*File-Level Operations*/ +extern int NCZF_create(NC_FILE_INFO_T* file, NCURI* uri, NCZMAP* map); +extern int NCZF_open(NC_FILE_INFO_T* file, NCURI* uri, NCZMAP* map); +extern int NCZF_close(NC_FILE_INFO_T* file); + +/*Read JSON Metadata*/ +extern int NCZF_download_grp(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, struct ZOBJ* zobj); +extern int NCZF_download_var(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, struct ZOBJ* zobj); + +extern int NCZF_decode_group(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, struct ZOBJ* jgroup, NCjson** jzgrpp, NCjson** jzsuperp); +extern int NCZF_decode_superblock(NC_FILE_INFO_T* file, const NCjson* jsuper, int* zarrformat, int* nczarrformat); +extern int NCZF_decode_nczarr_group(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, const NCjson* jnczgrp, NClist* vars, NClist* subgrps, NClist* dimdefs); +extern int NCZF_decode_var(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, struct ZOBJ* zobj, NClist* jfilters, size64_t** shapesp, size64_t** chunksp, NClist* dimrefs); +extern int NCZF_decode_attributes(NC_FILE_INFO_T* file, NC_OBJ* container, const NCjson* jatts); + +/*Write JSON Metadata*/ +extern int NCZF_upload_grp(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, struct ZOBJ* zobj); +extern int NCZF_upload_var(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, struct ZOBJ* zobj); + +extern int NCZF_encode_superblock(NC_FILE_INFO_T* file, NCjson** jsuperp); +extern int NCZF_encode_nczarr_group(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NCjson** jzgroupp); +extern int NCZF_encode_group(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NCjson** jgroupp); +extern int NCZF_encode_nczarr_array(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, NCjson** jzvarp); +extern int NCZF_encode_var(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, NClist* filtersj, NCjson** jvarp); +extern int NCZF_encode_attributes(NC_FILE_INFO_T* file, NC_OBJ* container, NCjson** jnczvar, NCjson** jsuperp, NCjson** jattsp); + +/*Filter Processing*/ +extern int NCZF_encode_filter(NC_FILE_INFO_T* file, NCZ_Filter* filter, NCjson** jfilterp); +extern int NCZF_decode_filter(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, NCjson* jfilter, NCZ_Filter* filter); + +/*Search*/ +extern int NCZF_searchobjects(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NClist* varnames, NClist* subgrpnames); + +/*Chunkkeys*/ +extern int NCZF_encode_chunkkey(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, size_t rank, const size64_t* chunkindices, char dimsep, char** keyp); +extern int NCZF_decode_chunkkey(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, const char* chunkname, size_t* rankp, size64_t** chunkindicesp); + +/* _ARRAY_DIMENSIONS (xarray) encode/decode */ +extern int NCZF_encode_xarray(NC_FILE_INFO_T* file, size_t rank, NC_DIM_INFO_T** dims, char** xarraydims, size_t* zarr_rankp); + +/* Format specific default dimension separator */ +extern char NCZF_default_dimension_separator(NC_FILE_INFO_T* file); + +/* Define known dispatch tables and initializers */ +/* Each handles a specific NCZarr format + Pure Zarr */ +/* WARNING: there is a lot of similar code in the dispatchers, + so fixes to one may need to be propagated to the other dispatchers. +*/ + +extern const NCZ_Formatter* NCZ_formatter2; /* NCZarr V2 dispatch table => Zarr V2 */ +extern const NCZ_Formatter* NCZ_formatter3; /* NCZarr V3 dispatch table => Zarr V3*/ + +/**************************************************/ + +/* Use inference to get map and the formatter */ +extern int NCZ_get_map(NC_FILE_INFO_T* file, NCURI* url, mode_t mode, size64_t constraints, void* params, NCZMAP** mapp); + +extern int NCZ_get_create_formatter(NC_FILE_INFO_T* file, const NCZ_Formatter** formatterp); +extern int NCZ_get_open_formatter(NC_FILE_INFO_T* file, const NCZ_Formatter** formatterp); + +extern int NCZ_infer_open_zarr_format(NC_FILE_INFO_T* file); +extern int NCZ_infer_open_nczarr_format(NC_FILE_INFO_T* file); + +/**************************************************/ +/* Misc. */ +extern void NCZ_clear_zobj(struct ZOBJ* zobj); +extern void NCZ_reclaim_zobj(struct ZOBJ* zobj); +extern void NCZ_reclaim_json(NCjson* json); +extern void NCZ_reclaim_json_list(NClist* listj); + +/**************************************************/ + +#if defined(__cplusplus) +} +#endif + +/* Misc. Macros */ +#define TESTPUREZARR if(zinfo->flags & FLAG_PUREZARR) purezarr = 1; +#define TESTXARRAY if(zinfo->flags & FLAG_XARRAYDIMS) xarray = 1; + + +#endif /* ZFORMAT_H */ diff --git a/libnczarr/zformat2.c b/libnczarr/zformat2.c new file mode 100644 index 0000000000..9a403e6372 --- /dev/null +++ b/libnczarr/zformat2.c @@ -0,0 +1,1727 @@ +/* Copyright 2018-2018 University Corporation for Atmospheric + Research/Unidata. */ +/** + * @file + * + * @author Dennis Heimbigner + */ + +#include "zincludes.h" +#include "zplugins.h" +#include "znc4.h" +#include "zfill.h" +#ifdef NETCDF_ENABLE_NCZARR_FILTERS +#include "netcdf_filter_build.h" +#endif + +/**************************************************/ + +/*Mnemonics*/ +#define ISATTR 1 + +/**************************************************/ +/* Static zarr type name table */ +/* Used to convert nc_type <=> dtype */ +static const struct ZTYPESV2 { + const char* dtype; + const char* dtypeattr; +} znamesv2[N_NCZARR_TYPES] = { +/* nc_type dtype */ +/*NC_NAT*/ {NULL,0}, +/*NC_BYTE*/ {"|i1",NULL}, +/*NC_CHAR*/ {">S1",NULL}, +/*NC_SHORT*/ {"|i2",NULL}, +/*NC_INT*/ {"|i4",NULL}, +/*NC_FLOAT*/ {"|f4",NULL}, +/*NC_DOUBLE*/ {"|f8",NULL}, +/*NC_UBYTE*/ {"|u1",NULL}, +/*NC_USHORT*/ {"|u2",NULL}, +/*NC_UINT*/ {"|u4",NULL}, +/*NC_INT64*/ {"|i8",NULL}, +/*NC_UINT64*/ {"|u8",NULL}, +/*NC_STRING*/ {"|S%d",NULL}, +/*NC_JSON*/ {">S1",NC_JSON_DTYPE_V2} /* NCZarr internal type */ +}; + +/**************************************************/ +/* Forward */ + +static int ZF2_create(NC_FILE_INFO_T* file, NCURI* uri, NCZMAP* map); +static int ZF2_open(NC_FILE_INFO_T* file, NCURI* uri, NCZMAP* map); +static int ZF2_close(NC_FILE_INFO_T* file); +static int ZF2_download_grp(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, struct ZOBJ* zobj); +static int ZF2_download_var(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, struct ZOBJ* zobj); +static int ZF2_decode_group(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, struct ZOBJ* jgroup, NCjson** jzgrpp, NCjson** jzsuperp); +static int ZF2_decode_superblock(NC_FILE_INFO_T* file, const NCjson* jsuper, int* zarrformat, int* nczarrformat); +static int ZF2_decode_nczarr_group(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, const NCjson* jnczgrp, NClist* vars, NClist* subgrps, NClist* dimdefs); +static int ZF2_decode_var(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, struct ZOBJ* zobj, NClist* jfilters, size64_t** shapep, size64_t** chunksp, NClist* dimrefs); +static int ZF2_decode_attributes(NC_FILE_INFO_T* file, NC_OBJ* container, const NCjson* jatts); +static int decode_var_dimrefs(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, size_t rank, const size64_t* shapes, const NCjson* xarray, const NCjson* jdimrefs, NClist* dimrefs); +static int ZF2_upload_grp(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, struct ZOBJ* zobj); +static int ZF2_upload_var(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, struct ZOBJ* zobj); +static int ZF2_encode_superblock(NC_FILE_INFO_T* file, NCjson** jsuperp); +static int ZF2_encode_nczarr_group(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NCjson** jzgroupp); +static int ZF2_encode_group(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NCjson** jgroupp); +static int ZF2_encode_nczarr_array(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, NCjson** jzvarp); +static int ZF2_encode_var(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, NClist* filtersj, NCjson** jvarp); +static int ZF2_encode_attributes(NC_FILE_INFO_T* file, NC_OBJ* container, NCjson** jnczconp, NCjson** jsuperp, NCjson** jattsp); +static int ZF2_searchobjects(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NClist* varnames, NClist* subgrpnames); +static int ZF2_encode_chunkkey(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, size_t rank, const size64_t* chunkindices, char dimsep, char** keyp); +static int ZF2_decode_chunkkey(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, const char* chunkname, size_t* rankp, size64_t** chunkindicesp); +static int ZF2_encode_filter(NC_FILE_INFO_T* file, NCZ_Filter* filter, NCjson** jfilterp); +static int ZF2_decode_filter(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, NCjson* jfilter, NCZ_Filter* filter); +static int ZF2_encode_xarray(NC_FILE_INFO_T* file, size_t rank, NC_DIM_INFO_T** dims, char** xarraydims, size_t* zarr_rankp); +static char ZF2_default_dimension_separator(NC_FILE_INFO_T* file); + +static int decode_dim_decls(NC_FILE_INFO_T* file, const NCjson* jdims, NClist* dimdefs); +static int dtype2nctype(const char* dtype, nc_type* nctypep, int* endianp, size_t* typelenp); +static int nctype2dtype(nc_type nctype, int endianness, size_t typesize, char** dtypep, char** dattrtypep); +static int computeattrinfo(NC_FILE_INFO_T* file, nc_type typehint, const char* aname, const NCjson* jtypes, const NCjson* jdata, struct NCZ_AttrInfo* ainfo); + +/**************************************************/ +/* Format dispatch table */ + +static const NCZ_Formatter NCZ_formatter2_table = +{ + NCZARRFORMAT2, + ZARRFORMAT2, + NCZ_FORMATTER_VERSION, + + /*File-Level Operations*/ + ZF2_create, + ZF2_open, + ZF2_close, + + /*Read JSON Metadata*/ + ZF2_download_grp, + ZF2_download_var, + + ZF2_decode_group, + ZF2_decode_superblock, + ZF2_decode_nczarr_group, + ZF2_decode_var, + ZF2_decode_attributes, + + /*Write JSON Metadata*/ + ZF2_upload_grp, + ZF2_upload_var, + + ZF2_encode_superblock, + ZF2_encode_nczarr_group, + ZF2_encode_group, + + ZF2_encode_nczarr_array, + ZF2_encode_var, + + ZF2_encode_attributes, + + /*Filter Processing*/ + ZF2_encode_filter, + ZF2_decode_filter, + + /*Search*/ + ZF2_searchobjects, + + /*Chunkkeys*/ + ZF2_encode_chunkkey, + ZF2_decode_chunkkey, + + /*_ARRAY_DIMENSIONS*/ + ZF2_encode_xarray, + + /* Per-format default dimension separator */ + ZF2_default_dimension_separator, +}; + +const NCZ_Formatter* NCZ_formatter2 = &NCZ_formatter2_table; + +int +NCZF2_initialize(void) +{ + return NC_NOERR; +} + +int +NCZF2_finalize(void) +{ + return NC_NOERR; +} + +/**************************************************/ + +/*File-Level Operations*/ + +/** + * @internal Synchronize file metadata from internal to map. + * + * @param file Pointer to file info struct. + * + * @return ::NC_NOERR No error. + * @author Dennis Heimbigner + */ +static int +ZF2_create(NC_FILE_INFO_T* file, NCURI* uri, NCZMAP* map) +{ + int stat = NC_NOERR; + NCZ_FILE_INFO_T* zfile = NULL; + + NC_UNUSED(uri); + NC_UNUSED(map); + ZTRACE(4,"file=%s",file->controller->path); + zfile = (NCZ_FILE_INFO_T*)file->format_file_info; + assert(zfile != NULL); + return ZUNTRACE(THROW(stat)); +} + +static int +ZF2_open(NC_FILE_INFO_T* file, NCURI* uri, NCZMAP* map) +{ + int stat = NC_NOERR; + NCZ_FILE_INFO_T* zfile = NULL; + + NC_UNUSED(uri); + NC_UNUSED(map); + ZTRACE(4,"file=%s",file->controller->path); + zfile = (NCZ_FILE_INFO_T*)file->format_file_info; + assert(zfile != NULL); + return ZUNTRACE(THROW(stat)); +} + +int +ZF2_close(NC_FILE_INFO_T* file) +{ + int stat = NC_NOERR; + NC_UNUSED(file); + return THROW(stat); +} + +/**************************************************/ + +/*Dowload JSON Metadata*/ +int +ZF2_download_grp(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, struct ZOBJ* zobj) +{ + int stat = NC_NOERR; + char* fullpath = NULL; + char* key = NULL; + + NC_UNUSED(grp); + + /* Download .zgroup and .zattrs */ + if((stat = NCZ_grpkey(grp,&fullpath))) goto done; + if((stat = nczm_concat(fullpath,Z2GROUP,&key))) goto done; + if((stat = NCZMD_fetch_json_content(file,NCZMD_GROUP,key,&zobj->jobj))) goto done; + nullfree(key); key = NULL; + if((stat = nczm_concat(fullpath,Z2ATTRS,&key))) goto done; + if((stat = NCZMD_fetch_json_content(file,NCZMD_ATTRS,key,&zobj->jatts))) goto done; + zobj->constjatts = 0; + +done: + nullfree(key); + nullfree(fullpath); + return THROW(stat); +} + +int +ZF2_download_var(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, struct ZOBJ* zobj) +{ + int stat = NC_NOERR; + char* fullpath = NULL; + char* key = NULL; + + /* Download .zgroup and .zattrs */ + if((stat = NCZ_varkey(var,&fullpath))) goto done; + if((stat = nczm_concat(fullpath,Z2ARRAY,&key))) goto done; + if((stat = NCZMD_fetch_json_content(file,NCZMD_GROUP,key,&zobj->jobj))) goto done; + nullfree(key); + if((stat = nczm_concat(fullpath,Z2ATTRS,&key))) goto done; + if((stat = NCZMD_fetch_json_content(file,NCZMD_ATTRS,key,&zobj->jatts))) goto done; + nullfree(key); key = NULL; + zobj->constjatts = 0; + +done: + nullfree(key); + nullfree(fullpath); + return THROW(stat); +} + +int +ZF2_decode_group(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, struct ZOBJ* zobj, NCjson** jzgrpp, NCjson** jzsuperp) +{ + int stat = NC_NOERR; + NCjson* jzgrp = NULL; + NCjson* jzsuper = NULL; + + NC_UNUSED(file); + NC_UNUSED(grp); + if(zobj->jatts != NULL) { + /* Extract _nczarr_group from zobj->attr */ + NCJcheck(NCJdictget(zobj->jatts,NC_NCZARR_GROUP_ATTR,&jzgrp)); + /* Extract _nczarr_superblock from zobj->attr */ + NCJcheck(NCJdictget(zobj->jatts,NC_NCZARR_SUPERBLOCK_ATTR,&jzsuper)); + } + if(jzgrpp != NULL) *jzgrpp = jzgrp; + if(jzsuperp != NULL) *jzsuperp = jzsuper; + +done: + return THROW(stat); +} + +int +ZF2_decode_superblock(NC_FILE_INFO_T* file, const NCjson* jsuper, int* zformatp, int* nczformatp) +{ + int stat = NC_NOERR; + const NCjson* format = NULL; + int zformat = 0; + int nczformat = 0; + + NC_UNUSED(file); + assert(jsuper != NULL); + + if(zformatp) *zformatp = 0; + if(nczformatp) *nczformatp = 0; + + /* Extract the zarr format number */ + NCJcheck(NCJdictget(jsuper,"zarr_format",(NCjson**)&format)); + if(format != NULL) { + if(NCJsort(format) != NCJ_INT) {stat = NC_ENOTZARR; goto done;} + if(1!=sscanf(NCJstring(format),ZARR_FORMAT_VERSION_TEMPLATE,&zformat)) {stat = NC_ENOTZARR; goto done;} + } + /* Extract the nczarr format number */ + NCJcheck(NCJdictget(jsuper,"nczarr_format",(NCjson**)&format)); + if(format != NULL) { + if(NCJsort(format) != NCJ_INT) {stat = NC_ENOTZARR; goto done;} + if(1!=sscanf(NCJstring(format),NCZARR_FORMAT_VERSION_TEMPLATE,&nczformat)) {stat = NC_ENOTZARR; goto done;} + } + + if(zformatp) *zformatp = zformat; + if(nczformatp) *nczformatp = nczformat; + +done: + return THROW(stat); +} + +int +ZF2_decode_nczarr_group(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, const NCjson* jnczgrp, NClist* vars, NClist* subgrps, NClist* dimdefs) +{ + int stat = NC_NOERR; + size_t i; + const NCjson* jvalue = NULL; + + NC_UNUSED(grp); + + ZTRACE(3,"file=%s grp=%s",file->controller->path,grp->hdr.name); + + NCJcheck(NCZ_dictgetalt2(jnczgrp,&jvalue,"dimensions","dims")); + if(jvalue != NULL) { + if(NCJsort(jvalue) != NCJ_DICT) {stat = (THROW(NC_ENCZARR)); goto done;} + /* Decode the dimensions defined in this group */ + if((stat = decode_dim_decls(file,jvalue,dimdefs))) goto done; + } + + NCJcheck(NCZ_dictgetalt2(jnczgrp,&jvalue,"arrays","vars")); + if(jvalue != NULL) { + /* Extract the variable names in this group */ + for(i=0;iformat_file_info; + int purezarr = 0; + /* per-variable info */ + NCZ_VAR_INFO_T* zvar = (NCZ_VAR_INFO_T*)var->format_var_info; + const NCjson* jvar = NULL; + const NCjson* jatts = NULL; + const NCjson* jncvar = NULL; + const NCjson* jdimrefs = NULL; + const NCjson* jvalue = NULL; + const NCjson* jxarray = NULL; + int suppress = 0; /* Abort processing of this variable */ + nc_type vtype = NC_NAT; + size_t maxstrlen = 0; + size_t netcdf_rank = 0; /* true rank => scalar => 0 */ + size_t zarr_rank = 0; /* |shape| */ + struct NCZ_AttrInfo ainfo = NCZ_emptyAttrInfo(); + size64_t* shapes = NULL; + size64_t* chunks = NULL; +#ifdef NETCDF_ENABLE_NCZARR_FILTERS + int varsized = 0; + const NCjson* jfilter = NULL; +#endif + +#ifndef NETCDF_ENABLE_NCZARR_FILTERS + NC_UNUSED(filtersj); +#endif + + TESTPUREZARR; + + jvar = zobj->jobj; + assert(jvar != NULL); + jatts = zobj->jatts; + + /* Verify the format */ + { + int format; + NCJcheck(NCJdictget(jvar,"zarr_format",(NCjson**)&jvalue)); + sscanf(NCJstring(jvalue),ZARR_FORMAT_VERSION_TEMPLATE,&format); + if(format != zinfo->zarr.zarr_format) {stat = (THROW(NC_ENCZARR)); goto done;} + } + + /* Set the type and endianness of the variable */ + { + int endianness; + NCJcheck(NCJdictget(jvar,"dtype",(NCjson**)&jvalue)); + /* Convert dtype to nc_type + endianness */ + if((stat = dtype2nctype(NCJstring(jvalue),&vtype,&endianness,&maxstrlen))) goto done; + if(vtype > NC_NAT && vtype <= NC_MAX_ATOMIC_TYPE) { + /* Locate the NC_TYPE_INFO_T object */ + if((stat = ncz_gettype(file,var->container,vtype,&var->type_info))) goto done; + } else {stat = NC_EBADTYPE; goto done;} + var->endianness = endianness; + var->type_info->endianness = var->endianness; /* Propagate */ + if(vtype == NC_STRING) { + zsetmaxstrlen(maxstrlen,var); + if((stat = NCZ_sync_dual_att(file,(NC_OBJ*)var,NC_NCZARR_MAXSTRLEN_ATTR,DA_MAXSTRLEN,FIXATT))) goto done; + } + } + + { /* Extract the shape */ + NCJcheck(NCJdictget(jvar,"shape",(NCjson**)&jvalue)); + if(NCJsort(jvalue) != NCJ_ARRAY) {stat = THROW(NC_ENOTZARR); goto done;} + zarr_rank = NCJarraylength(jvalue); + if(zarr_rank == 0) {stat = THROW(NC_ENOTZARR); goto done;} + if((shapes = (size64_t*)calloc(zarr_rank,sizeof(size64_t)))==NULL) {stat = NC_ENOMEM; goto done;} + if((stat=NCZ_decodesizet64vec(jvalue, &zarr_rank, shapes))) goto done; + } + + /* + * Collect the dimension names for this variable. + * In order of preference: + * 1. _nczarr_var.dimensions -- the name are FQNs. + * 2. _ARRAY_DIMENSIONS -- the xarray names are relative names and are scoped to root group. + * 3. _Anonymous_Dim_n -- scoped to root group and n is the length of the dimensions. + */ + if(!purezarr) { + if(jatts == NULL) {stat = NC_ENCZARR; goto done;} + /* Extract the _NCZARR_ARRAY values */ + /* Do this first so we know about storage esp. scalar */ + /* Extract the NCZ_ARRAY dict */ + if((stat = NCZ_getnczarrkey(file,zobj,NC_NCZARR_ARRAY_ATTR,&jncvar))) goto done; + if(jncvar == NULL) {stat = NC_ENCZARR; goto done;} + assert((NCJsort(jncvar) == NCJ_DICT)); + /* Extract scalar flag */ + NCJcheck(NCJdictget(jncvar,"scalar",(NCjson**)&jvalue)); + if(jvalue != NULL) zvar->scalar = 1; + /* Ignore storage flag and treat everything as chunked */ + var->storage = NC_CHUNKED; + /* Extract dimrefs list */ + if((stat = NCZ_dictgetalt2(jncvar,&jdimrefs,"dimension_references","dimrefs"))) goto done; + if(jdimrefs != NULL) { + assert((NCJsort(jdimrefs) == NCJ_ARRAY)); + if(zvar->scalar) { + assert(NCJarraylength(jdimrefs) == 1); + } + } + } + if((zinfo->flags & FLAG_XARRAYDIMS) && jdimrefs == NULL && zobj->jatts != NULL) { /* Try XARRAY Attribute */ + NCJcheck(NCJdictget(zobj->jatts,NC_XARRAY_DIMS,(NCjson**)&jxarray)); + if(jxarray != NULL) { + assert((NCJsort(jxarray) == NCJ_ARRAY) || (NCJsort(jxarray) == NCJ_STRING)); + if(NCJarraylength(jvalue) != zarr_rank) {stat = NC_ENOTZARR; goto done;} + } + } + + /* Process dimrefs (might be NULL) */ + if((stat = decode_var_dimrefs(file,var,zarr_rank,shapes,jxarray,jdimrefs,dimrefs))) goto done; + + /* Rank processing */ + { + if(zarr_rank == 0) { + /* suppress variable */ + ZLOG(NCLOGWARN,"Empty shape for variable %s suppressed",var->hdr.name); + suppress = 1; + goto suppressvar; + } + if(zvar->scalar) + netcdf_rank = 0; + else + netcdf_rank = nclistlength(dimrefs); + /* Set the rank of the variable */ + if((stat = nc4_var_set_ndims(var, (int)netcdf_rank))) goto done; + } + + /* Capture dimension_separator (must precede chunk cache creation) */ + { + NCglobalstate* ngs = NC_getglobalstate(); + assert(ngs != NULL); + zvar->dimension_separator = 0; + NCJcheck(NCJdictget(jvar,"dimension_separator",(NCjson**)&jvalue)); + if(jvalue != NULL) { + /* Verify its value */ + if(NCJsort(jvalue) == NCJ_STRING && NCJstring(jvalue) != NULL && strlen(NCJstring(jvalue)) == 1) + zvar->dimension_separator = NCJstring(jvalue)[0]; + } + /* If value is invalid, then use global default */ + if(!islegaldimsep(zvar->dimension_separator)) + zvar->dimension_separator = ngs->zarr.dimension_separator; /* use global value */ + assert(islegaldimsep(zvar->dimension_separator)); /* we are hosed */ + } + + /* fill_value; must precede calls to adjust cache */ + { + NCJcheck(NCJdictget(jvar,"fill_value",(NCjson**)&jvalue)); + if(jvalue == NULL || NCJsort(jvalue) == NCJ_NULL) { + var->no_fill = NC_NOFILL; + if((stat = NCZ_disable_fill(file,var))) goto done; + } else { /* Fill in var->fill_value */ + var->no_fill = NC_FILL; + NCZ_clearAttrInfo(file,&ainfo); + ainfo.name = NC_FillValue; + ainfo.nctype = vtype; + if((stat = NCZ_computeattrdata(file,jvalue,&ainfo))) goto done; + /* Create var->fill_value */ + assert(ainfo.nctype == vtype); + if((stat = NCZ_set_dual_obj_data(file,(NC_OBJ*)var,NC_FillValue,DA_FILLVALUE,ainfo.datalen,ainfo.data))) goto done; + /* propagate to _FillValue attribute */ + if((stat = NCZ_sync_dual_att(file,(NC_OBJ*)var,NC_FillValue,DA_FILLVALUE,FIXATT))) goto done; + /* clear+reclaim ainfo */ + NCZ_clearAttrInfo(file,&ainfo); + } + } + + /* chunks */ + { + NCJcheck(NCJdictget(jvar,"chunks",(NCjson**)&jvalue)); + if(jvalue != NULL && NCJsort(jvalue) != NCJ_ARRAY) + {stat = (THROW(NC_ENCZARR)); goto done;} + if(zarr_rank == 0) {stat = NC_ENCZARR; goto done;} + if(var->ndims != netcdf_rank) {stat = (THROW(NC_ENCZARR)); goto done;} + var->storage = NC_CHUNKED; + if((chunks = malloc(sizeof(size64_t)*zarr_rank)) == NULL) {stat = NC_ENOMEM; goto done;} + if((stat = NCZ_decodesizet64vec(jvalue, &zarr_rank, chunks))) + {stat = NC_ENOMEM; goto done;} + } + + /* Capture row vs column major; currently, column major not used*/ + { + NCJcheck(NCJdictget(jvar,"order",(NCjson**)&jvalue)); + if(strcmp(NCJstring(jvalue),"C") > 0) + ((NCZ_VAR_INFO_T*)var->format_var_info)->order = 1; + else ((NCZ_VAR_INFO_T*)var->format_var_info)->order = 0; + } + +#ifdef NETCDF_ENABLE_NCZARR_FILTERS + /* filters key */ + /* From V2 Spec: A list of JSON objects providing codec configurations, + or null if no filters are to be applied. Each codec configuration + object MUST contain a "id" key identifying the codec to be used. */ + /* Do filters key before compressor key so final filter chain is in correct order */ + if(var->filters == NULL) var->filters = (void*)nclistnew(); + if((stat = NCZ_filter_initialize())) goto done; + { + NCJcheck(NCJdictget(jvar,"filters",(NCjson**)&jvalue)); + if(jvalue != NULL && NCJsort(jvalue) != NCJ_NULL) { + int k; + if(NCJsort(jvalue) != NCJ_ARRAY) {stat = NC_EFILTER; goto done;} + for(k=0;;k++) { + jfilter = NULL; + jfilter = NCJith(jvalue,k); + if(jfilter == NULL) break; /* done */ + if(NCJsort(jfilter) != NCJ_DICT) {stat = NC_EFILTER; goto done;} + nclistpush(filtersj,jfilter); + } + } + } + /* compressor key */ + /* From V2 Spec: A JSON object identifying the primary compression codec and providing + configuration parameters, or ``null`` if no compressor is to be used. */ + { + NCJcheck(NCJdictget(jvar,"compressor",(NCjson**)&jfilter)); + if(jfilter != NULL && NCJsort(jfilter) != NCJ_NULL) { + if(NCJsort(jfilter) != NCJ_DICT) {stat = NC_EFILTER; goto done;} + nclistpush(filtersj,jfilter); + } + } + /* Suppress variable if there are filters and var is not fixed-size */ + if(varsized && nclistlength((NClist*)var->filters) > 0) + suppress = 1; +#endif /*NETCDF_ENABLE_NCZARR_FILTERS*/ + +#if 0 + if(zarr_rank > 0) { + /* Convert dimrefs to specific dimensions */ + if((stat = computedimrefs(file, var, netcdf_rank, dimnames, shapes, var->dim))) goto done; + if(!zvar->scalar) { + /* Extract the dimids */ + for(j=0;jdimids[j] = var->dim[j]->hdr.id; + } + } +#endif /*0*/ + +suppressvar: + if(suppress) { + NC_GRP_INFO_T* grp = var->container; + /* Reclaim NCZarr variable specific info */ + (void)NCZ_zclose_var1(var); + /* Remove from list of variables and reclaim the top level var object */ + (void)nc4_var_list_del(grp, var); + var = NULL; + } + + if(shapesp) {*shapesp = shapes; shapes = NULL;} + if(chunksp) {*chunksp = chunks; chunks = NULL;} + +done: + nullfree(chunks); + NCZ_clearAttrInfo(file,&ainfo); + nullfree(shapes); shapes = NULL; + return THROW(stat); +} + +int +ZF2_decode_attributes(NC_FILE_INFO_T* file, NC_OBJ* container, const NCjson* jatts) +{ + int stat = NC_NOERR; + size_t i; + NC_VAR_INFO_T* var = NULL; + NC_GRP_INFO_T* grp = NULL; + NC_ATT_INFO_T* att = NULL; + struct NCZ_AttrInfo ainfo = NCZ_emptyAttrInfo(); + const NCjson* jtypes = NULL; + const NCjson* jnczattr = NULL; + nc_type typehint = NC_NAT; + + if(container->sort == NCGRP) { + grp = ((NC_GRP_INFO_T*)container); + } else { + var = ((NC_VAR_INFO_T*)container); + } + + /* See if we have jtypes */ + if((stat = NCZ_dictgetalt2(jatts,&jnczattr,NC_NCZARR_ATTRS_ATTR,NCZ_ATTR_OLD_ATTR))) goto done; + if(jnczattr != NULL) { + NCJcheck(NCJdictget(jnczattr,"types",(NCjson**)&jtypes)); + } + + if(jatts != NULL && NCJsort(jatts)==NCJ_DICT) { + for(i=0;iparent == NULL && strcmp(aname,NC_NCZARR_DFALT_MAXSTRLEN_ATTR)==0) + isdfaltmaxstrlen = 1; + if(var != NULL && strcmp(aname,NC_NCZARR_MAXSTRLEN_ATTR)==0) + ismaxstrlen = 1; + if(var != NULL && strcmp(aname,NC_FillValue)==0) + isfillvalue = 1; + /* See if this is reserved attribute */ + ra = NC_findreserved(aname); + if(ra != NULL) { + /* case 1: name = _NCProperties, grp=root, varid==NC_GLOBAL */ + if(strcmp(aname,NCPROPS)==0 && grp != NULL && file->root_grp == grp) { + /* Setup provenance */ + if(NCJsort(javalue) != NCJ_STRING) + {stat = (THROW(NC_ENCZARR)); goto done;} /*malformed*/ + if((stat = NCZ_read_provenance(file,aname,NCJstring(javalue)))) goto done; + } +#if 0 + /* case 2: name = _ARRAY_DIMENSIONS, sort==NCVAR, flags & HIDDENATTRFLAG */ + if(strcmp(aname,NC_XARRAY_DIMS)==0 && var != NULL && (ra->flags & HIDDENATTRFLAG)) { + /* store for later */ + size_t i; + assert(NCJsort(value) == NCJ_ARRAY); + if((zvar->xarray = nclistnew())==NULL) + {stat = NC_ENOMEM; goto done;} + for(i=0;ixarray,strdup(NCJstring(k))); + } + } +#endif + /* case other: if attribute is hidden */ + if(ra->flags & HIDDENATTRFLAG) continue; /* ignore it */ + } + + /* Create the attribute */ + if(var != NULL && strcmp(aname,NC_FillValue)==0) + typehint = var->type_info->hdr.id; /* use var type as hint */ + else + typehint = NC_NAT; + /* Collect the attribute's type and value */ + NCZ_clearAttrInfo(file,&ainfo); + if((stat = computeattrinfo(file,typehint,aname,jtypes,javalue,&ainfo))) goto done; + if((stat = ncz_makeattr(file,container,&ainfo,&att))) goto done; + if(isfillvalue) { + if((stat = NCZ_sync_dual_att(file,(NC_OBJ*)var,NC_FillValue,DA_FILLVALUE,FIXOBJ))) goto done; + } + if(ismaxstrlen && att->nc_typeid == NC_INT) { + if((stat = NCZ_sync_dual_att(file,(NC_OBJ*)var,NC_NCZARR_MAXSTRLEN_ATTR,DA_MAXSTRLEN,FIXOBJ))) goto done; + } + if(isdfaltmaxstrlen && att->nc_typeid == NC_INT) { + if((stat = NCZ_sync_dual_att(file,(NC_OBJ*)grp,NC_NCZARR_DFALT_MAXSTRLEN_ATTR,DA_DFALTSTRLEN,FIXOBJ))) goto done; + } + } + } + +done: + NCZ_clearAttrInfo(file,&ainfo); + return THROW(stat); +} + +/**************************************************/ + +int +ZF2_upload_grp(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, struct ZOBJ* zobj) +{ + int stat = NC_NOERR; + char* fullpath = NULL; + char* key = NULL; + + /* Construct grp key */ + if((stat = NCZ_grpkey(grp,&fullpath))) goto done; + + /* build ZGROUP path */ + if((stat = nczm_concat(fullpath,Z2GROUP,&key))) goto done; + /* Write to map */ + if((stat=NCZMD_update_json_content(file,NCZMD_GROUP,key,zobj->jobj))) goto done; + nullfree(key); key = NULL; + + if(zobj->jatts != NULL) { + /* build ZATTRS path */ + if((stat = nczm_concat(fullpath,Z2ATTRS,&key))) goto done; + /* Write to map */ + if((stat=NCZMD_update_json_content(file,NCZMD_ATTRS,key,zobj->jatts))) goto done; + nullfree(key); key = NULL; + } + +done: + nullfree(fullpath); + nullfree(key); + return THROW(stat); +} + +int +ZF2_upload_var(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, struct ZOBJ* zobj) +{ + int stat = NC_NOERR; + char* fullpath = NULL; + char* key = NULL; + + /* Construct var key */ + if((stat = NCZ_varkey(var,&fullpath))) goto done; + + /* build ZARRAY path */ + if((stat = nczm_concat(fullpath,Z2ARRAY,&key))) goto done; + /* Write to map */ + if((stat=NCZMD_update_json_content(file,NCZMD_ARRAY,key,zobj->jobj))) goto done; + nullfree(key); key = NULL; + + if(zobj->jatts != NULL) { + /* build ZATTRS path */ + if((stat = nczm_concat(fullpath,Z2ATTRS,&key))) goto done; + /* Write to map */ + if((stat=NCZMD_update_json_content(file,NCZMD_GROUP,key,zobj->jatts))) goto done; + nullfree(key); key = NULL; + } + +done: + nullfree(fullpath); + nullfree(key); + return THROW(stat); + return THROW(stat); +} + +/*Write JSON Metadata*/ +int +ZF2_encode_superblock(NC_FILE_INFO_T* file, NCjson** jsuperp) +{ + int stat = NC_NOERR; + char version[64]; + NCZ_FILE_INFO_T* zinfo = (NCZ_FILE_INFO_T*)file->format_file_info; + NCjson* jsuper = NULL; + /* create superblock */ + snprintf(version,sizeof(version),NCZARR_FORMAT_VERSION_TEMPLATE, zinfo->zarr.nczarr_format); + NCJnew(NCJ_DICT,&jsuper); + NCJcheck(NCJinsertstring(jsuper,"version",version)); + if(jsuperp) {*jsuperp = jsuper; jsuper = NULL;} +done: + NCJreclaim(jsuper); + return THROW(stat); +} + +int +ZF2_encode_nczarr_group(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NCjson** jnczgrpp) +{ + int stat = NC_NOERR; + size_t i; + NCjson* jnczgrp = NULL; + NCjson* jdims = NULL; + NCjson* jvars = NULL; + NCjson* jsubgrps = NULL; + NCjson* jsize = NULL; + NCjson* junlim = NULL; + NCjson* jdim = NULL; + + NC_UNUSED(file); + NC_UNUSED(grp); + /* Create the NCZ_GROUP dict */ + NCJnew(NCJ_DICT,&jnczgrp); + + /* Collect and encode the grp dimension declarations */ + NCJnew(NCJ_DICT,&jdims); + for(i=0;idim);i++) { + NC_DIM_INFO_T* dim = (NC_DIM_INFO_T*)ncindexith(grp->dim,i); + char digits[64]; + snprintf(digits,sizeof(digits),"%zu",dim->len); + NCJcheck(NCJnewstring(NCJ_INT,digits,&jsize)); + NCJcheck(NCJnewstring(NCJ_INT,(dim->unlimited?"1":"0"),&junlim)); + NCJnew(NCJ_DICT,&jdim); + NCJcheck(NCJinsert(jdim,"size",jsize)); jsize = NULL; + NCJcheck(NCJinsert(jdim,"unlimited",junlim)); junlim = NULL; + NCJcheck(NCJinsert(jdims,dim->hdr.name,jdim)); jdim = NULL; + } + NCJcheck(NCJinsert(jnczgrp,"dimensions",jdims)); jdims = NULL; + + /* Collect and insert the variable names in this group */ + NCJnew(NCJ_ARRAY,&jvars); + for(i=0;ivars);i++) { + NC_VAR_INFO_T* var = (NC_VAR_INFO_T*)ncindexith(grp->vars,i); + NCJcheck(NCJaddstring(jvars,NCJ_STRING,var->hdr.name)); + } + NCJcheck(NCJinsert(jnczgrp,"arrays",jvars)); jvars = NULL; + + /* Collect and insert the variable names in this group */ + NCJnew(NCJ_ARRAY,&jsubgrps); + for(i=0;ichildren);i++) { + NC_GRP_INFO_T* child = (NC_GRP_INFO_T*)ncindexith(grp->children,i); + NCJcheck(NCJaddstring(jsubgrps,NCJ_STRING,child->hdr.name)); + } + NCJcheck(NCJinsert(jnczgrp,"groups",jsubgrps)); jsubgrps = NULL; + + if(jnczgrpp) {*jnczgrpp = jnczgrp; jnczgrp = NULL;} +done: + NCJreclaim(jnczgrp); + NCJreclaim(jdims); + NCJreclaim(jvars); + NCJreclaim(jsubgrps); + NCJreclaim(jsize); + NCJreclaim(junlim); + NCJreclaim(jdim); + return THROW(stat); +} + +int +ZF2_encode_group(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NCjson** jgroupp) +{ + int stat = NC_NOERR; + NCZ_FILE_INFO_T* zinfo = (NCZ_FILE_INFO_T*)file->format_file_info; + NCjson* jgroup = NULL; + + NC_UNUSED(grp); + NCJcheck(NCJnew(NCJ_DICT,&jgroup)); + NCJcheck(NCJinsertint(jgroup,"zarr_format",zinfo->zarr.zarr_format)); + if(jgroupp) {*jgroupp = jgroup; jgroup = NULL;} +done: + NCJreclaim(jgroup); + return THROW(stat); +} + +int +ZF2_encode_nczarr_array(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, NCjson** jnczarrayp) +{ + int stat = NC_NOERR; + NClist* dimrefs = NULL; + NCjson* jnczarray = NULL; + NCjson* jdimrefs = NULL; + NCbytes* dimfqn = ncbytesnew(); + size_t i; + + NC_UNUSED(file); + + NCJnew(NCJ_DICT,&jnczarray); + + if((dimrefs = nclistnew())==NULL) {stat = NC_ENOMEM; goto done;} + if(var->ndims > 0) { + for(i=0;indims;i++) { + NC_DIM_INFO_T* dim = var->dim[i]; + if((stat = NCZ_makeFQN((NC_OBJ*)dim,dimfqn))) goto done; + nclistpush(dimrefs,ncbytesextract(dimfqn)); + } + } else { /*scalar*/ + nclistpush(dimrefs,strdup(DIMSCALAR)); + } + + /* Create the dimrefs json object */ + NCJnew(NCJ_ARRAY,&jdimrefs); + while(nclistlength(dimrefs)>0) { + char* fqn = (char*)nclistremove(dimrefs,0); + NCJaddstring(jdimrefs,NCJ_STRING,fqn); + nullfree(fqn); fqn = NULL; + } + /* Insert dimension_references */ + NCJcheck(NCJinsert(jnczarray,"dimension_references",jdimrefs)); jdimrefs = NULL; + + /* Add the _Storage flag */ + /* Record if this is a scalar */ + if(var->ndims == 0) { + NCJcheck(NCJinsertint(jnczarray,"scalar",1)); + } + + /* everything looks like it is chunked */ + NCJcheck(NCJinsertstring(jnczarray,"storage","chunked")); + + if(jnczarrayp) {*jnczarrayp = jnczarray; jnczarray = NULL;} +done: + nclistfreeall(dimrefs); + ncbytesfree(dimfqn); + NCJreclaim(jnczarray); + NCJreclaim(jdimrefs); + return THROW(stat); +} + +int +ZF2_encode_var(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, NClist* filtersj, NCjson** jvarp) +{ + int stat = NC_NOERR; + NCZ_VAR_INFO_T* zvar = (NCZ_VAR_INFO_T*)var->format_var_info; + NCjson* jvar = NULL; + NCjson* jshape = NULL; + NCjson* jchunks = NULL; + NCjson* jfill = NULL; + NCjson* jcompressor = NULL; + NCjson* jfilters = NULL; + size64_t shape[NC_MAX_VAR_DIMS]; + char number[1024]; + size_t zarr_rank = 0; + size_t i; + + NC_UNUSED(file); +#ifndef NETCDF_ENABLE_NCZARR_FILTERS + NC_UNUSED(filtersj); +#endif + + NCJnew(NCJ_DICT,&jvar); + + /* if scalar */ + if(var->ndims == 0) { + shape[0] = 1; + zarr_rank = 1; + } else { + zarr_rank = var->ndims; + for(i=0;indims;i++) { /* Collect the shape vector */ + NC_DIM_INFO_T* dim = var->dim[i]; + shape[i] = dim->len; + } + } + + /* zarr_format key */ + NCJcheck(NCJinsertint(jvar,"zarr_format",ZARRFORMAT2)); + + /* shape key */ + /* Integer list defining the length of each dimension of the array.*/ + /* Create the list */ + NCJnew(NCJ_ARRAY,&jshape); + for(i=0;itype_info->endianness; + int atomictype = var->type_info->hdr.id; + char* dtypename = NULL; + assert(atomictype > 0 && atomictype <= NC_MAX_ATOMIC_TYPE); + if((stat = nctype2dtype(atomictype,endianness,NCZ_get_maxstrlen((NC_OBJ*)var),&dtypename,NULL))) goto done; + NCJcheck(NCJinsertstring(jvar,"dtype",dtypename)); + nullfree(dtypename); dtypename = NULL; + } + + /* chunks key */ + /* The zarr format does not support the concept + of contiguous (or compact), so it will never appear in the read case. + */ + /* Create the list of chunksizes */ + NCJnew(NCJ_ARRAY,&jchunks); + if(zvar->scalar) { + NCJaddstring(jchunks,NCJ_INT,"1"); /* one chunk of size 1 */ + } else for(i=0;ichunksizes[i]; + snprintf(number,sizeof(number),"%lld",len); + NCJaddstring(jchunks,NCJ_INT,number); + } + NCJcheck(NCJinsert(jvar,"chunks",jchunks)); jchunks = NULL; + + /* fill_value key */ + if(var->no_fill) { + NCJnew(NCJ_NULL,&jfill); + } else {/*!var->no_fill*/ + int atomictype = var->type_info->hdr.id; + if(var->fill_value == NULL) { + if((stat = NCZ_ensure_fill_value(var))) goto done; + } + /* Convert var->fill_value to a string */ + if((stat = NCZ_stringconvert(atomictype,1,var->fill_value,&jfill))) goto done; + assert(jfill->sort != NCJ_ARRAY); + } + NCJcheck(NCJinsert(jvar,"fill_value",jfill)); jfill = NULL; + + /* order key */ + /* "C" means row-major order, i.e., the last dimension varies fastest; + "F" means column-major order, i.e., the first dimension varies fastest.*/ + /* Default to C for now */ + NCJcheck(NCJinsertstring(jvar,"order","C")); + +#ifdef NETCDF_ENABLE_NCZARR_FILTERS + /* Compressor and Filters */ + if(nclistlength(filtersj) > 0) { + jcompressor = (NCjson*)nclistremove(filtersj,nclistlength(filtersj)-1); + } else +#endif /*NETCDF_ENABLE_NCZARR_FILTERS*/ + { /* no filters at all; default compressor to null */ + NCJnew(NCJ_NULL,&jcompressor); + } + NCJcheck(NCJinsert(jvar,"compressor",jcompressor)); jcompressor = NULL; + + /* filters key */ + /* From V2 Spec: A list of JSON objects providing codec configurations, + or null if no filters are to be applied. Each codec configuration + object MUST contain a "id" key identifying the codec to be used. */ + /* A list of JSON objects providing codec configurations, or ``null`` + if no filters are to be applied. */ +#ifdef NETCDF_ENABLE_NCZARR_FILTERS + if(nclistlength(filtersj) > 0) { + /* jfilters holds the array of encoded filters */ + NCJnew(NCJ_ARRAY,&jfilters); + while(nclistlength(filtersj) > 0) { /* Insert the first n filters; last one was used as compressor */ + NCjson* jfilter = (NCjson*)nclistremove(filtersj,0); + NCJcheck(NCJappend(jfilters,jfilter)); jfilter = NULL; + } + } else +#endif /*NETCDF_ENABLE_NCZARR_FILTERS*/ + { + NCJnew(NCJ_NULL,&jfilters); /* no filters at all */ + } + NCJcheck(NCJinsert(jvar,"filters",jfilters)); jfilters = NULL; + + /* dimension_separator key */ + /* Single char defining the separator in chunk keys */ + if(zvar->dimension_separator != DFALT_DIM_SEPARATOR_V2) { + char sep[2]; + sep[0] = zvar->dimension_separator;/* make separator a string*/ + sep[1] = '\0'; + NCJcheck(NCJinsertstring(jvar,"dimension_separator",sep)); + } + + if(jvarp) {*jvarp = jvar; jvar = NULL;} + +done: + NCJreclaim(jvar); + NCJreclaim(jshape); + NCJreclaim(jchunks); + NCJreclaim(jfill); + NCJreclaim(jcompressor); + NCJreclaim(jfilters); + return THROW(stat); +} + +int +ZF2_encode_attributes(NC_FILE_INFO_T* file, NC_OBJ* container, NCjson** jnczconp, NCjson** jsuperp, NCjson** jattsp) +{ + int stat = NC_NOERR; + NCZ_FILE_INFO_T* zinfo = (NCZ_FILE_INFO_T*)file->format_file_info; + size_t i; + NCindex* atts = NULL; + NCjson* jatts = NULL; + NCjson* jtypes = NULL; + NCjson* jdata = NULL; + NCjson* jnczatt = NULL; + NC_VAR_INFO_T* var = NULL; + NC_GRP_INFO_T* grp = NULL; + char* d2name = NULL; + int endianness = (NC_isLittleEndian()?NC_ENDIAN_LITTLE:NC_ENDIAN_BIG); + const char* nczname = NULL; + int purezarr = 0; + size_t typesize = 0; + + TESTPUREZARR; + + if(container->sort == NCVAR) { + var = (NC_VAR_INFO_T*)container; + atts = var->att; + nczname = NC_NCZARR_ARRAY_ATTR; + } else if(container->sort == NCGRP) { + grp = (NC_GRP_INFO_T*)container; + atts = grp->att; + nczname = NC_NCZARR_GROUP_ATTR; + } + + if(ncindexsize(atts) > 0) { + NCJnew(NCJ_DICT,&jatts); + NCJnew(NCJ_DICT,&jtypes); + + /* Walk all the attributes convert to json and collect the dtype */ + for(i=0;inc_typeid > NC_MAX_ATOMIC_TYPE) {stat = (THROW(NC_ENCZARR)); goto done;} + if(a->nc_typeid == NC_STRING) + typesize = (size_t)NCZ_get_maxstrlen(container); + else { + if((stat = NC4_inq_atomic_type(a->nc_typeid,NULL,&typesize))) goto done; + } + + /* Convert to storable json */ + + if(a->nc_typeid == NC_CHAR && NCZ_iscomplexjsonstring(a->hdr.name,a->len,(char*)a->data,&jdata)) { + d2name = strdup(NC_JSON_DTYPE_V2); + } else { + if((stat = NCZ_stringconvert(a->nc_typeid,a->len,a->data,&jdata))) goto done; + /* Collect the corresponding dtype */ + if((stat = nctype2dtype(a->nc_typeid,endianness,typesize,NULL,&d2name))) goto done; + } + + /* Insert the attribute; optionally consumes jdata and d2name */ + if((stat = ncz_insert_attr(jatts,jtypes,a->hdr.name,&jdata,d2name))) goto done; + + /* cleanup */ + nullfree(d2name); d2name = NULL; + nullfree(d2attr); d2attr = NULL; + } + } + + /* Finalize the contents of jtypes and jatts */ + if(!purezarr) { + if(jtypes == NULL) NCJnew(NCJ_DICT,&jtypes); + if(jatts == NULL) NCJnew(NCJ_DICT,&jatts); + /* Insert _nczarr_group|_nczarr_var + type */ + if(jnczconp != NULL && *jnczconp != NULL) { + if((stat = ncz_insert_attr(jatts,jtypes,nczname,jnczconp,NC_JSON_DTYPE_V2))) goto done; + *jnczconp = NULL; + } + /* Insert _nczarr_super (if root group) + type */ + if(jsuperp != NULL && *jsuperp != NULL) { + if((stat=ncz_insert_attr(jatts,jtypes,NC_NCZARR_SUPERBLOCK_ATTR,jsuperp,NC_JSON_DTYPE_V2))) goto done; + *jsuperp = NULL; + } + + /* Build _nczarr_attrs */ + NCJnew(NCJ_DICT,&jnczatt); + NCJcheck(NCJinsert(jnczatt,"types",jtypes)); + /* WARNING, jtypes may undergo further changes */ + /* Insert _nczarr_attrs + type */ + if((stat=ncz_insert_attr(jatts,jtypes,NC_NCZARR_ATTRS_ATTR,&jnczatt,NC_JSON_DTYPE_V2))) goto done; + jtypes = NULL; + assert(*jnczconp == NULL && jnczatt == NULL && jtypes == NULL); + } + + if(jattsp) {*jattsp = jatts; jatts = NULL;} + +done: + nullfree(d2name); + NCJreclaim(jdata); + NCJreclaim(jatts); + NCJreclaim(jtypes); + return THROW(stat); +} + +#ifdef NETCDF_ENABLE_NCZARR_FILTERS +/*Filter Processing*/ +static int +ZF2_encode_filter(NC_FILE_INFO_T* file, NCZ_Filter* filter, NCjson** jfilterp) +{ + int stat = NC_NOERR; + NCjson* jfilter = NULL; + + NC_UNUSED(file); + + /* assumptions */ + assert(filter->flags & FLAG_WORKING); + + /* Convert the HDF5 id + parameters to the codec form */ + + /* We need to ensure the the current visible parameters are defined and had the opportunity to come + from the working parameters */ + assert((filter->flags & (FLAG_VISIBLE | FLAG_WORKING)) == (FLAG_VISIBLE | FLAG_WORKING)); + + /* Convert the visible parameters back to codec */ + /* Clear any previous codec */ + nullfree(filter->codec.id); filter->codec.id = NULL; + nullfree(filter->codec.codec); filter->codec.codec = NULL; + filter->codec.id = strdup(filter->plugin->codec.codec->codecid); + if(filter->plugin->codec.codec->NCZ_hdf5_to_codec) { + if((stat = filter->plugin->codec.codec->NCZ_hdf5_to_codec(NCplistzarrv2,filter->hdf5.id,filter->hdf5.visible.nparams,filter->hdf5.visible.params,&filter->codec.codec))) goto done; + } else + {stat = NC_EFILTER; goto done;} + + /* Parse the codec as the return */ + NCJcheck(NCJparse(filter->codec.codec,0,&jfilter)); + if(jfilterp) {*jfilterp = jfilter; jfilter = NULL;} + +done: + NCJreclaim(jfilter); + return THROW(stat); +} + +static int +ZF2_decode_filter(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, NCjson* jfilter, NCZ_Filter* filter) +{ + int stat = NC_NOERR; + const NCjson* jvalue = NULL; + NCZ_Plugin* plugin = NULL; + NCZ_Codec codec = NCZ_codec_empty(); + NCZ_HDF5 hdf5 = NCZ_hdf5_empty(); + + NC_UNUSED(file); + + if(var->filters == NULL) var->filters = nclistnew(); + + /* Get the id of this codec filter */ + NCJcheck(NCJdictget(jfilter,"id",(NCjson**)&jvalue)); + if(NCJsort(jvalue) != NCJ_STRING) {stat = THROW(NC_ENOFILTER); goto done;} + + /* Save the codec */ + if((codec.id = strdup(NCJstring(jvalue)))==NULL) {stat = NC_ENOMEM; goto done;} + NCJcheck(NCJunparse(jfilter,0,&codec.codec)); + + /* Find the plugin for this filter */ + if((stat = NCZ_plugin_lookup(codec.id,&plugin))) goto done; + + if(plugin != NULL) { + /* Save the hdf5 id */ + hdf5.id = plugin->codec.codec->hdf5id; + /* Convert the codec to hdf5 form visible parameters */ + if(plugin->codec.codec->NCZ_codec_to_hdf5) { + if((stat = plugin->codec.codec->NCZ_codec_to_hdf5(NCplistzarrv2,codec.codec,&hdf5.id,&hdf5.visible.nparams,&hdf5.visible.params))) + goto done; + } + filter->flags |= FLAG_VISIBLE; + filter->hdf5 = hdf5; hdf5 = NCZ_hdf5_empty(); + filter->codec = codec; codec = NCZ_codec_empty(); + filter->plugin = plugin; plugin = NULL; + } else { + /* Create a fake filter so we do not forget about this codec */ + filter->hdf5 = NCZ_hdf5_empty(); + filter->codec = codec; codec = NCZ_codec_empty(); + } + +done: + ncz_hdf5_clear(&hdf5); + ncz_codec_clear(&codec); + return THROW(stat); +} +#else /*!NETCDF_ENABLE_NCZARR_FILTERS*/ +static int +ZF2_encode_filter(NC_FILE_INFO_T* file, NCZ_Filter* filter, NCjson** jfilterp) +{ + NC_UNUSED(file); + NC_UNUSED(filter); + if(jfilterp) *jfilterp = NULL; + return NC_NOERR; +} + +static int +ZF2_decode_filter(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, NCjson* jfilter, NCZ_Filter* filter) +{ + NC_UNUSED(file); + NC_UNUSED(var); + NC_UNUSED(jfilter); + NC_UNUSED(filter); + return NC_NOERR; +} +#endif /*NETCDF_ENABLE_NCZARR_FILTERS*/ + +/*Search*/ +int +ZF2_searchobjects(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NClist* varnames, NClist* subgrpnames) +{ + int stat = NC_NOERR; + size_t i; + char* grpkey = NULL; + char* subgrpkey = NULL; + char* varkey = NULL; + char* zarray = NULL; + char* zgroup = NULL; + NClist* matches = nclistnew(); + + /* Compute the key for the grp */ + if((stat = NCZ_grpkey(grp,&grpkey))) goto done; + if((stat = NCZMD_list(file,grpkey,matches))) goto done; /* Shallow listing */ + /* Search grp for group-level .zxxx and for var-level .zxxx*/ + for(i=0;i 0) ncbytesappend(key,dimsep); + /* Print as decimal with no leading zeros */ + snprintf(sindex,sizeof(sindex),"%lu",(unsigned long)chunkindices[r]); + ncbytescat(key,sindex); + } + ncbytesnull(key); + if(keyp) *keyp = ncbytesextract(key); + ncbytesfree(key); + return THROW(stat); +} + +int +ZF2_decode_chunkkey(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, const char* chunkname, size_t* rankp, size64_t** chunkindicesp) +{ + int stat = NC_NOERR; + const char* oldp; + char* newp; + size64_t* chunkindices = NULL; + char sep; + size_t rank,r; + char* chunkkey = strdup(chunkname); + + NC_UNUSED(file); + NC_UNUSED(var); + + /* Figure out the separator char by looking for the first non digit in the chunkkey */ + sep = '\0'; + for(oldp=chunkname;*oldp;oldp++) { + char c = *oldp; + if(c < '0' || c > '9') {sep = c; break;} + } + assert(sep=='\0' || islegaldimsep(sep)); + + /* Pass 1 to get rank and separate the indices*/ + oldp = chunkkey; + if(sep == '\0') + rank = 1; + else for(rank=0;;) { + newp = strchr(oldp,sep); /* look for next sep or eos */ + rank++; + if(newp == NULL) break; + *newp = '\0'; + oldp = newp+1; + } + /* Create index vector */ + if((chunkindices = (size64_t*)malloc(rank*sizeof(size64_t)))==NULL) {stat = NC_ENOMEM; goto done;} + /* Pass 2 to get indices */ + oldp = chunkkey; + for(r=0;rhdr.name; + if(i > 0) ncbytescat(buf,","); + ncbytescat(buf,dimname); + } + ncbytescat(buf,"]"); + if(xarraydimsp) {*xarraydimsp = ncbytesextract(buf);} + if(zarr_rankp) {*zarr_rankp = zarr_rank;} + + ncbytesfree(buf); + return THROW(stat); +} + +static char +ZF2_default_dimension_separator(NC_FILE_INFO_T* file) +{ + NC_UNUSED(file); + return DFALT_DIM_SEPARATOR_V2; +} + +/**************************************************/ +/* Support Functions */ + +static int +decode_dim_decls(NC_FILE_INFO_T* file, const NCjson* jdims, NClist* dimdefs) +{ + int stat = NC_NOERR; + size_t i; + struct NCZ_DimInfo* dimdef = NULL; + + NC_UNUSED(file); + + assert(NCJsort(jdims) == NCJ_DICT); + for(i=0;inorm_name,NCJstring(jname),sizeof(dimdef->norm_name)); + + if(NCJisatomic(jdim)) { /* old-style length only dimension spec */ + NCJcheck(NCJcvt(jdim,NCJ_INT,&cvt)); + dimdef->shape = (size64_t)cvt.ival; + dimdef->unlimited = 0; + } else { + const NCjson* jsize = NULL; + const NCjson* junlim = NULL; + assert(NCJsort(jdim) == NCJ_DICT); + NCJcheck(NCJdictget(jdim,"size",(NCjson**)&jsize)); + NCJcheck(NCJdictget(jdim,"unlimited",(NCjson**)&junlim)); + NCJcheck(NCJcvt(jsize,NCJ_INT,&cvt)); + dimdef->shape = (size64_t)cvt.ival; + memset(&cvt,0,sizeof(cvt)); + NCJcheck(NCJcvt(junlim,NCJ_INT,&cvt)); + dimdef->unlimited = (cvt.ival == 0 ? 0 : 1); + } + nclistpush(dimdefs,dimdef); dimdef = NULL; + } + +done: + NCZ_reclaim_diminfo(dimdef); + return THROW(stat); +} + +static int +decode_var_dimrefs(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, size_t zarr_rank, const size64_t* shapes, const NCjson* jxarray, const NCjson* jdimrefs, NClist* dimrefs) +{ + int stat = NC_NOERR; + size_t j; + int purezarr = 0; + NCZ_FILE_INFO_T* zinfo = (NCZ_FILE_INFO_T*)file->format_file_info; + + TESTPUREZARR; + + assert(dimrefs != NULL); + if(!purezarr && jdimrefs != NULL) { /* Use the NCZarr dimension fqns */ + for(j=0;jformat_var_info)->scalar = 1; /* scalar */ + } + } + for(i=0;i N_NCZARR_TYPES) return NC_EINVAL; + dtemplate = znamesv2[nctype].dtype; + dattrtemplate = znamesv2[nctype].dtypeattr; + if(dattrtemplate == NULL) dattrtemplate = dtemplate; + snprintf(dtype,sizeof(dtype),dtemplate,typesize); + snprintf(dattrtype,sizeof(dattrtype),dattrtemplate,typesize); + /* Set endianness */ + switch (nctype) { + case NC_STRING: + case NC_CHAR: + case NC_JSON: + break; + default: + switch (endianness) { + case NC_ENDIAN_LITTLE: dtype[0] = '<'; break; + case NC_ENDIAN_BIG: dtype[0] = '>'; break; + case NC_ENDIAN_NATIVE: default: break; + } + dattrtype[0] = dtype[0]; + } + if(dtypep) *dtypep = strdup(dtype); + if(dattrtypep) *dattrtypep = strdup(dattrtype); + return NC_NOERR; +} + +/* +@internal Convert a numcodecs Zarr v2 dtype spec to a corresponding nc_type. +@param file - [in] +@param dtype - [in] dtype the dtype to convert +@param isattr - [in] 1 => type came from an attribute +@param nctypep - [out] hold corresponding type +@param endianp - [out] hold corresponding endianness +@param maxstrlenp - [out] hold corresponding type size (for fixed length strings) +@return NC_NOERR +@return NC_EINVAL +@author Dennis Heimbigner +*/ + +static int +dtype2nctype(const char* dtype, nc_type* nctypep, int* endianp, size_t* maxstrlenp) +{ + int stat = NC_NOERR; + size_t typelen = 0; + size_t maxstrlen = 0; + char tchar; + nc_type nctype = NC_NAT; + int endianness = -1; + const char* p; + int n,count; + + if(nctypep) *nctypep = NC_NAT; + if(endianp) *endianp = NC_ENDIAN_NATIVE; + if(maxstrlenp) *maxstrlenp = 0; + + if(dtype == NULL) {stat = NC_ENCZARR; goto done;} + + /* Handle special cases */ + if(strcmp(dtype,NC_JSON_DTYPE_V2)==0) { + nctype = NC_JSON; + typelen = 1; + goto exit; + } else if(strcmp(dtype,">S1")==0) { + nctype = NC_CHAR; + typelen = 1; + goto exit; + } else if(memcmp(dtype,"|S",2)==0) { + nctype = NC_STRING; + sscanf(dtype,"|S%zu",&maxstrlen); + goto exit; + } + + /* Parse the dtype; should be a numeric type by now */ + p = dtype; + switch (*p++) { + case '<': endianness = NC_ENDIAN_LITTLE; break; + case '>': endianness = NC_ENDIAN_BIG; break; + case '|': endianness = NC_ENDIAN_NATIVE; break; + default: p--; endianness = NC_ENDIAN_NATIVE; break; + } + tchar = *p++; /* get the base type */ + /* Decode the type length */ + count = sscanf(p,"%zu%n",&typelen,&n); + if(count == 0) {stat = NC_ENCZARR; goto done;} + p += n; + + /* Numeric cases */ + switch(typelen) { + case 1: + switch (tchar) { + case 'i': nctype = NC_BYTE; break; + case 'u': nctype = NC_UBYTE; break; + default: {stat = NC_ENCZARR; goto done;} + } + break; + case 2: + switch (tchar) { + case 'i': nctype = NC_SHORT; break; + case 'u': nctype = NC_USHORT; break; + default: {stat = NC_ENCZARR; goto done;} + } + break; + case 4: + switch (tchar) { + case 'i': nctype = NC_INT; break; + case 'u': nctype = NC_UINT; break; + case 'f': nctype = NC_FLOAT; break; + default: {stat = NC_ENCZARR; goto done;} + } + break; + case 8: + switch (tchar) { + case 'i': nctype = NC_INT64; break; + case 'u': nctype = NC_UINT64; break; + case 'f': nctype = NC_DOUBLE; break; + default: {stat = NC_ENCZARR; goto done;} + } + break; + default: {stat = NC_ENCZARR; goto done;} + } + +exit: + if(nctypep) *nctypep = nctype; + if(endianp) *endianp = endianness; + if(maxstrlenp) *maxstrlenp = maxstrlen; + +done: + return stat; +} + +/* +Extract type and data for an attribute from json +*/ +static int +computeattrinfo(NC_FILE_INFO_T* file, nc_type typehint, const char* aname, const NCjson* jtypes, const NCjson* jdata, struct NCZ_AttrInfo* ainfo) +{ + int stat = NC_NOERR; + int purezarr = 0; + NCZ_FILE_INFO_T* zinfo = (NCZ_FILE_INFO_T*)file->format_file_info; + const NCjson* jatype = NULL; + + ZTRACE(3,"typehint=%d aname=%s typehint=%d",typehint,aname); + + assert(aname != NULL); + + TESTPUREZARR; + + ainfo->name = aname; + + /* Infer the attribute data's type */ + if(purezarr || jtypes == NULL) { + ainfo->nctype = NC_NAT; + if((stat = NCZ_inferattrtype(ainfo->name,typehint,jdata,&ainfo->nctype))) goto done; + } else { + /* Search the jtypes for the type of this attribute */ + ainfo->nctype = NC_NAT; + NCJcheck(NCJdictget(jtypes,aname,(NCjson**)&jatype)); + if(jatype == NULL) {stat = NC_ENCZARR; goto done;} + if((stat=dtype2nctype(NCJstring(jatype),&ainfo->nctype,&ainfo->endianness,&ainfo->typelen))) goto done; + if(ainfo->nctype >= N_NCZARR_TYPES) {stat = NC_EINTERNAL; goto done;} + } + if((stat = NCZ_computeattrdata(file,jdata,ainfo))) goto done; + +done: + return ZUNTRACEX(THROW(stat),"typeid=%d typelen=%d datalen=%u",ainfo->nctype,ainfo->typelen,ainfo->datalen); +} diff --git a/libnczarr/zformat3.c b/libnczarr/zformat3.c new file mode 100644 index 0000000000..718d69cf81 --- /dev/null +++ b/libnczarr/zformat3.c @@ -0,0 +1,1761 @@ +/* Copyright 2018-2018 University Corporation for Atmospheric + Research/Unidata. */ +/** + * @file + * + * @author Dennis Heimbigner + */ + +#include "zincludes.h" +#include "zplugins.h" +#include "znc4.h" +#include "zfill.h" +#ifdef NETCDF_ENABLE_NCZARR_FILTERS +#include "netcdf_filter_build.h" +#endif + +/**************************************************/ + +/*Mnemonics*/ +#define STRTEMPLATE "r%zu" + +/**************************************************/ +/* Big endian Bytes filter */ +static const char* NCZ_Bytes_Big_Text = "{\"name\": \"bytes\", \"configuration\": {\"endian\": \"big\"}}"; +NCjson* NCZ_Bytes_Big_Json = NULL; + +/* Little endian Bytes filter */ +static const char* NCZ_Bytes_Little_Text = "{\"name\": \"bytes\", \"configuration\": {\"endian\": \"little\"}}"; +NCjson* NCZ_Bytes_Little_Json = NULL; + +/**************************************************/ +/* Forward */ + +static int ZF3_create(NC_FILE_INFO_T* file, NCURI* uri, NCZMAP* map); +static int ZF3_open(NC_FILE_INFO_T* file, NCURI* uri, NCZMAP* map); +static int ZF3_close(NC_FILE_INFO_T* file); +static int ZF3_download_grp(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, struct ZOBJ* zobj); +static int ZF3_download_var(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, struct ZOBJ* zobj); +static int ZF3_decode_group(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, struct ZOBJ* jgroup, NCjson** jzgrpp, NCjson** jzsuperp); +static int ZF3_decode_superblock(NC_FILE_INFO_T* file, const NCjson* jsuper, int* zarrformat, int* nczarrformat); +static int ZF3_decode_nczarr_group(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, const NCjson* jnczgrp, NClist* vars, NClist* subgrps, NClist* dimdefs); +static int ZF3_decode_var(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, struct ZOBJ* zobj, NClist* jfilters, size64_t** shapep, size64_t** chunksp, NClist* dimrefs); +static int ZF3_decode_attributes(NC_FILE_INFO_T* file, NC_OBJ* container, const NCjson* jatts); +static int decode_var_dimrefs(NC_FILE_INFO_T* file, size_t rank, size64_t* shapes, const NCjson* jvar, NClist* dimrefs); +static int ZF3_upload_grp(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, struct ZOBJ* zobj); +static int ZF3_upload_var(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, struct ZOBJ* zobj); +static int ZF3_encode_superblock(NC_FILE_INFO_T* file, NCjson** jsuperp); +static int ZF3_encode_nczarr_group(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NCjson** jzgroupp); +static int ZF3_encode_group(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NCjson** jgroupp); +static int ZF3_encode_nczarr_array(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, NCjson** jzvarp); +static int ZF3_encode_var(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, NClist* filtersj, NCjson** jvarp); +static int ZF3_encode_attributes(NC_FILE_INFO_T* file, NC_OBJ* container, NCjson** jnczconp, NCjson** jsuperp, NCjson** jattsp); +static int ZF3_encode_filter(NC_FILE_INFO_T* file, NCZ_Filter* filter, NCjson** jfilterp); +static int ZF3_decode_filter(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, NCjson* jfilter, NCZ_Filter* filter); +static int ZF3_searchobjects(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NClist* varnames, NClist* subgrpnames); +static int ZF3_encode_chunkkey(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, size_t rank, const size64_t* chunkindices, char dimsep, char** keyp); +static int ZF3_decode_chunkkey(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, const char* chunkname, size_t* rankp, size64_t** chunkindicesp); +static int ZF3_encode_xarray(NC_FILE_INFO_T* file, size_t rank, NC_DIM_INFO_T** dims, char** xarraydims, size_t* zarr_rankp); +static char ZF3_default_dimension_separator(NC_FILE_INFO_T* file); + +static int decode_dim_decls(NC_FILE_INFO_T* file, const NCjson* jdims, NClist* dimdefs); +static int dtype2nctype(const char* dtype, const char* dnczarr, nc_type* nctypep, size_t* typelenp); +static int nctype2dtype(nc_type nctype, size_t strlen, char** dtypep, char** dnczarrp); +static int computeattrinfo(NC_FILE_INFO_T* file, NC_OBJ* container, const char* aname, const NCjson* jtypes, const NCjson* jdata, struct NCZ_AttrInfo* ainfo); +static NCjson* build_named_config(const char* name, int pairs, ...); +static int extract_named_config(const NCjson* jpair, const char* field, const NCjson** jvaluep); + +/**************************************************/ +/* Format dispatch table */ + +static const NCZ_Formatter NCZ_formatter3_table = +{ + NCZARRFORMAT3, + ZARRFORMAT3, + NCZ_FORMATTER_VERSION, + + /*File-Level Operations*/ + ZF3_create, + ZF3_open, + ZF3_close, + + /*Read JSON Metadata*/ + ZF3_download_grp, + ZF3_download_var, + + ZF3_decode_group, + ZF3_decode_superblock, + ZF3_decode_nczarr_group, + ZF3_decode_var, + ZF3_decode_attributes, + + /*Write JSON Metadata*/ + ZF3_upload_grp, + ZF3_upload_var, + + ZF3_encode_superblock, + ZF3_encode_nczarr_group, + ZF3_encode_group, + + ZF3_encode_nczarr_array, + ZF3_encode_var, + + ZF3_encode_attributes, + + /*Filter Processing*/ + ZF3_encode_filter, + ZF3_decode_filter, + + /*Search*/ + ZF3_searchobjects, + + /*Chunkkeys*/ + ZF3_encode_chunkkey, + ZF3_decode_chunkkey, + + /*_ARRAY_DIMENSIONS*/ + ZF3_encode_xarray, + + /* Per-format default dimension separator */ + ZF3_default_dimension_separator, +}; + +const NCZ_Formatter* NCZ_formatter3 = &NCZ_formatter3_table; + +int +NCZF3_initialize(void) +{ + int stat = NC_NOERR; + NCjson* json = NULL; + NCJcheck(NCJparse(NCZ_Bytes_Little_Text,0,&json)); + NCZ_Bytes_Little_Json = json; + NCJcheck(NCJparse(NCZ_Bytes_Big_Text,0,&json)); + NCZ_Bytes_Big_Json = json; +done: + return THROW(stat); +} + +int +NCZF3_finalize(void) +{ + return NC_NOERR; +} + +/**************************************************/ + +/*File-Level Operations*/ + +/** + * @internal Synchronize file metadata from internal to map. + * + * @param file Pointer to file info struct. + * + * @return ::NC_NOERR No error. + * @author Dennis Heimbigner + */ +static int +ZF3_create(NC_FILE_INFO_T* file, NCURI* uri, NCZMAP* map) +{ + int stat = NC_NOERR; + NCZ_FILE_INFO_T* zfile = NULL; + + NC_UNUSED(uri); + NC_UNUSED(map); + ZTRACE(4,"file=%s",file->controller->path); + zfile = (NCZ_FILE_INFO_T*)file->format_file_info; + assert(zfile != NULL); + return ZUNTRACE(THROW(stat)); +} + +static int +ZF3_open(NC_FILE_INFO_T* file, NCURI* uri, NCZMAP* map) +{ + int stat = NC_NOERR; + NCZ_FILE_INFO_T* zfile = NULL; + + NC_UNUSED(uri); + NC_UNUSED(map); + ZTRACE(4,"file=%s",file->controller->path); + zfile = (NCZ_FILE_INFO_T*)file->format_file_info; + assert(zfile != NULL); + return ZUNTRACE(THROW(stat)); +} + +int +ZF3_close(NC_FILE_INFO_T* file) +{ + int stat = NC_NOERR; + NC_UNUSED(file); + return THROW(stat); +} + +/**************************************************/ + +/*Dowload JSON Metadata*/ +int +ZF3_download_grp(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, struct ZOBJ* zobj) +{ + int stat = NC_NOERR; + char* fullpath = NULL; + char* key = NULL; + + /* Download zarr.json */ + if((stat = NCZ_grpkey(grp,&fullpath))) goto done; + if((stat = nczm_concat(fullpath,Z3GROUP,&key))) goto done; + if((stat = NCZMD_fetch_json_content(file,NCZMD_GROUP,key,&zobj->jobj))) goto done; + nullfree(key); key = NULL; + /* Verify that group zarr.json exists */ + if(zobj->jobj == NULL) {stat = NC_ENOTZARR; goto done;} + /* For V3, the attributes are part of the grp zarr.json */ + NCJcheck(NCJdictget(zobj->jobj,"attributes",&zobj->jatts)); + zobj->constjatts = 1; + +done: + nullfree(key); + nullfree(fullpath); + return THROW(stat); +} + +int +ZF3_download_var(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, struct ZOBJ* zobj) +{ + int stat = NC_NOERR; + char* fullpath = NULL; + char* key = NULL; + + /* Download zarr.json */ + if((stat = NCZ_varkey(var,&fullpath))) goto done; + if((stat = nczm_concat(fullpath,Z3ARRAY,&key))) goto done; + if((stat = NCZMD_fetch_json_content(file,NCZMD_ARRAY,key,&zobj->jobj))) goto done; + nullfree(key); key = NULL; + /* Verify that var zarr.json exists */ + if(zobj->jobj == NULL) {stat = NC_ENOTZARR; goto done;} + /* For V3, the attributes are part of the var zarr.json */ + NCJcheck(NCJdictget(zobj->jobj,"attributes",&zobj->jatts)); + zobj->constjatts = 1; + +done: + nullfree(key); + nullfree(fullpath); + return THROW(stat); +} + +int +ZF3_decode_group(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, struct ZOBJ* zobj, NCjson** jzgrpp, NCjson** jzsuperp) +{ + int stat = NC_NOERR; + NCjson* jzgrp = NULL; + NCjson* jzsuper = NULL; + const NCjson* jvalue = NULL; + NCZ_FILE_INFO_T* zinfo = (NCZ_FILE_INFO_T*)file->format_file_info; + + NC_UNUSED(grp); + /* Verify the format */ + { + const NCjson* jvar = zobj->jobj; + int format; + NCJcheck(NCJdictget(jvar,"node_type",(NCjson**)&jvalue)); + assert(jvalue != NULL); + if(strcasecmp("group",NCJstring(jvalue))!=0) {stat = THROW(NC_ENOTZARR); goto done;} + NCJcheck(NCJdictget(jvar,"zarr_format",(NCjson**)&jvalue)); + sscanf(NCJstring(jvalue),ZARR_FORMAT_VERSION_TEMPLATE,&format); + if(format != zinfo->zarr.zarr_format) {stat = (THROW(NC_ENCZARR)); goto done;} + } + + if(zobj->jatts != NULL) { + /* Extract _nczarr_group from zobj->attr */ + NCJcheck(NCJdictget(zobj->jatts,NC_NCZARR_GROUP_ATTR,&jzgrp)); + /* Extract _nczarr_superblock from zobj->attr */ + NCJcheck(NCJdictget(zobj->jatts,NC_NCZARR_SUPERBLOCK_ATTR,&jzsuper)); + } + if(jzgrpp != NULL) *jzgrpp = jzgrp; + if(jzsuperp != NULL) *jzsuperp = jzsuper; + +done: + return THROW(stat); +} + +int +ZF3_decode_superblock(NC_FILE_INFO_T* file, const NCjson* jsuper, int* zformatp, int* nczformatp) +{ + int stat = NC_NOERR; + const NCjson* format = NULL; + int zformat = 0; + int nczformat = 0; + + NC_UNUSED(file); + assert(jsuper != NULL); + + if(zformatp) *zformatp = 0; + if(nczformatp) *nczformatp = 0; + + /* Extract the zarr format number and the nczarr format number */ + NCJcheck(NCJdictget(jsuper,"zarr_format",(NCjson**)&format)); + if(format != NULL) { + if(NCJsort(format) != NCJ_INT) {stat = NC_ENOTZARR; goto done;} + if(1!=sscanf(NCJstring(format),ZARR_FORMAT_VERSION_TEMPLATE,&zformat)) {stat = NC_ENOTZARR; goto done;} + } + NCJcheck(NCJdictget(jsuper,"nczarr_format",(NCjson**)&format)); + if(format != NULL) { + if(NCJsort(format) != NCJ_INT) {stat = NC_ENOTZARR; goto done;} + if(1!=sscanf(NCJstring(format),NCZARR_FORMAT_VERSION_TEMPLATE,&nczformat)) {stat = NC_ENOTZARR; goto done;} + } + + if(zformatp) *zformatp = zformat; + if(nczformatp) *nczformatp = nczformat; + +done: + return THROW(stat); +} + +int +ZF3_decode_nczarr_group(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, const NCjson* jnczgrp, NClist* vars, NClist* subgrps, NClist* dimdefs) +{ + int stat = NC_NOERR; + size_t i; + const NCjson* jvalue = NULL; + + NC_UNUSED(grp); + + ZTRACE(3,"file=%s grp=%s",file->controller->path,grp->hdr.name); + + NCJcheck(NCJdictget(jnczgrp,"dimensions",(NCjson**)&jvalue)); + if(jvalue != NULL) { + if(NCJsort(jvalue) != NCJ_DICT) {stat = (THROW(NC_ENCZARR)); goto done;} + /* Decode the dimensions defined in this group */ + if((stat = decode_dim_decls(file,jvalue,dimdefs))) goto done; + } + + NCJcheck(NCJdictget(jnczgrp,"arrays",(NCjson**)&jvalue)); + if(jvalue != NULL) { + /* Extract the variable names in this group */ + for(i=0;iformat_file_info; + /* per-variable info */ + NCZ_VAR_INFO_T* zvar = (NCZ_VAR_INFO_T*)var->format_var_info; + const NCjson* jvar = NULL; + const NCjson* jvalue = NULL; + const NCjson* jendian = NULL; + const NCjson* jcodecs = NULL; + const NCjson* jchunkgrid = NULL; + const NCjson* jchunkkey = NULL; + const NCjson* jdtype = NULL; + const NCjson* jdnczarr = NULL; + const NCjson* jnczarray = NULL; + int varsized = 0; + int suppress = 0; /* Abort processing of this variable */ + nc_type vtype = NC_NAT; + size_t maxstrlen = 0; + size_t zarr_rank = 0; /* |shape| */ + int endianness = NC_ENDIAN_NATIVE; + struct NCZ_AttrInfo ainfo = NCZ_emptyAttrInfo(); + size64_t* shapes = NULL; + size64_t* chunks = NULL; + + jvar = zobj->jobj; + assert(jvar != NULL); + + /* Verify the format */ + { + int format; + NCJcheck(NCJdictget(jvar,"node_type",(NCjson**)&jvalue)); + assert(jvalue != NULL); + if(strcasecmp("array",NCJstring(jvalue))!=0) {stat = THROW(NC_ENOTZARR); goto done;} + NCJcheck(NCJdictget(jvar,"zarr_format",(NCjson**)&jvalue)); + sscanf(NCJstring(jvalue),ZARR_FORMAT_VERSION_TEMPLATE,&format); + if(format != zinfo->zarr.zarr_format) {stat = (THROW(NC_ENCZARR)); goto done;} + } + + /* Get _nczarr_array */ + if(zobj->jatts != NULL) { + NCJcheck(NCJdictget(zobj->jatts,NC_NCZARR_ARRAY_ATTR,(NCjson**)&jnczarray)); + } + + { + const char* dtype = NULL; + const char* dnczarr = NULL; + /* Get the standard dtype of the variable */ + NCJcheck(NCJdictget(jvar,"data_type",(NCjson**)&jdtype)); + if(jdtype == NULL || NCJsort(jdtype) != NCJ_STRING) {stat = NC_ENOTZARR; goto done;} + dtype = NCJstring(jdtype); + /* Get the dtype override from _nczarr_array */ + if(jnczarray != NULL) { + NCJcheck(NCJdictget(jnczarray,"nczarr_type",(NCjson**)&jdnczarr)); + if(jdnczarr != NULL && NCJsort(jdnczarr) != NCJ_STRING) {stat = NC_ENCZARR; goto done;} + if(jdnczarr != NULL) + dnczarr = NCJstring(jdnczarr); + } + /* Convert dtype to nc_type */ + if((stat = dtype2nctype(dtype,dnczarr,&vtype,&maxstrlen))) goto done; + if(vtype > NC_NAT && vtype <= NC_MAX_ATOMIC_TYPE) { + /* Locate the NC_TYPE_INFO_T object */ + if((stat = ncz_gettype(file,var->container,vtype,&var->type_info))) goto done; + } else {stat = NC_EBADTYPE; goto done;} + if(vtype == NC_STRING) { + zsetmaxstrlen(maxstrlen,var); + if((stat = NCZ_sync_dual_att(file,(NC_OBJ*)var,NC_NCZARR_MAXSTRLEN_ATTR,DA_MAXSTRLEN,FIXATT))) goto done; + } + } + + { /* Extract the shape */ + NCJcheck(NCJdictget(jvar,"shape",(NCjson**)&jvalue)); + if(NCJsort(jvalue) != NCJ_ARRAY) {stat = THROW(NC_ENOTZARR); goto done;} + zarr_rank = NCJarraylength(jvalue); + if(zarr_rank > 0) { + if((shapes = (size64_t*)calloc(zarr_rank,sizeof(size64_t)))==NULL) {stat = NC_ENOMEM; goto done;} + if((stat=NCZ_decodesizet64vec(jvalue, &zarr_rank, shapes))) goto done; + } + /* Set the rank of the variable */ + if((stat = nc4_var_set_ndims(var, (int)zarr_rank))) goto done; + } + + /* Process dimrefs (might be NULL) */ + if((stat = decode_var_dimrefs(file,zarr_rank,shapes,jvar,dimrefs))) goto done; + + /* Process chunk_grid (must precede chunk cache creation) */ + { + NCJcheck(NCJdictget(jvar,"chunk_grid",(NCjson**)&jchunkgrid)); + if(jchunkgrid == NULL) {stat = NC_ENOTZARR; goto done;} + if((stat = extract_named_config(jchunkgrid,"name",&jvalue))) goto done; + if(strcmp(NCJstring(jvalue),"regular")!=0) {stat = NC_ENCZARR; goto done;} + if((stat = extract_named_config(jchunkgrid,"chunk_shape",&jvalue))) goto done; + /* Validate */ + if(jvalue == NULL || NCJsort(jvalue)!=NCJ_ARRAY || NCJarraylength(jvalue) != zarr_rank) + {stat = NC_ENOTZARR; goto done;} + if((chunks = (size64_t*)calloc(zarr_rank,sizeof(size64_t)))==NULL) {stat = NC_ENOMEM; goto done;} + if((stat=NCZ_decodesizet64vec(jvalue, &zarr_rank, chunks))) goto done; + var->storage = NC_CHUNKED; + } + + /* Process chunk_key_encoding (must precede chunk cache creation) */ + { + NCglobalstate* ngs = NC_getglobalstate(); + assert(ngs != NULL); + zvar->dimension_separator = 0; + + NCJcheck(NCJdictget(jvar,"chunk_key_encoding",(NCjson**)&jchunkkey)); + if(jchunkkey == NULL) {stat = NC_ENOTZARR; goto done;} + /* Figure out what separator to use */ + if((stat = extract_named_config(jchunkkey,"name",&jvalue))) goto done; + if(strcmp(NCJstring(jvalue),"default")!=0 + && strcmp(NCJstring(jvalue),"v2")!=0) {stat = NC_ENOTZARR; goto done;} + /* Get the separator character */ + if((stat = extract_named_config(jchunkkey,"separator",&jvalue))) goto done; + if(jvalue != NULL) { + const char* sep = NCJstring(jvalue); + if(strlen(sep) != 1) {stat = NC_ENOTZARR; goto done;} + if(!islegaldimsep(sep[0])) {stat = NC_ENOTZARR; goto done;} + zvar->dimension_separator = sep[0]; + } else + zvar->dimension_separator = ngs->zarr.dimension_separator; /* use global default */ + } + + /* fill_value; must precede calls to adjust cache */ + { + NCJcheck(NCJdictget(jvar,"fill_value",(NCjson**)&jvalue)); + if(jvalue == NULL || NCJsort(jvalue) == NCJ_NULL) { + var->no_fill = NC_NOFILL; + if((stat = NCZ_disable_fill(file,var))) goto done; + } else { /* Fill in var->fill_value */ + var->no_fill = NC_FILL; + NCZ_clearAttrInfo(file,&ainfo); + ainfo.name = NC_FillValue; + ainfo.nctype = vtype; + if((stat = NCZ_computeattrdata(file,jvalue,&ainfo))) goto done; + /* Create var->fill_value */ + assert(ainfo.nctype == vtype); + if((stat = NCZ_set_dual_obj_data(file,(NC_OBJ*)var,NC_FillValue,DA_FILLVALUE,ainfo.datalen,ainfo.data))) goto done; + /* propagate to _FillValue attribute */ + if((stat = NCZ_sync_dual_att(file,(NC_OBJ*)var,NC_FillValue,DA_FILLVALUE,FIXATT))) goto done; + /* clear+reclaim ainfo */ + NCZ_clearAttrInfo(file,&ainfo); + } + } + + /* Codecs key */ + /* From V3 Spec: A list of JSON objects providing codec configurations, + or null if no filters are to be applied. Each codec configuration + object MUST contain a "name" key identifying the codec to be used. + Note that for V3 every array has at least one codec: the "bytes" codec + that specifies endianness. + */ + if(var->filters == NULL) var->filters = (void*)nclistnew(); +#ifdef NETCDF_ENABLE_NCZARR_FILTERS + if((stat = NCZ_filter_initialize())) goto done; +#endif + NCJcheck(NCJdictget(jvar,"codecs",(NCjson**)&jcodecs)); + if(jcodecs == NULL || NCJarraylength(jcodecs) == 0) {stat = NC_ENOTZARR; goto done;} + { /* Get endianess from the first codec */ + jendian = NCJith(jcodecs,0); + NCJcheck(NCJdictget(jendian,"name",(NCjson**)&jvalue)); + if(strcmp("bytes",NCJstring(jvalue))!=0) {stat = NC_ENOTZARR; goto done;} + /* Get the configuration */ + NCJcheck(NCJdictget(jendian,"configuration",(NCjson**)&jvalue)); + if(NCJsort(jvalue) != NCJ_DICT) {stat = NC_ENOTZARR; goto done;} + /* Get the endianness */ + NCJcheck(NCJdictget(jvalue,"endian",(NCjson**)&jvalue)); + if(jvalue == NULL) {stat = NC_ENOTZARR; goto done;} + if(strcmp("big",NCJstring(jvalue))==0) endianness = NC_ENDIAN_BIG; + else if(strcmp("little",NCJstring(jvalue))==0) endianness = NC_ENDIAN_LITTLE; + else {stat = NC_ENOTZARR; goto done;} + if(endianness != NC_ENDIAN_NATIVE) { + var->endianness = endianness; + var->type_info->endianness = var->endianness; /* Propagate */ + } + /* bytes filter is never actually invoked; it just records variable's endianness */ +#ifdef NETCDF_ENABLE_NCZARR_FILTERS + { + size_t k; + const NCjson* jfilter = NULL; + for(k=1;k 0) + suppress = 1; + + if(suppress) { + NC_GRP_INFO_T* grp = var->container; + /* Reclaim NCZarr variable specific info */ + (void)NCZ_zclose_var1(var); + /* Remove from list of variables and reclaim the top level var object */ + (void)nc4_var_list_del(grp, var); + var = NULL; + } + + if(shapesp) {*shapesp = shapes; shapes = NULL;} + if(chunksp) {*chunksp = chunks; chunks = NULL;} + +done: + nullfree(chunks); + NCZ_clearAttrInfo(file,&ainfo); + nullfree(shapes); shapes = NULL; + return THROW(stat); +} + +int +ZF3_decode_attributes(NC_FILE_INFO_T* file, NC_OBJ* container, const NCjson* jatts) +{ + int stat = NC_NOERR; + size_t i; + NC_ATT_INFO_T* att = NULL; + struct NCZ_AttrInfo ainfo = NCZ_emptyAttrInfo(); + const NCjson* jtypes = NULL; + const NCjson* jnczattr = NULL; + + /* Extract _nczarr_attrs */ + NCJcheck(NCJdictget(jatts,NC_NCZARR_ATTRS_ATTR,(NCjson**)&jnczattr)); + if(jnczattr != NULL) { + /* See if we have jtypes */ + NCJcheck(NCJdictget(jnczattr,"attribute_types",(NCjson**)&jtypes)); + } + + if(jatts != NULL && NCJsort(jatts)==NCJ_DICT) { + for(i=0;isort == NCGRP ? container : NULL); + + /* See if this is reserved attribute */ + ra = NC_findreserved(aname); + if(ra != NULL) { + /* case 1: name = _NCProperties, grp=root, varid==NC_GLOBAL */ + if(grp != NULL && file->root_grp == grp && strcmp(aname,NCPROPS)==0 && grp != NULL) { + /* Setup provenance */ + if(NCJsort(javalue) != NCJ_STRING) + {stat = (THROW(NC_ENCZARR)); goto done;} /*malformed*/ + if((stat = NCZ_read_provenance(file,aname,NCJstring(javalue)))) goto done; + continue; /* Suppress _NCProperties until we write the file */ + } else {/* case other: if attribute is hidden */ + if(ra->flags & HIDDENATTRFLAG) continue; /* ignore it */ + } + } + + /* Collect the attribute's type and value */ + NCZ_clearAttrInfo(file,&ainfo); + if((stat = computeattrinfo(file,container,aname,jtypes,javalue,&ainfo))) goto done; + /* Create the attribute */ + if((stat = ncz_makeattr(file,container,&ainfo,&att))) goto done; + + /* if a dual attr sync with NC_XXX_INFO_T object */ + dualatt = NCZ_is_dual_att(aname); /* See if this is a dual attribute */ + if(dualatt != DA_NOT) { + if((stat = NCZ_sync_dual_att(file,container,aname,dualatt,FIXOBJ))) goto done; + } + } + } + +done: + NCZ_clearAttrInfo(file,&ainfo); + return THROW(stat); +} + +/**************************************************/ + +int +ZF3_upload_grp(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, struct ZOBJ* zobj) +{ + int stat = NC_NOERR; + char* fullpath = NULL; + char* key = NULL; + + /* Add attributes to the group zarr.json */ + NCJcheck(NCJinsert(zobj->jobj,"attributes",zobj->jatts)); zobj->jatts = NULL; + + /* Construct grp key */ + if((stat = NCZ_grpkey(grp,&fullpath))) goto done; + + /* build ZGROUP path */ + if((stat = nczm_concat(fullpath,Z3GROUP,&key))) goto done; + /* Write to map */ + if((stat=NCZMD_update_json_content(file,NCZMD_GROUP,key,zobj->jobj))) goto done; + nullfree(key); key = NULL; + +done: + nullfree(fullpath); + nullfree(key); + return THROW(stat); +} + +int +ZF3_upload_var(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, struct ZOBJ* zobj) +{ + int stat = NC_NOERR; + char* fullpath = NULL; + char* key = NULL; + + /* Add attributes to the var zarr.json */ + NCJcheck(NCJinsert(zobj->jobj,"attributes",zobj->jatts)); zobj->jatts = NULL; + + /* Construct var key */ + if((stat = NCZ_varkey(var,&fullpath))) goto done; + + /* build ZARRAY path */ + if((stat = nczm_concat(fullpath,Z3ARRAY,&key))) goto done; + /* Write to map */ + if((stat=NCZMD_update_json_content(file,NCZMD_ARRAY,key,zobj->jobj))) goto done; + nullfree(key); key = NULL; + +done: + nullfree(fullpath); + nullfree(key); + return THROW(stat); + return THROW(stat); +} + +/*Write JSON Metadata*/ +int +ZF3_encode_superblock(NC_FILE_INFO_T* file, NCjson** jsuperp) +{ + int stat = NC_NOERR; + char version[64]; + NCZ_FILE_INFO_T* zinfo = (NCZ_FILE_INFO_T*)file->format_file_info; + NCjson* jsuper = NULL; + + /* create superblock */ + snprintf(version,sizeof(version),NCZARR_FORMAT_VERSION_TEMPLATE, zinfo->zarr.nczarr_format); + NCJnew(NCJ_DICT,&jsuper); + NCJcheck(NCJinsertstring(jsuper,"version",version)); + if(jsuperp) {*jsuperp = jsuper; jsuper = NULL;} +done: + NCJreclaim(jsuper); + return THROW(stat); +} + +int +ZF3_encode_nczarr_group(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NCjson** jnczgrpp) +{ + int stat = NC_NOERR; + size_t i; + NCjson* jnczgrp = NULL; + NCjson* jdims = NULL; + NCjson* jvars = NULL; + NCjson* jsubgrps = NULL; + NCjson* jsize = NULL; + NCjson* junlim = NULL; + NCjson* jdim = NULL; + + NC_UNUSED(file); + /* Create the NCZ_GROUP dict */ + NCJnew(NCJ_DICT,&jnczgrp); + + /* Collect and encode the grp dimension declarations */ + NCJnew(NCJ_DICT,&jdims); + for(i=0;idim);i++) { + NC_DIM_INFO_T* dim = (NC_DIM_INFO_T*)ncindexith(grp->dim,i); + char digits[64]; + snprintf(digits,sizeof(digits),"%zu",dim->len); + NCJcheck(NCJnewstring(NCJ_INT,digits,&jsize)); + NCJcheck(NCJnewstring(NCJ_INT,(dim->unlimited?"1":"0"),&junlim)); + NCJnew(NCJ_DICT,&jdim); + NCJcheck(NCJinsert(jdim,"size",jsize)); jsize = NULL; + NCJcheck(NCJinsert(jdim,"unlimited",junlim)); junlim = NULL; + NCJcheck(NCJinsert(jdims,dim->hdr.name,jdim)); jdim = NULL; + } + NCJcheck(NCJinsert(jnczgrp,"dimensions",jdims)); jdims = NULL; + + /* Collect and insert the variable names in this group */ + NCJnew(NCJ_ARRAY,&jvars); + for(i=0;ivars);i++) { + NC_VAR_INFO_T* var = (NC_VAR_INFO_T*)ncindexith(grp->vars,i); + NCJcheck(NCJaddstring(jvars,NCJ_STRING,var->hdr.name)); + } + NCJcheck(NCJinsert(jnczgrp,"arrays",jvars)); jvars = NULL; + + /* Collect and insert the variable names in this group */ + NCJnew(NCJ_ARRAY,&jsubgrps); + for(i=0;ichildren);i++) { + NC_GRP_INFO_T* child = (NC_GRP_INFO_T*)ncindexith(grp->children,i); + NCJcheck(NCJaddstring(jsubgrps,NCJ_STRING,child->hdr.name)); + } + NCJcheck(NCJinsert(jnczgrp,"groups",jsubgrps)); jsubgrps = NULL; + + if(jnczgrpp) {*jnczgrpp = jnczgrp; jnczgrp = NULL;} +done: + NCJreclaim(jnczgrp); + NCJreclaim(jdims); + NCJreclaim(jvars); + NCJreclaim(jsubgrps); + NCJreclaim(jsize); + NCJreclaim(junlim); + NCJreclaim(jdim); + return THROW(stat); +} + +int +ZF3_encode_group(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NCjson** jgroupp) +{ + int stat = NC_NOERR; + NCjson* jgroup = NULL; + NCZ_FILE_INFO_T* zfile = (NCZ_FILE_INFO_T*)file->format_file_info; + + NC_UNUSED(grp); + + /* Add standard group fields */ + NCJcheck(NCJnew(NCJ_DICT,&jgroup)); /* zarr.json */ + NCJcheck(NCJinsertstring(jgroup,"node_type","group")); + NCJcheck(NCJinsertint(jgroup,"zarr_format",zfile->zarr.zarr_format)); + + if(jgroupp) {*jgroupp = jgroup; jgroup = NULL;} +done: + NCJreclaim(jgroup); + return THROW(stat); +} + +int +ZF3_encode_nczarr_array(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, NCjson** jnczarrayp) +{ + int stat = NC_NOERR; + NClist* dimrefs = NULL; + NCjson* jnczarray = NULL; + NCjson* jdimrefs = NULL; + NCbytes* dimfqn = ncbytesnew(); + size_t i; + char* dnczarr = NULL; + + NC_UNUSED(file); + + NCJnew(NCJ_DICT,&jnczarray); + + if((dimrefs = nclistnew())==NULL) {stat = NC_ENOMEM; goto done;} + if(var->ndims > 0) { + for(i=0;indims;i++) { + NC_DIM_INFO_T* dim = var->dim[i]; + if((stat = NCZ_makeFQN((NC_OBJ*)dim,dimfqn))) goto done; + nclistpush(dimrefs,ncbytesextract(dimfqn)); + } + } + + /* Create the dimrefs json object */ + NCJnew(NCJ_ARRAY,&jdimrefs); + while(nclistlength(dimrefs)>0) { + char* fqn = (char*)nclistremove(dimrefs,0); + NCJaddstring(jdimrefs,NCJ_STRING,fqn); + nullfree(fqn); fqn = NULL; + } + /* Insert dimension_references */ + NCJcheck(NCJinsert(jnczarray,"dimension_references",jdimrefs)); jdimrefs = NULL; + + /* Add the _Storage flag */ + /* Record if this is a scalar */ + if(var->ndims == 0) { + NCJcheck(NCJinsertint(jnczarray,"scalar",1)); + } + + /* everything looks like it is chunked */ + NCJcheck(NCJinsertstring(jnczarray,"storage","chunked")); + + /* insert nczarr_type */ + if((stat = nctype2dtype(var->type_info->hdr.id,NCZ_get_maxstrlen((NC_OBJ*)var),NULL,&dnczarr))) goto done; + if(dnczarr != NULL) + NCJcheck(NCJinsertstring(jnczarray,"nczarr_type",dnczarr)); + + if(jnczarrayp) {*jnczarrayp = jnczarray; jnczarray = NULL;} +done: + nullfree(dnczarr); + nclistfreeall(dimrefs); + ncbytesfree(dimfqn); + NCJreclaim(jnczarray); + NCJreclaim(jdimrefs); + return THROW(stat); +} + +int +ZF3_encode_var(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, NClist* filtersj, NCjson** jvarp) +{ + int stat = NC_NOERR; + NCZ_FILE_INFO_T* zfile = (NCZ_FILE_INFO_T*)file->format_file_info; + NCjson* jvar = NULL; + NCjson* jshape = NULL; + NCjson* jchunks = NULL; + NCjson* jfill = NULL; + NCjson* jcodecs = NULL; + NCjson* jcodec = NULL; + NCjson* jdimnames = NULL; + NCjson* jchunkgrid = NULL; + NCjson* jchunkkey = NULL; + NCjson* jcfg = NULL; + NCjson* jtmp = NULL; + size_t zarr_rank = 0; + size_t i; + char tmpstr[1024]; + char* dtypename = NULL; + + NC_UNUSED(file); +#ifndef NETCDF_ENABLE_NCZARR_FILTERS + NC_UNUSED(filtersj); +#endif + + NCJnew(NCJ_DICT,&jvar); + + /* Add standard group fields */ + NCJcheck(NCJinsertstring(jvar,"node_type","array")); + NCJcheck(NCJinsertint(jvar,"zarr_format",zfile->zarr.zarr_format)); + + /* data_type key */ + /* A string or list defining a valid data type for the array. */ + { /* Add the type name */ + int atomictype = var->type_info->hdr.id; + assert(atomictype > 0 && atomictype <= NC_MAX_ATOMIC_TYPE); + if((stat = nctype2dtype(atomictype,NCZ_get_maxstrlen((NC_OBJ*)var),&dtypename,NULL))) goto done; + NCJcheck(NCJinsertstring(jvar,"data_type",dtypename)); + } + + /* fill_value key */ + if(var->no_fill) { + NCJnew(NCJ_NULL,&jfill); + } else {/*!var->no_fill*/ + int atomictype = var->type_info->hdr.id; + if(var->fill_value == NULL) { + if((stat = NCZ_ensure_fill_value(var))) goto done; + } + /* Convert var->fill_value to a string */ + if((stat = NCZ_stringconvert(atomictype,1,var->fill_value,&jfill))) goto done; + assert(jfill->sort != NCJ_ARRAY); + } + NCJcheck(NCJinsert(jvar,"fill_value",jfill)); jfill = NULL; + + /* shape key */ + /* Integer list defining the length of each dimension of the array.*/ + /* Create the list */ + zarr_rank = var->ndims; + NCJnew(NCJ_ARRAY,&jshape); + if(var->ndims > 0) { + for(i=0;idim[i]; + snprintf(tmpstr,sizeof(tmpstr),"%zu",dim->len); + NCJaddstring(jshape,NCJ_INT,tmpstr); + } + } + NCJcheck(NCJinsert(jvar,"shape",jshape)); jshape = NULL; + + /* chunks key */ + /* The zarr format does not support the concept + of contiguous (or compact), so it will never appear in the read case. + */ + /* Create the list of chunksizes */ + NCJnew(NCJ_ARRAY,&jchunks); + if(var->ndims > 0) { + for(i=0;ichunksizes[i]; + snprintf(tmpstr,sizeof(tmpstr),"%lld",len); + NCJaddstring(jchunks,NCJ_INT,tmpstr); + } + } + + /* Build the chunk_grid configuration */ + jchunkgrid = build_named_config("regular",1,"chunk_shape",jchunks); jchunks = NULL; + NCJcheck(NCJinsert(jvar,"chunk_grid",jchunkgrid)); jchunkgrid = NULL; + + /* Build the chunk_key_encoding configuration */ + { + char sepstr[2]; + sepstr[0] = NCZ_get_dimsep(var); + sepstr[1] = '\0'; + NCJcheck(NCJnewstring(NCJ_STRING,sepstr,(NCjson**)&jtmp)); + jchunkkey = build_named_config("default",1,"separator",jtmp); jtmp = NULL; + NCJcheck(NCJinsert(jvar,"chunk_key_encoding",jchunkkey)); jchunkkey = NULL; + } + + /* Collect dimension_names, even if they are meaningless */ + NCJnew(NCJ_ARRAY,&jdimnames); + for(i=0;indims;i++) { + NC_DIM_INFO_T* dim = (NC_DIM_INFO_T*)var->dim[i]; + NCJcheck(NCJnewstring(NCJ_STRING,dim->hdr.name,&jtmp)); + NCJcheck(NCJappend(jdimnames,jtmp)); jtmp = NULL; + } + NCJcheck(NCJinsert(jvar,"dimension_names",jdimnames)); jdimnames = NULL; + + /* There is always at least the bytes codec */ + NCJnew(NCJ_ARRAY,&jcodecs); + { + switch (var->endianness) { + case NC_ENDIAN_BIG: NCJcheck(NCJnewstring(NCJ_STRING,"big",&jtmp)); break; + case NC_ENDIAN_LITTLE: NCJcheck(NCJnewstring(NCJ_STRING,"little",&jtmp)); break; + default: stat = NC_EINVAL; goto done; + } + jcodec = build_named_config("bytes",1,"endian",jtmp); jtmp = NULL; + NCJcheck(NCJappend(jcodecs,jcodec)); jcodec = NULL; + } + +#ifdef NETCDF_ENABLE_NCZARR_FILTERS + while(nclistlength(filtersj) > 0) { + NCjson* jcodec = nclistremove(filtersj,0); + NCJcheck(NCJappend(jcodecs,jcodec)); jcodec = NULL; + } +#endif /*NETCDF_ENABLE_NCZARR_FILTERS*/ + NCJcheck(NCJinsert(jvar,"codecs",jcodecs)); jcodecs = NULL; + + if(jvarp) {*jvarp = jvar; jvar = NULL;} + +done: + nullfree(dtypename); + NCJreclaim(jvar); + NCJreclaim(jshape); + NCJreclaim(jchunks); + NCJreclaim(jfill); + NCJreclaim(jcodecs); + NCJreclaim(jcodec); + NCJreclaim(jdimnames); + NCJreclaim(jchunkgrid); + NCJreclaim(jchunkkey); + NCJreclaim(jcfg); + NCJreclaim(jtmp); + return THROW(stat); +} + +int +ZF3_encode_attributes(NC_FILE_INFO_T* file, NC_OBJ* container, NCjson** jnczconp, NCjson** jsuperp, NCjson** jattsp) +{ + int stat = NC_NOERR; + NCZ_FILE_INFO_T* zinfo = (NCZ_FILE_INFO_T*)file->format_file_info; + size_t i; + NCindex* atts = NULL; + NCjson* jatts = NULL; + NCjson* jtypes = NULL; + NCjson* jdata = NULL; + NCjson* jnczatt = NULL; + NC_VAR_INFO_T* var = NULL; + NC_GRP_INFO_T* grp = NULL; + char* d2name = NULL; + char* dnczarr = NULL; + const char* nczname = NULL; + int purezarr = 0; + size_t typesize = 0; + + TESTPUREZARR; + + if(container->sort == NCVAR) { + var = (NC_VAR_INFO_T*)container; + atts = var->att; + nczname = NC_NCZARR_ARRAY_ATTR; + } else if(container->sort == NCGRP) { + grp = (NC_GRP_INFO_T*)container; + atts = grp->att; + nczname = NC_NCZARR_GROUP_ATTR; + } + + NCJnew(NCJ_DICT,&jatts); + if(ncindexsize(atts) > 0) { + NCJnew(NCJ_DICT,&jtypes); + + /* Walk all the attributes convert to json and collect the dtype */ + for(i=0;inc_typeid > NC_MAX_ATOMIC_TYPE) {stat = (THROW(NC_ENCZARR)); goto done;} + if(a->nc_typeid == NC_STRING) + typesize = (size_t)NCZ_get_maxstrlen(container); + else { + if((stat = NC4_inq_atomic_type(a->nc_typeid,NULL,&typesize))) goto done; + } + + /* Convert to storable json */ + + if(a->nc_typeid == NC_CHAR && NCZ_iscomplexjsonstring(a->hdr.name,a->len,(char*)a->data,&jdata)) { + dnczarr = strdup(NC_JSON_DTYPE_V3); + d2name = strdup(dnczarr); + } else { + if((stat = NCZ_stringconvert(a->nc_typeid,a->len,a->data,&jdata))) goto done; + /* Collect the corresponding dtype and the alias type to use for attributes */ + if((stat = nctype2dtype(a->nc_typeid,typesize,&d2name,&dnczarr))) goto done; + } + /* Overwrite the zarr type with the nczarr type */ + if(dnczarr != NULL) {nullfree(d2name); d2name = dnczarr; dnczarr = NULL;} + /* Insert the attribute; optionally consumes jdata */ + if((stat = ncz_insert_attr(jatts,jtypes,a->hdr.name,&jdata,d2name))) goto done; + + /* cleanup */ + NCJreclaim(jdata); jdata = NULL; + nullfree(d2name); d2name = NULL; + nullfree(dnczarr); dnczarr = NULL; + } + } + + /* Finalize the contents of jtypes and jatts */ + if(!purezarr) { + if(jtypes == NULL) NCJnew(NCJ_DICT,&jtypes); + /* Insert _nczarr_group|_nczarr_var + type */ + if(jnczconp != NULL && *jnczconp != NULL) { + if((stat = ncz_insert_attr(jatts,jtypes,nczname,jnczconp,NC_JSON_DTYPE_V3))) goto done; + *jnczconp = NULL; + } + /* Insert _nczarr_super (if root group) + type */ + if(jsuperp != NULL && *jsuperp != NULL) { + if((stat=ncz_insert_attr(jatts,jtypes,NC_NCZARR_SUPERBLOCK_ATTR,jsuperp,NC_JSON_DTYPE_V3))) goto done; + *jsuperp = NULL; + } + + /* Build _nczarr_attrs */ + NCJnew(NCJ_DICT,&jnczatt); + NCJcheck(NCJinsert(jnczatt,"attribute_types",jtypes)); + /* WARNING, jtypes may undergo further changes */ + /* Insert _nczarr_attrs + type */ + if((stat=ncz_insert_attr(jatts,jtypes,NC_NCZARR_ATTRS_ATTR,&jnczatt,NC_JSON_DTYPE_V3))) goto done; + jtypes = NULL; + assert(*jnczconp == NULL && jnczatt == NULL && jtypes == NULL); + } + + if(jattsp) {*jattsp = jatts; jatts = NULL;} + +done: + nullfree(d2name); + nullfree(dnczarr); + NCJreclaim(jdata); + NCJreclaim(jatts); + NCJreclaim(jtypes); + return THROW(stat); +} + +#ifdef NETCDF_ENABLE_NCZARR_FILTERS +/*Filter Processing*/ +static int +ZF3_encode_filter(NC_FILE_INFO_T* file, NCZ_Filter* filter, NCjson** jfilterp) +{ + int stat = NC_NOERR; + NCjson* jfilter = NULL; + char* codec = NULL; + + NC_UNUSED(file); + + /* assumptions */ + assert(filter->flags & FLAG_WORKING); + + /* Convert the HDF5 id + parameters to the codec form */ + + /* We need to ensure the the current visible parameters are defined and had the opportunity to come + from the working parameters */ + assert((filter->flags & (FLAG_VISIBLE | FLAG_WORKING)) == (FLAG_VISIBLE | FLAG_WORKING)); + + /* Convert the visible parameters back to codec */ + if(filter->plugin->codec.codec->NCZ_hdf5_to_codec) { + if((stat = filter->plugin->codec.codec->NCZ_hdf5_to_codec(NCplistzarrv3,filter->hdf5.id,filter->hdf5.visible.nparams,filter->hdf5.visible.params,&codec))) goto done; + } else + {stat = NC_EFILTER; goto done;} + + /* Parse the codec as the return */ + NCJcheck(NCJparse(codec,0,&jfilter)); + if(jfilterp) {*jfilterp = jfilter; jfilter = NULL;} + +done: + nullfree(codec); + NCJreclaim(jfilter); + return THROW(stat); +} + +static int +ZF3_decode_filter(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, NCjson* jfilter, NCZ_Filter* filter) +{ + int stat = NC_NOERR; + const NCjson* jvalue = NULL; + NCZ_Plugin* plugin = NULL; + NCZ_Codec codec = NCZ_codec_empty(); + NCZ_HDF5 hdf5 = NCZ_hdf5_empty(); + + NC_UNUSED(file); + + if(var->filters == NULL) var->filters = nclistnew(); + + /* Get the name of this codec filter */ + NCJcheck(NCJdictget(jfilter,"name",(NCjson**)&jvalue)); + if(NCJsort(jvalue) != NCJ_STRING) {stat = THROW(NC_ENOFILTER); goto done;} + + /* Save the codec */ + if((codec.id = strdup(NCJstring(jvalue)))==NULL) {stat = NC_ENOMEM; goto done;} + NCJcheck(NCJunparse(jfilter,0,&codec.codec)); + + /* Find the plugin for this filter */ + if((stat = NCZ_plugin_lookup(codec.id,&plugin))) goto done; + + if(plugin != NULL) { + /* Save the hdf5 id */ + hdf5.id = plugin->codec.codec->hdf5id; + /* Convert the codec to hdf5 form visible parameters */ + if(plugin->codec.codec->NCZ_codec_to_hdf5) { + if((stat = plugin->codec.codec->NCZ_codec_to_hdf5(NCplistzarrv3,codec.codec,&hdf5.id,&hdf5.visible.nparams,&hdf5.visible.params))) + goto done; + } + filter->flags |= FLAG_VISIBLE; + filter->hdf5 = hdf5; hdf5 = NCZ_hdf5_empty(); + filter->codec = codec; codec = NCZ_codec_empty(); + filter->plugin = plugin; plugin = NULL; + } else { + /* Create a fake filter so we do not forget about this codec */ + filter->hdf5 = NCZ_hdf5_empty(); + filter->codec = codec; codec = NCZ_codec_empty(); + } + +done: + ncz_hdf5_clear(&hdf5); + ncz_codec_clear(&codec); + return THROW(stat); +} +#else /*!NETCDF_ENABLE_NCZARR_FILTERS*/ +static int +ZF3_encode_filter(NC_FILE_INFO_T* file, NCZ_Filter* filter, NCjson** jfilterp) +{ + NC_UNUSED(file); + NC_UNUSED(filter); + if(jfilterp) *jfilterp = NULL; + return NC_NOERR; +} + +static int +ZF3_decode_filter(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, NCjson* jfilter, NCZ_Filter* filter) +{ + NC_UNUSED(file); + NC_UNUSED(var); + NC_UNUSED(jfilter); + NC_UNUSED(filter); + return NC_NOERR; +} +#endif /*NETCDF_ENABLE_NCZARR_FILTERS*/ + + +/*Search*/ +int +ZF3_searchobjects(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NClist* varnames, NClist* subgrpnames) +{ + int stat = NC_NOERR; + size_t i; + char* grpkey = NULL; + NClist* matches = nclistnew(); + char* subkey = NULL; + char* objkey = NULL; + NCjson* jcontents = NULL; + const NCjson* jnodetype = NULL; + + /* Compute the key for the grp */ + if((stat = NCZ_grpkey(grp,&grpkey))) goto done; + if((stat = NCZMD_list(file,grpkey,matches))) goto done; /* Shallow listing */ + /* Search grp for zarr.json objects and for chunk objects */ + /* In order to tell if the name refers to an array, there are two ways to do it. + 1. we extend the objkey with "/c/ or "/c." to see if it exists as a prefix. + 2. we read the zarr.json and look at the node_type field. + In the absence of consolidated metadat, (1) is slightly faster, but requires + extending the zmap interface. + So, for now, we implement case (2). + */ + for(i=0;iformat_var_info; + NCbytes* key = ncbytesnew(); + size_t r; + + NC_UNUSED(file); + + if(keyp) *keyp = NULL; + assert(islegaldimsep(dimsep)); + + if(zvar->nonstdchunkkey) { + if(rank == 0) {/*scalar*/ + ncbytescat(key,"0"); + } else for(r=0;r 0) ncbytesappend(key,dimsep); + /* Print as decimal with no leading zeros */ + snprintf(sindex,sizeof(sindex),"%lu",(unsigned long)chunkindices[r]); + ncbytescat(key,sindex); + } + } else { /* !zvar->nonstdchunkkey */ + ncbytescat(key,"c"); + for(r=0;rformat_var_info; + char* base = NULL; + char sep; + + NC_UNUSED(file); + + assert(strlen(chunkname) > 0); + + /* Figure out the separator char by looking for the first non digit in the chunkkey */ + if(zvar->nonstdchunkkey) { + oldp = chunkkey; + sep = '\0'; + for(;*oldp;oldp++) {char c = *oldp; if(c < '0' || c > '9') {sep = c; break;}} + } else { + sep = chunkkey[1]; /* might be trailing nul */ + } + assert(sep=='\0' || islegaldimsep(sep)); + + if(zvar->nonstdchunkkey) { + /* Pass 1 to get rank and separate the indices*/ + oldp = (base = chunkkey); + for(rank=0;;) { + newp = strchr(oldp,sep); /* look for next sep or eos */ + rank++; + if(newp == NULL) break; + *newp = '\0'; + oldp = newp+1; + } + } else {/*!zvar->nonstdchunkkey*/ + if(chunkname[0] != 'c') {stat = NC_ENOTZARR; goto done;} + base = &chunkkey[1]; + /* Handle scalar case */ + if(strlen(base)==0) { + rank = 0; + } else { + if(*base != sep) {stat = NC_ENOTZARR; goto done;} + base++; /* skip leading sep */ + /* Pass 1 to get rank and separate the indices*/ + oldp = base; + for(rank=0;;) { + newp = strchr(oldp,sep); /* look for next sep or eos */ + rank++; + if(newp == NULL) break; + *newp = '\0'; + oldp = newp+1; + } + assert(rank > 0); + /* Create index vector */ + if((chunkindices = (size64_t*)malloc(rank*sizeof(size64_t)))==NULL) {stat = NC_ENOMEM; goto done;} + /* Pass 2 to get indices */ + oldp = base; + for(r=0;rhdr.name; + if(i > 0) ncbytescat(buf,","); + ncbytescat(buf,dimname); + } + ncbytescat(buf,"]"); + if(xarraydimsp) {*xarraydimsp = ncbytesextract(buf);} + if(zarr_rankp) {*zarr_rankp = zarr_rank;} + + ncbytesfree(buf); + return THROW(stat); +} + +static char +ZF3_default_dimension_separator(NC_FILE_INFO_T* file) +{ + NC_UNUSED(file); + return DFALT_DIM_SEPARATOR_V3; +} + +/**************************************************/ +/* Support Functions */ + +static int +decode_dim_decls(NC_FILE_INFO_T* file, const NCjson* jdims, NClist* dimdefs) +{ + int stat = NC_NOERR; + size_t i; + struct NCZ_DimInfo* dimdef = NULL; + + NC_UNUSED(file); + + assert(NCJsort(jdims) == NCJ_DICT); + for(i=0;inorm_name,NCJstring(jname),sizeof(dimdef->norm_name)); + + if(NCJisatomic(jdim)) { /* old-style length only dimension spec */ + NCJcheck(NCJcvt(jdim,NCJ_INT,&cvt)); + dimdef->shape = (size64_t)cvt.ival; + dimdef->unlimited = 0; + } else { + const NCjson* jsize = NULL; + const NCjson* junlim = NULL; + assert(NCJsort(jdim) == NCJ_DICT); + NCJcheck(NCJdictget(jdim,"size",(NCjson**)&jsize)); + NCJcheck(NCJdictget(jdim,"unlimited",(NCjson**)&junlim)); + NCJcheck(NCJcvt(jsize,NCJ_INT,&cvt)); + dimdef->shape = (size64_t)cvt.ival; + memset(&cvt,0,sizeof(cvt)); + NCJcheck(NCJcvt(junlim,NCJ_INT,&cvt)); + dimdef->unlimited = (cvt.ival == 0 ? 0 : 1); + } + nclistpush(dimdefs,dimdef); dimdef = NULL; + } + +done: + NCZ_reclaim_diminfo(dimdef); + return THROW(stat); +} + +/* + * Collect/create the dimension names for this variable. + * In order of preference: + * 1. _nczarr_var.dimensions -- the name are FQNs. Only available if !purezarr. + * 2. dimension_names -- relative names scoped to parent group + * 3. _ARRAY_DIMENSIONS -- (xarray) treat as a replacement for dimension_names if the latter is not defined + * 4. _Anonymous_Dim_n -- scoped to root group and n is the length of the dimensions. + */ + +/** +@param [in] file +@param [in] var +@param [in] jvar the JSON metadata for var +@param [out] dimrefs the list of FQNs for the variable's dimrefs +*/ +static int +decode_var_dimrefs(NC_FILE_INFO_T* file, size_t rank, size64_t* shapes, const NCjson* jvar, NClist* dimrefs) +{ + int stat = NC_NOERR; + size_t j; + size_t i; + int purezarr = 0; + NCZ_FILE_INFO_T* zinfo = (NCZ_FILE_INFO_T*)file->format_file_info; + const NCjson* jatts = NULL; + const NCjson* jdimnames = NULL; + const NCjson* jxarray = NULL; + const NCjson* jnczarray = NULL; + const NCjson* jdimfqns = NULL; + + TESTPUREZARR; + + assert(dimrefs != NULL); + + /* Get the "dimension_names" from jvar */ + NCJcheck(NCJdictget(jvar,"dimension_names",(NCjson**)&jdimnames)); + + /* Get the set of attributes */ + NCJcheck(NCJdictget(jvar,"attributes",(NCjson**)&jatts)); + + if(jatts != NULL) { + /* Get the xarray _ARRAY_DIMENSIONS */ + if(!(zinfo->flags & FLAG_XARRAYDIMS)) { + NCJcheck(NCJdictget(jatts,NC_XARRAY_DIMS,(NCjson**)&jxarray)); + } + /* Get _nczarr_array then "dimension_references" */ + NCJcheck(NCJdictget(jatts,NC_NCZARR_ARRAY_ATTR,(NCjson**)&jnczarray)); + if(jnczarray != NULL) { + NCJcheck(NCJdictget(jatts,"dimension_references",(NCjson**)&jdimfqns)); + } + } + + if(!purezarr && jdimfqns != NULL) { /* Use the NCZarr dimension fqns */ + for(i=0;i N_NCZARR_TYPES) return NC_EINVAL; + + if(dtypep) *dtypep = NULL; + if(dnczarrp) *dnczarrp = NULL; + + switch (nctype) { + case NC_BYTE: {dtype = strdup("int8"); break;} + case NC_CHAR: {dtype = strdup("uint8"); dnczarr = strdup("char"); break;} + case NC_SHORT: {dtype = strdup("int16"); break;} + case NC_INT: {dtype = strdup("int32"); break;} + case NC_FLOAT: {dtype = strdup("float32"); break;} + case NC_DOUBLE: {dtype = strdup("float64"); break;} + case NC_UBYTE: {dtype = strdup("uint8"); break;} + case NC_USHORT: {dtype = strdup("uint16"); break;} + case NC_UINT: {dtype = strdup("uint32"); break;} + case NC_INT64: {dtype = strdup("int64"); break;} + case NC_UINT64: {dtype = strdup("uint64"); break;} + case NC_STRING: { + char rstring[64]; + size_t bits = 8*strlen; + snprintf(rstring,sizeof(rstring),STRTEMPLATE,bits); + dtype = strdup(rstring); + dnczarr = strdup("string"); + break; + } + case NC_NAT: case NC_JSON: default: stat = NC_ENOTZARR; goto done; + } + + if(dtypep) {*dtypep = dtype; dtype = NULL;} + if(dnczarrp) {*dnczarrp = dnczarr; dnczarr = NULL;} + +done: + nullfree(dtype); + nullfree(dnczarr); + return THROW(stat); +} + +/* +@internal Convert a numcodecs Zarr v3 dtype spec to a corresponding nc_type. +@param dtype - [in] dtype to convert; NULL => the type came from an attribute type +@param dnczarr - [in] dtype override +@param nctypep - [out] hold corresponding type +@param typelenp - [out] hold corresponding type size (esp. for fixed length strings) +@return NC_NOERR +@return NC_EINVAL +@author Dennis Heimbigner +*/ + +static int +dtype2nctype(const char* dtype, const char* dnczarr, nc_type* nctypep, size_t* typelenp) +{ + int stat = NC_NOERR; + nc_type nctype = NC_NAT; + size_t typelen = 0; /* use int so we can return -1 to indicate undefined */ + char testname[16]; + + if(nctypep) *nctypep = NC_NAT; + if(typelenp) *typelenp = 0; + + /* At least one must be defined */ + if((dtype == NULL || strlen(dtype) == 0) && (dnczarr == NULL || strlen(dnczarr) == 0)) + {stat = NC_ENOTZARR; goto done;} + + /* parse the dnczarr type first */ + nctype = NC_NAT; + if(dnczarr != NULL) { + if(strcmp(dnczarr,"char")==0) {nctype = NC_CHAR; typelen = 1;} + else if(strcmp(dnczarr,"string")==0) {nctype = NC_STRING; typelen = 0;} + else if(strcmp(dnczarr,NC_JSON_DTYPE_V3)==0) {nctype = NC_JSON; typelen = 0;} + } + /* Not an nczarr type, try a zarr type */ + if(nctype == NC_NAT) { + if(dtype == NULL) {dtype = dnczarr; dnczarr = NULL; /* override */} + /* make it possible to use memcmp */ + memset(testname,0,sizeof(testname)); + strncpy(testname,dtype,sizeof(testname)); + if(memcmp(testname,"int8",4)==0) {nctype = NC_BYTE; typelen = 1;} + else if(memcmp(testname,"int16",5)==0) {nctype = NC_SHORT; typelen = 1;} + else if(memcmp(testname,"int32",5)==0) {nctype = NC_INT; typelen = 1;} + else if(memcmp(testname,"int64",5)==0) {nctype = NC_INT64; typelen = 1;} + else if(memcmp(testname,"uint8",5)==0) {nctype = NC_UBYTE; typelen = 1;} + else if(memcmp(testname,"uint16",6)==0) {nctype = NC_USHORT; typelen = 1;} + else if(memcmp(testname,"uint32",6)==0) {nctype = NC_UINT; typelen = 1;} + else if(memcmp(testname,"uint64",6)==0) {nctype = NC_UINT64; typelen = 1;} + else if(memcmp(testname,"float32",7)==0) {nctype = NC_FLOAT; typelen = 1;} + else if(memcmp(testname,"float64",7)==0) {nctype = NC_DOUBLE; typelen = 1;} + else if(1==sscanf(testname,STRTEMPLATE,&typelen)) { + if((typelen % 8) == 0) { + typelen = typelen / 8; /* convert bits to bytes */ + nctype = NC_STRING; + } + } else { + stat = NC_ENOTZARR; + goto done; + } + } + if(nctypep) *nctypep = nctype; + if(typelenp) *typelenp = typelen; + +done: + return THROW(stat); +} + +/* +Extract type and data for an attribute from json +*/ +static int +computeattrinfo(NC_FILE_INFO_T* file, NC_OBJ* container, const char* aname, const NCjson* jtypes, const NCjson* jdata, struct NCZ_AttrInfo* ainfo) +{ + int stat = NC_NOERR; + const NCjson* jatype = NULL; + + ZTRACE(3,"container=%s aname=%s",container->name,aname); + + assert(aname != NULL); + + ainfo->name = aname; + ainfo->nctype = NC_NAT; + + /* Infer the attribute data's type */ + if(jtypes == NULL) { + if(strcmp(aname,NC_FillValue)==0 && container->sort == NCVAR) + ainfo->nctype = ((NC_VAR_INFO_T*)container)->type_info->hdr.id; /* Use the fill value var type */ + else { + if((stat = NCZ_inferattrtype(ainfo->name,ainfo->nctype,jdata,&ainfo->nctype))) goto done; + } + } else { + const char* atype = NULL; + /* Search the jtypes for the type of this attribute */ + NCJcheck(NCJdictget(jtypes,aname,(NCjson**)&jatype)); + if(jatype == NULL || NCJsort(jatype) != NCJ_STRING) {stat = NC_ENCZARR; goto done;} + atype = NCJstring(jatype); + /* d2type == NULL signals that this is an attribute => we only have the dnczarr type */ + if((stat=dtype2nctype(NULL,atype,&ainfo->nctype,&ainfo->typelen))) goto done; + if(ainfo->nctype >= N_NCZARR_TYPES) {stat = NC_EINTERNAL; goto done;} + } + if((stat = NCZ_computeattrdata(file,jdata,ainfo))) goto done; + +done: + return ZUNTRACEX(THROW(stat),"typeid=%d typelen=%d datalen=%u",ainfo->nctype,ainfo->typelen,ainfo->datalen); +} + +/* Build a {name,configuration} dict */ +static NCjson* +build_named_config(const char* name, int pairs, ...) +{ + int stat = NC_NOERR; + NCjson* jdict = NULL; + NCjson* jcfg = NULL; + va_list ap; + int i; + + NCJcheck(NCJnew(NCJ_DICT,&jcfg)); + /* Get the varargs */ + va_start(ap, pairs); + for(i=0;iformat_grp_info)->common.file = h5; /* For new groups, there are no atts to read from file. */ - g->atts_read = 1; + NCZ_setatts_read((NC_OBJ*)g); /* Return the ncid to the user. */ if (new_ncid) diff --git a/libnczarr/zincludes.h b/libnczarr/zincludes.h index 3fdae6c6fd..b47a130b35 100644 --- a/libnczarr/zincludes.h +++ b/libnczarr/zincludes.h @@ -19,8 +19,8 @@ #include #include #include -#include #include /* size_t, ptrdiff_t */ +#include #include #ifdef HAVE_UNISTD_H #include @@ -46,9 +46,13 @@ extern "C" { #include "ncs3sdk.h" #include "ncindex.h" #include "ncjson.h" +#include "ncproplist.h" #include "zmap.h" +#include "zmetadata.h" #include "zinternal.h" +#include "zfilter.h" +#include "zformat.h" #include "zdispatch.h" #include "zprovenance.h" #include "zodom.h" @@ -61,6 +65,7 @@ extern "C" { } #endif +#define ncz_find_default_chunksizes2 nc4_find_default_chunksizes2 #endif /* ZINCLUDES_H */ diff --git a/libnczarr/zinfer.c b/libnczarr/zinfer.c new file mode 100644 index 0000000000..f3e1358cb1 --- /dev/null +++ b/libnczarr/zinfer.c @@ -0,0 +1,466 @@ +/********************************************************************* + * Copyright 2018, UCAR/Unidata + * See netcdf/COPYRIGHT file for copying and redistribution conditions. + *********************************************************************/ + +/** +Given various pieces of information and a map, +infer the format to be used. +Note that format1 (the oldest NCZarr format is now disallowed). + +The current rules are as follows. + +Creation: +1. Use the Zarr format specified in the mode flags, if any. +2. Otherwise use the default Zarr format +3. Use the chosen Zarr format for the NCZarr format also. +4. Use pure zarr if mode has "zarr" or "xarray" or "noxarray" tag. + +Read: +2. If root contains ".zgroup", then +2.1 Zarr version is 2, and is verified by the .zgroup key "format". +2.2 If .zgroup contains key "_nczarr_superblock" then NCZarr version is 2.0.0 and can be verified by key "version". +2.3 Otherwise NCZarr version is NULL (i.e. pure Zarr). +3. If root subtree contains an object named "zarr.json" then +3.1 the Zarr format is V3. +3.2 If zarr.json is in root and contains key "_nczarr_superblock" then NCZarr version is 3.0.0 and can be verified by key "version". +4. If Zarr version is still unknown, then it defaults to 2. +5. If NCZarr version is still unknown then the NCZarr version is NULL (i.e. pure zarr). +*/ + +#include "zincludes.h" +#include "ncrc.h" +#include "ncjson.h" +#include "ncpathmgr.h" + +#ifdef NETCDF_ENABLE_NCZARR_ZIP +#include +#endif + +/**************************************************/ + +/* Tag search parameter */ +struct TagParam { + int zarrformat; + int nczarrformat; + int haszmetadata; +}; + +struct ZarrObjects { + const char* name; + int zarr_version; + int haszmetadata; +} zarrobjects[] = { +{"/zarr.json", ZARRFORMAT3, 0}, +{"/.zgroup", ZARRFORMAT2, 0}, +{"/.zarray", ZARRFORMAT2, 0}, +{"/.zattrs", ZARRFORMAT2, 0}, +{"/.zmetadata", ZARRFORMAT2, 1}, +{NULL, 0, 0}, +}; + +/**************************************************/ +/*Forward*/ + +static int NCZ_infer_storage_type(NC_FILE_INFO_T* file, NCURI* url, NCZM_IMPL* implp); +static int infer_create_format(NC_FILE_INFO_T* file, int* zarrformatp, int* nczarrformatp); +static int tagsearch(NCZMAP* map, const char* prefix, const char* segment, void* param); + +/**************************************************/ + +/** +Figure out the formatter to use when creating a file +@param file +@param formatterp +@return NC_NOERR | NC_EXXX +*/ + +int +NCZ_get_create_formatter(NC_FILE_INFO_T* file, const NCZ_Formatter** formatterp) +{ + int stat = NC_NOERR; + const NCZ_Formatter* formatter = NULL; + NCZ_FILE_INFO_T* zfile = NULL; + int zarr_format = 0; + int nczarr_format = 0; + + zfile = (NCZ_FILE_INFO_T*)file->format_file_info; + assert(zfile != NULL); + + /* Infer the zarr+nczarr formats */ + if((stat = infer_create_format(file,&zarr_format,&nczarr_format))) goto done; + zfile->zarr.zarr_format = zarr_format; + zfile->zarr.nczarr_format = nczarr_format; + + /* If the nczarr_format is NULL, then that implies pure zarr, + so use the zarr format instead. */ + if(nczarr_format != 0) { + switch(nczarr_format) { + case 2: formatter = NCZ_formatter2; break; + case 3: formatter = NCZ_formatter3; break; + default: stat = NC_ENCZARR; goto done; + } + } else { /* Decide based on zarr format plus the fact that it is pure zarr */ + switch(zarr_format) { + case 2: formatter = NCZ_formatter2; break; + case 3: formatter = NCZ_formatter3; break; + default: stat = NC_ENCZARR; goto done; + } + } + + if(formatterp) *formatterp = formatter; + +done: + return THROW(stat); +} + +static int +infer_create_format(NC_FILE_INFO_T* file, int* zarrformatp, int* nczarrformatp) +{ + int stat = NC_NOERR; + int zarrformat = 0; + int nczarrformat = NCZARRFORMAT0; + NCZ_FILE_INFO_T* zfile = (NCZ_FILE_INFO_T*)file->format_file_info; + NCZMAP* map = zfile->map; + + NC_UNUSED(file); + NC_UNUSED(map); + + switch (zfile->zarr.zarr_format) { + case ZARRFORMAT2: + zarrformat = ZARRFORMAT2; + nczarrformat = NCZARRFORMAT2; + break; + case ZARRFORMAT3: + zarrformat = ZARRFORMAT3; + nczarrformat = NCZARRFORMAT3; + break; + default: stat = NC_ENOTZARR; break; + } + if(zarrformatp) *zarrformatp = zarrformat; + if(nczarrformatp) *nczarrformatp = nczarrformat; + return THROW(stat); +} + +/** +Figure out the formatter to use when opening a file +@param file +@param formatterp +@return NC_NOERR | NC_EXXX +*/ + +int +NCZ_get_open_formatter(NC_FILE_INFO_T* file, const NCZ_Formatter** formatterp) +{ + int stat = NC_NOERR; + const NCZ_Formatter* formatter = NULL; + NCZ_FILE_INFO_T* zfile = (NCZ_FILE_INFO_T*)file->format_file_info; + int zarr_format = zfile->zarr.zarr_format; + int nczarr_format = zfile->zarr.nczarr_format; + + zfile = (NCZ_FILE_INFO_T*)file->format_file_info; + assert(zfile != NULL); + + zfile->zarr.zarr_format = zarr_format; + zfile->zarr.nczarr_format = nczarr_format; + assert(zfile->zarr.zarr_format != 0); + + /* If the nczarr_format is NULL, then that implies pure zarr, + so use the zarr format instead. */ + if(nczarr_format != 0) { + switch(nczarr_format) { + case 2: formatter = NCZ_formatter2; break; + case 3: formatter = NCZ_formatter3; break; + default: stat = NC_ENCZARR; goto done; + } + } else { /* Decide based on zarr format plus the fact that it is pure zarr */ + switch(zarr_format) { + case 2: formatter = NCZ_formatter2; break; + case 3: formatter = NCZ_formatter3; break; + default: stat = NC_ENCZARR; goto done; + } + } + + if(formatterp) *formatterp = formatter; + +done: + return THROW(stat); +} + +int +NCZ_infer_open_zarr_format(NC_FILE_INFO_T* file) +{ + int stat = NC_NOERR; + int zarrformat = 0; + struct TagParam param = {0,0,0}; + NCZ_FILE_INFO_T* zfile = (NCZ_FILE_INFO_T*)file->format_file_info; + + NC_UNUSED(file); + + /* Probe the map for tell-tale objects and dict keys */ + if(zarrformat == 0) { + struct ZarrObjects *zo = NULL; + stat = NC_ENOTZARR; // Until proven otherwise we aren't sure it's a zarr dataset + /* We search on the root path for V2 or V3 tags */ + for (zo = zarrobjects; zo->name; zo++) { + if ((stat = nczmap_exists(zfile->map,zo->name)) == NC_NOERR) { + zarrformat = zo->zarr_version; + break; /* No need to look for more keys */ + } + } + } + if(zarrformat == 0 || stat != NC_NOERR) { + /* As a last resort, we need to search subtree for a V2 or V3 tag */ + switch(stat = nczmap_walk(zfile->map,"/",tagsearch, ¶m)) { + case NC_ENOOBJECT: + /* No tag was found, so its not a zarr file */ + stat = NC_ENOTZARR; + goto done; + case NC_NOERR: /* found and format is in param */ + switch(param.zarrformat) { + case ZARRFORMAT2: case ZARRFORMAT3: + zarrformat = param.zarrformat; + break; + default: + stat = NC_ENOTZARR; + goto done; + } + break; + default: + stat = NC_ENOTZARR; + goto done; + } + } + if(zarrformat == 0) {stat = NC_ENOTZARR; goto done;} + zfile->zarr.zarr_format = zarrformat; + +done: + return THROW(stat); +} + +int +NCZ_infer_open_nczarr_format(NC_FILE_INFO_T* file) +{ + int stat = NC_NOERR; + NCjson* jrootgrp = NULL; + const NCjson* jsuperg = NULL; + const NCjson* jsupera = NULL; + NCjson* jrootatts = NULL; + NCZ_FILE_INFO_T* zfile = (NCZ_FILE_INFO_T*)file->format_file_info; + int zarrformat = zfile->zarr.zarr_format; + int nczarrformat = 0; + + if(zarrformat == ZARRFORMAT2) { + /* Download /.zgroup and /.zattrs */ + if((stat = NCZMD_fetch_json_content(file, NCZMD_GROUP, Z2METAROOT, &jrootgrp))) goto done; + if((stat = NCZMD_fetch_json_content(file, NCZMD_ATTRS, Z2ATTSROOT, &jrootatts))) goto done; + /* Look for superblock */ + if(jrootgrp != NULL) NCJdictget(jrootgrp,NC_NCZARR_SUPERBLOCK_ATTR,(NCjson**)&jsuperg); + if(jrootatts != NULL) NCJdictget(jrootatts,NC_NCZARR_SUPERBLOCK_ATTR,(NCjson**)&jsupera); + if(jsuperg == NULL && jsupera == NULL) nczarrformat = NCZARRFORMAT0; else nczarrformat = NCZARRFORMAT2; + NCZ_reclaim_json(jrootgrp); jrootgrp = NULL; + NCZ_reclaim_json(jrootatts); jrootatts = NULL; + } else +#ifdef NETCDF_ENABLE_NCZARR_V3 + if(zarrformat == ZARRFORMAT3 && nczarrformat == 0) { + const NCjson* jrootatts = NULL; + /* Look for "/zarr.json" */ + if((stat = NCZMD_fetch_json_content(file, NCZMD_GROUP, Z3METAROOT, &jrootgrp))) goto done; + if(jrootgrp == NULL || NCJsort(jrootgrp) != NCJ_DICT) { + nczarrformat = NCZARRFORMAT0; + } else { + NCJdictget(jrootgrp,"attributes",(NCjson**)&jrootatts); + if(jrootatts == NULL || NCJsort(jrootatts) != NCJ_DICT) { + nczarrformat = NCZARRFORMAT0; + } else { + /* Look for the _nczarr_superblock tag */ + NCJdictget(jrootatts,NC_NCZARR_SUPERBLOCK_ATTR,(NCjson**)&jsupera); + if(jsupera == NULL) + nczarrformat = NCZARRFORMAT0; /* Pure zarr V3 file */ + else + nczarrformat = NCZARRFORMAT3; + } + } + NCZ_reclaim_json(jrootgrp); jrootgrp = NULL; + } else +#else + {stat = NC_ENOTBUILT; goto done;} +#endif + + if(nczarrformat == 0) nczarrformat = zarrformat; + zfile->zarr.nczarr_format = nczarrformat; + +done: + NCZ_reclaim_json(jrootgrp); + NCZ_reclaim_json(jrootatts); + return THROW(stat); +} + +/* +Figure out the zarr format based on the +top-level keys of the dataset. +*/ +static int +tagsearch(NCZMAP* map, const char* prefix, const char* key, void* param) +{ + struct TagParam* formats = (struct TagParam*)param; + const char* segment = NULL; + size_t seglen = 0; + struct ZarrObjects* zo = NULL; + + NC_UNUSED(map); + NC_UNUSED(prefix); + + /* Validate */ + segment = strrchr(key,'/'); + if(segment == NULL) segment = key; else segment++; + seglen = strlen(segment); + if(seglen == 0) return NC_NOERR; + + for(zo=zarrobjects;zo->name;zo++) { + if(strcasecmp(segment,zo->name+1)==0) { + formats->zarrformat = zo->zarr_version; + return NC_NOERR; /* tell walker to stop */ + } + } + return NC_ENOOBJECT; /* Keep looking */ +} + +/**************************************************/ +/** +Given various pieces of information and a URL, +infer the store type: currently file,zip,s3. + +The current rules are as follows. + +Creation: +1. Use the store type specified in the URL: "file", "zip", "s3". + +Read: +1. If the URL specifies a store type, then use that type unconditionally. +2. If the URL protocol is "file", then treat the URL path as a file path. +2.1 If the path references a directory, then the store type is "file". +2.2 If the path references a file, and can be opened by libzip, then the store type is "zip" +2.3 Otherwise fail with NC_ENOTZARR. +3. If the url protocol is "http" or "https" then: +3.1 Apply the function NC_iss3 and if it succeeds, the store type is s3|gs3. +3.2 Apply the function NC_iszoh and if it succeeds, the store type is Zarr-Over-HTTP. +3.3 If the mode contains "file", then storetype is file -- meaning REST API to a file store. +*/ + +static int +NCZ_infer_storage_type(NC_FILE_INFO_T* file, NCURI* url, NCZM_IMPL* implp) +{ + int ret = NC_NOERR; + int create; + NCZM_IMPL impl = NCZM_UNDEF; + NCZ_FILE_INFO_T* zfile = (NCZ_FILE_INFO_T*)file->format_file_info; + + NC_UNUSED(file); + + assert(zfile != NULL); + create = zfile->creating; + + /* mode storetype overrides all else */ + if(NC_testmode(url, "file")) impl = NCZM_FILE; +#ifdef NETCDF_ENABLE_S3 + else if(NC_testmode(url, "s3")) impl = NCZM_S3; + else if(NC_testmode(url, "gs3")) impl = NCZM_GS3; +#ifdef NETCDF_ENABLE_ZOH + else if(NC_testmode(url, "zoh")) impl = NCZM_ZOH; +#endif +#endif +#ifdef NETCDF_ENABLE_NCZARR_ZIP + else if(NC_testmode(url, "zip")) impl = NCZM_ZIP; +#endif + if(!create) { /* Reading a file of some kind */ + if(strcasecmp(url->protocol,"file")==0) { + struct stat buf; + /* Storage: file,zip,... */ + if(NCstat(url->path,&buf)<0) {ret = errno; goto done;} + if(S_ISDIR(buf.st_mode)) + impl = NCZM_FILE; /* only possibility */ +#ifdef NETCDF_ENABLE_NCZARR_ZIP + else if(S_ISREG(buf.st_mode)) { + /* Should be zip, but verify */ + zip_flags_t zipflags = ZIP_RDONLY; + zip_t* archive = NULL; + int zerrno = ZIP_ER_OK; + /* Open the file */ + archive = zip_open(url->path,(int)zipflags,&zerrno); + if(archive != NULL) { + impl = NCZM_ZIP; + zip_close(archive); + } + } +#endif + } + } + + if(impl == NCZM_UNDEF) + {ret = NC_EURL; goto done;} + + if(implp) *implp = impl; +done: + return THROW(ret); +} + +/** +Figure out the storage type and create and return a corresponding map. + +@param file +@param url +@param mode +@param constraints +@param params +@return NC_NOERR | NC_EXXX +*/ + +int +NCZ_get_map(NC_FILE_INFO_T* file, NCURI* url, mode_t mode, size64_t constraints, void* params, NCZMAP** mapp) +{ + int stat = NC_NOERR; + int create = 0; + NCZMAP* map = NULL; + NCZM_IMPL impl = NCZM_UNDEF; + NCZ_FILE_INFO_T* zfile = NULL; + char* path = NULL; + + zfile = (NCZ_FILE_INFO_T*)file->format_file_info; + assert(zfile != NULL); + create = zfile->creating; + + if((stat = NCZ_infer_storage_type(file, url, &impl))) goto done; + + if((path = ncuribuild(url,NULL,NULL,NCURIALL))==NULL) {stat = NC_ENCZARR; goto done;} + + switch (impl) { + case NCZM_FILE: case NCZM_ZIP: case NCZM_S3: case NCZM_GS3: + if(create) + {if((stat = nczmap_create(impl,path,mode,constraints,params,&map))) goto done;} + else + {if((stat = nczmap_open(impl,path,mode,constraints,params,&map))) goto done;} + break; +#ifdef NETCDF_ENABLE_ZOH + case NCZM_ZOH: + if(create) {stat = NC_ENOTZARR; goto done;} + constraints |= FLAG_ZOH; + if((stat = nczmap_open(impl,path,mode,constraints,params,&map))) goto done; + break; +#endif + case NCZM_UNDEF: + stat = NC_EURL; + goto done; + default: + stat = NC_ENOTZARR; + goto done; + } + + if(mapp) {*mapp = map; map = NULL;} + +done: + nullfree(path); + if(map) (void)nczmap_close(map,0); + return THROW(stat); +} + diff --git a/libnczarr/zinternal.c b/libnczarr/zinternal.c index cb9b4ff850..e2ceb4febb 100644 --- a/libnczarr/zinternal.c +++ b/libnczarr/zinternal.c @@ -17,6 +17,7 @@ #include "zincludes.h" #include "zfilter.h" +#include "zfill.h" /* Forward */ @@ -27,6 +28,9 @@ extern int nc_log_level; #endif /* LOGGING */ +const NCproplist* NCplistzarrv2 = NULL; +const NCproplist* NCplistzarrv3 = NULL; + #ifdef LOOK /** * @internal Provide a wrapper for H5Eset_auto @@ -61,14 +65,31 @@ NCZ_initialize_internal(void) ngs = NC_getglobalstate(); if(ngs != NULL) { /* Defaults */ - ngs->zarr.dimension_separator = DFALT_DIM_SEPARATOR; + ngs->zarr.default_zarrformat = DFALTZARRFORMAT; + /* Allow .rc file override */ dimsep = NC_rclookup("ZARR.DIMENSION_SEPARATOR",NULL,NULL); if(dimsep != NULL) { /* Verify its value */ if(dimsep != NULL && strlen(dimsep) == 1 && islegaldimsep(dimsep[0])) ngs->zarr.dimension_separator = dimsep[0]; } + /* Allow environment variable override */ + if(getenv(NCZARRDEFAULTFORMAT) != NULL) { + int dfalt = 0; + sscanf(getenv(NCZARRDEFAULTFORMAT),"%d",&dfalt); + if(dfalt == 2 || dfalt == 3) + ngs->zarr.default_zarrformat = dfalt; + } + if(ngs->zarr.default_zarrformat == 3) + ngs->zarr.dimension_separator = DFALT_DIM_SEPARATOR_V3; + else + ngs->zarr.dimension_separator = DFALT_DIM_SEPARATOR_V2; } + /* Build some common proplists */ + NCplistzarrv2 = ncproplistnew(); + ncproplistadd((NCproplist*)NCplistzarrv2,"zarrformat",(uintptr_t)2); + NCplistzarrv3 = ncproplistnew(); + ncproplistadd((NCproplist*)NCplistzarrv3,"zarrformat",(uintptr_t)3); return stat; } @@ -88,6 +109,11 @@ NCZ_finalize_internal(void) #ifdef NETCDF_ENABLE_S3 NCZ_s3finalize(); #endif + + /* Cleanup common proplists */ + ncproplistfree((NCproplist*)NCplistzarrv2); NCplistzarrv2 = NULL; + ncproplistfree((NCproplist*)NCplistzarrv3); NCplistzarrv3 = NULL; + return NC_NOERR; } @@ -104,12 +130,13 @@ NCZ_finalize_internal(void) * @author Dennis Heimbigner, Ed Hartnett */ static int -find_var_dim_max_length(NC_GRP_INFO_T *grp, int varid, int dimid, - size_t *maxlen) +find_var_dim_max_length(NC_GRP_INFO_T *grp, int varid, int dimid, size_t *maxlen) { NC_VAR_INFO_T *var; int retval = NC_NOERR; + NC_UNUSED(dimid); + *maxlen = 0; /* Find this var. */ @@ -125,94 +152,10 @@ find_var_dim_max_length(NC_GRP_INFO_T *grp, int varid, int dimid, else { /* Get the number of records in the dataset. */ -#ifdef LOOK -#if 0 -not needed if ((retval = ncz_open_var_grp2(grp, var->hdr.id, &datasetid))) - BAIL(retval); -#endif - if ((spaceid = H5Dget_space(datasetid)) < 0) - BAIL(NC_EHDFERR); - /* If it's a scalar dataset, it has length one. */ - if (H5Sget_simple_extent_type(spaceid) == H5S_SCALAR) - { - *maxlen = (var->dimids && var->dimids[0] == dimid) ? 1 : 0; - } - else - { - /* Check to make sure ndims is right, then get the len of each - dim in the space. */ - if ((dataset_ndims = H5Sget_simple_extent_ndims(spaceid)) < 0) - BAIL(NC_EHDFERR); - if (dataset_ndims != var->ndims) - BAIL(NC_EHDFERR); - if (!(h5dimlen = malloc(dataset_ndims * sizeof(hsize_t)))) - BAIL(NC_ENOMEM); - if (!(h5dimlenmax = malloc(dataset_ndims * sizeof(hsize_t)))) - BAIL(NC_ENOMEM); - if ((dataset_ndims = H5Sget_simple_extent_dims(spaceid, - h5dimlen, h5dimlenmax)) < 0) - BAIL(NC_EHDFERR); - LOG((5, "find_var_dim_max_length: varid %d len %d max: %d", - varid, (int)h5dimlen[0], (int)h5dimlenmax[0])); - for (d=0; ddimids[d] == dimid) { - *maxlen = *maxlen > h5dimlen[d] ? *maxlen : h5dimlen[d]; - } - } - } -#endif /*LOOK*/ } - -#ifdef LOOK -exit: - if (spaceid > 0 && H5Sclose(spaceid) < 0) - BAIL2(NC_EHDFERR); - if (h5dimlen) free(h5dimlen); - if (h5dimlenmax) free(h5dimlenmax); -#endif return retval; } -#ifdef LOOK -/** - * @internal Search for type with a given HDF type id. - * - * @param h5 File - * @param target_hdf_typeid ZARR type ID to find. - * - * @return Pointer to type info struct, or NULL if not found. - * @author Dennis Heimbigner, Ed Hartnett - */ -NC_TYPE_INFO_T * -ncz_rec_find_hdf_type(NC_FILE_INFO_T *h5, hid_t target_hdf_typeid) -{ - NC_TYPE_INFO_T *type; - int i; - - assert(h5); - - for (i = 0; i < nclistlength(h5->alltypes); i++) - { - type = (NC_TYPE_INFO_T*)nclistget(h5->alltypes, i); - if(type == NULL) continue; - -#ifdef LOOK - /* Select the ZARR typeid to use. */ - hdf_typeid = ncz_type->native_hdf_typeid ? - ncz_type->native_hdf_typeid : ncz_type->hdf_typeid; - - /* Is this the type we are searching for? */ - if ((equal = H5Tequal(hdf_typeid, target_hdf_typeid)) < 0) - return NULL; - if (equal) - return type; -#endif - } - /* Can't find it. Fate, why do you mock me? */ - return NULL; -} -#endif - /** * @internal Find the actual length of a dim by checking the length of * that dim in all variables that use it, in grp or children. **len @@ -260,277 +203,6 @@ ncz_find_dim_len(NC_GRP_INFO_T *grp, int dimid, size_t **len) return NC_NOERR; } -#if 0 -/** - * @internal Close ZARR resources for global atts in a group. - * - * @param grp Pointer to group info struct. - * - * @return ::NC_NOERR No error. - * @return ::NC_EHDFERR ZARR error. - * @author Dennis Heimbigner, Ed Hartnett - */ - -static int -close_gatts(NC_GRP_INFO_T *grp) -{ - NC_ATT_INFO_T *att; - int a; - - for (a = 0; a < ncindexsize(grp->att); a++) - { - att = (NC_ATT_INFO_T *)ncindexith(grp->att, a); - assert(att && att->format_att_info); - -#ifdef LOOK - /* Close the ZARR typeid. */ - if (ncz_att->native_hdf_typeid && - H5Tclose(ncz_att->native_hdf_typeid) < 0) - return NC_EHDFERR; -#endif - } - return NC_NOERR; -} -#endif /*0*/ - -#if 0 -/** - * @internal Close ZARR resources for vars in a group. - * - * @param grp Pointer to group info struct. - * - * @return ::NC_NOERR No error. - * @return ::NC_EHDFERR ZARR error. - * @author Dennis Heimbigner, Ed Hartnett - */ -static int -close_vars(NC_GRP_INFO_T *grp) -{ - NC_VAR_INFO_T *var; - NC_ATT_INFO_T *att; - int a, i; - - for (i = 0; i < ncindexsize(grp->vars); i++) - { - var = (NC_VAR_INFO_T *)ncindexith(grp->vars, i); - assert(var && var->format_var_info); - - /* Close the ZARR dataset associated with this var. */ -#ifdef LOOK - if (ncz_var->hdf_datasetid) -#endif - { -#ifdef LOOK - LOG((3, "closing ZARR dataset %lld", ncz_var->hdf_datasetid)); - if (H5Dclose(ncz_var->hdf_datasetid) < 0) - return NC_EHDFERR; -#endif - if (var->fill_value) - { - if (var->type_info) - { - int stat = NC_NOERR; - if((stat = NC_reclaim_data(grp->nc4_info,var->type_info->hdr.id,var->fill_value,1))) - return stat; - nullfree(var->fill_value); - } - } - } - -#ifdef LOOK - /* Delete any ZARR dimscale objid information. */ - if (ncz_var->dimscale_ncz_objids) - free(ncz_var->dimscale_ncz_objids); -#endif - - for (a = 0; a < ncindexsize(var->att); a++) - { - att = (NC_ATT_INFO_T *)ncindexith(var->att, a); - assert(att && att->format_att_info); - -#ifdef LOOK - /* Close the ZARR typeid if one is open. */ - if (ncz_att->native_hdf_typeid && - H5Tclose(ncz_att->native_hdf_typeid) < 0) - return NC_EHDFERR; -#endif - } - - /* Reclaim filters */ - if(var->filters != NULL) { - (void)NCZ_filter_freelists(var); - } - var->filters = NULL; - - } - - return NC_NOERR; -} -#endif /*0*/ - -#if 0 -/** - * @internal Close ZARR resources for dims in a group. - * - * @param grp Pointer to group info struct. - * - * @return ::NC_NOERR No error. - * @return ::NC_EHDFERR ZARR error. - * @author Dennis Heimbigner, Ed Hartnett - */ -static int -close_dims(NC_GRP_INFO_T *grp) -{ - NC_DIM_INFO_T *dim; - size_t i; - - for (i = 0; i < ncindexsize(grp->dim); i++) - { - dim = (NC_DIM_INFO_T *)ncindexith(grp->dim, i); - assert(dim && dim->format_dim_info); - -#ifdef LOOK - /* If this is a dim without a coordinate variable, then close - * the ZARR DIM_WITHOUT_VARIABLE dataset associated with this - * dim. */ - if (ncz_dim->hdf_dimscaleid && H5Dclose(ncz_dim->hdf_dimscaleid) < 0) - return NC_EHDFERR; -#endif - } - - return NC_NOERR; -} -#endif /*0*/ - -#if 0 -/** - * @internal Close ZARR resources for types in a group. Set values to - * 0 after closing types. Because of type reference counters, these - * closes can be called multiple times. - * - * @param grp Pointer to group info struct. - * - * @return ::NC_NOERR No error. - * @return ::NC_EHDFERR ZARR error. - * @author Dennis Heimbigner, Ed Hartnett - */ -static int -close_types(NC_GRP_INFO_T *grp) -{ - size_t i; - - for (i = 0; i < ncindexsize(grp->type); i++) - { - NC_TYPE_INFO_T *type; - - type = (NC_TYPE_INFO_T *)ncindexith(grp->type, i); - assert(type && type->format_type_info); - -#ifdef LOOK - /* Close any open user-defined ZARR typeids. */ - if (ncz_type->hdf_typeid && H5Tclose(ncz_type->hdf_typeid) < 0) - return NC_EHDFERR; - ncz_type->hdf_typeid = 0; - if (ncz_type->native_hdf_typeid && - H5Tclose(ncz_type->native_hdf_typeid) < 0) - return NC_EHDFERR; - ncz_type->native_hdf_typeid = 0; -#endif - } - - return NC_NOERR; -} -#endif /*0*/ - -#if 0 -/** - * @internal Recursively free ZARR objects for a group (and everything - * it contains). - * - * @param grp Pointer to group info struct. - * - * @return ::NC_NOERR No error. - * @return ::NC_EHDFERR ZARR error. - * @author Dennis Heimbigner, Ed Hartnett - */ -static int -ncz_rec_grp_NCZ_del(NC_GRP_INFO_T *grp) -{ - size_t i; - int retval; - - assert(grp && grp->format_grp_info); - LOG((3, "%s: grp->name %s", __func__, grp->hdr.name)); - - /* Recursively call this function for each child, if any, stopping - * if there is an error. */ - for (i = 0; i < ncindexsize(grp->children); i++) - if ((retval = ncz_rec_grp_NCZ_del((NC_GRP_INFO_T *)ncindexith(grp->children, - i)))) - return retval; - - /* Close ZARR resources associated with global attributes. */ - if ((retval = close_gatts(grp))) - return retval; - - /* Close ZARR resources associated with vars. */ - if ((retval = close_vars(grp))) - return retval; - - /* Close ZARR resources associated with dims. */ - if ((retval = close_dims(grp))) - return retval; - - /* Close ZARR resources associated with types. */ - if ((retval = close_types(grp))) - return retval; - - /* Close the ZARR group. */ - LOG((4, "%s: closing group %s", __func__, grp->hdr.name)); -#ifdef LOOK - if (ncz_grp->hdf_grpid && H5Gclose(ncz_grp->hdf_grpid) < 0) - return NC_EHDFERR; -#endif - - return NC_NOERR; -} -#endif /*0*/ - -/** - * @internal Given an ncid and varid, get pointers to the group and var - * metadata. Lazy var metadata reads are done as needed. - * - * @param ncid File ID. - * @param varid Variable ID. - * @param h5 Pointer that gets pointer to the NC_FILE_INFO_T struct - * for this file. Ignored if NULL. - * @param grp Pointer that gets pointer to group info. Ignored if - * NULL. - * @param var Pointer that gets pointer to var info. Ignored if NULL. - * - * @return ::NC_NOERR No error. - * @return ::NC_ENOTVAR Variable not found. - * @author Dennis Heimbigner, Ed Hartnett - */ -int -ncz_find_grp_file_var(int ncid, int varid, NC_FILE_INFO_T **h5, - NC_GRP_INFO_T **grp, NC_VAR_INFO_T **var) -{ - NC_FILE_INFO_T *my_h5; - NC_VAR_INFO_T *my_var; - int retval; - - /* Delegate to libsrc4 */ - if((retval = nc4_find_grp_h5_var(ncid,varid,&my_h5,grp,&my_var))) return retval; - - /* Do we need to read var metadata? */ - if (!my_var->meta_read && my_var->created) - if ((retval = ncz_get_var_meta(my_h5, my_var))) - return retval; - if (var) *var = my_var; - if (h5) *h5 = my_h5; - return NC_NOERR; -} /** * @internal Given an ncid, varid, and attribute name, return @@ -591,7 +263,7 @@ ncz_find_grp_var_att(int ncid, int varid, const char *name, int attnum, /* Read the attributes for this var, if any */ switch (retval = ncz_getattlist(my_grp, varid, &my_var, &attlist)) { case NC_NOERR: assert(attlist); break; - case NC_EEMPTY: retval = NC_NOERR; attlist = NULL; break; /* variable has no attributes */ + case NC_ENOOBJECT: retval = NC_NOERR; attlist = NULL; break; /* variable has no attributes */ default: return retval; /* significant error */ } @@ -629,7 +301,7 @@ ncz_find_grp_var_att(int ncid, int varid, const char *name, int attnum, } /** - * @internal Ensure that either var->no_fill || var->fill_value != NULL. + * @internal Ensure that either var->no_fill == NC_NOFILL || var->fill_value != NULL. * Side effects: set as default if necessary and build _FillValue attribute. * * @param h5 Pointer to file info struct. @@ -642,75 +314,25 @@ ncz_find_grp_var_att(int ncid, int varid, const char *name, int attnum, int NCZ_ensure_fill_value(NC_VAR_INFO_T *var) { - size_t size; - int retval = NC_NOERR; - NC_FILE_INFO_T *h5 = var->container->nc4_info; + int stat = NC_NOERR; - if(var->no_fill) + if(var->no_fill == NC_NOFILL) return NC_NOERR; -#if 0 /*LOOK*/ - /* Find out how much space we need for this type's fill value. */ - if (var->type_info->nc_type_class == NC_VLEN) - size = sizeof(nc_vlen_t); - else if (var->type_info->nc_type_class == NC_STRING) - size = sizeof(char *); - else -#endif - - if ((retval = nc4_get_typelen_mem(h5, var->type_info->hdr.id, &size))) goto done; - assert(size); - /* If the user has set a fill_value for this var, use, otherwise find the default fill value. */ - - if (var->fill_value == NULL) { - /* initialize the fill_value to the default */ - /* Allocate the fill_value space. */ - if((var->fill_value = calloc(1, size))==NULL) - {retval = NC_ENOMEM; goto done;} - if((retval = nc4_get_default_fill_value(var->type_info, var->fill_value))) { - /* Note: release memory, but don't return error on failure */ - (void)NCZ_reclaim_fill_value(var); - retval = NC_NOERR; - goto done; - } + if(var->fill_value == NULL) { + NC_FILE_INFO_T* file = var->container->nc4_info; + nc_type vartid = var->type_info->hdr.id; + void* dfalt = NCZ_getdfaltfillvalue(vartid); + if((stat = NCZ_set_dual_obj_data(file,(NC_OBJ*)var,NC_FillValue,DA_FILLVALUE,1,dfalt))) goto done; + /* synchronize to attribute */ + if((stat = NCZ_sync_dual_att(file,(NC_OBJ*)var,NC_FillValue,DA_FILLVALUE,FIXATT))) goto done; } assert(var->fill_value != NULL); LOG((4, "Found a fill value for var %s", var->hdr.name)); -#if 0 /*LOOK*/ - /* Need to copy both vlen and a single basetype */ - if (var->type_info->nc_type_class == NC_VLEN) - { - nc_vlen_t *in_vlen = (nc_vlen_t *)(var->fill_value); - nc_vlen-t *fv_vlen = (nc_vlen_t *)fill; - size_t basetypesize = 0; - - if((retval=nc4_get_typelen_mem(h5, var->type_info->u.v.base_nc_typeid, &basetypesize))) - return retval; - - fv_vlen->len = in_vlen->len; - if (!(fv_vlen->p = malloc(basetypesize * in_vlen->len))) - { - free(*fillp); - *fillp = NULL; - return NC_ENOMEM; - } - memcpy(fv_vlen->p, in_vlen->p, in_vlen->len * basetypesize); - } - else if (var->type_info->nc_type_class == NC_STRING) - { - if (*(char **)var->fill_value) - if (!(**(char ***)fillp = strdup(*(char **)var->fill_value))) - { - free(*fillp); - *fillp = NULL; - return NC_ENOMEM; - } - } -#endif /*0*/ done: - return retval; + return THROW(stat); } #ifdef LOGGING @@ -808,3 +430,52 @@ NCZ_inq_format_extended(int ncid, int *formatp, int *modep) return NC_NOERR; } + + +void +zdfaltstrlen(size_t* p, size_t strlen) +{ + *p = strlen; +} + +void +zmaxstrlen(size_t* p, size_t strlen) +{ + *p = strlen; +} + + +void +zsetmaxstrlen(size_t maxstrlen, NC_VAR_INFO_T* var) +{ + NCZ_VAR_INFO_T* zvar = (NCZ_VAR_INFO_T*)var->format_var_info; + zvar->maxstrlen = maxstrlen; + if(zvar->maxstrlen == 0) zvar->maxstrlen = NCZ_get_maxstrlen((NC_OBJ*)var); +} + +void +zsetdfaltstrlen(size_t dfaltstrlen, NC_FILE_INFO_T* file) +{ + NCZ_FILE_INFO_T* zfile = (NCZ_FILE_INFO_T*)file->format_file_info; + zfile->default_maxstrlen = dfaltstrlen; + if(zfile->default_maxstrlen == 0) zfile->default_maxstrlen = NCZ_MAXSTR_DFALT; +} + +void +zsetdimsep(char sep, NC_VAR_INFO_T* var) +{ + NCZ_VAR_INFO_T* zvar = (NCZ_VAR_INFO_T*)var->format_var_info; + zvar->dimension_separator = sep; + if(zvar->dimension_separator == '\0') zvar->dimension_separator = NCZ_get_dimsep(var); +} + +void +zsetdfaltdimsep(char dimsep, NC_FILE_INFO_T* file) +{ + NCglobalstate* gs = NC_getglobalstate(); + NC_UNUSED(file); + assert(gs != NULL); + assert(gs->zarr.dimension_separator != '\0'); + gs->zarr.dimension_separator = dimsep; + assert(gs->zarr.dimension_separator != '\0'); +} diff --git a/libnczarr/zinternal.h b/libnczarr/zinternal.h index 2548ad54ba..75dd81eebb 100644 --- a/libnczarr/zinternal.h +++ b/libnczarr/zinternal.h @@ -12,11 +12,35 @@ #ifndef ZINTERNAL_H #define ZINTERNAL_H -#define ZARRVERSION "2" +/* This is the version of this NCZarr package */ +/* This completely independent of the Zarr specification version */ +#define NCZARR_PACKAGE_VERSION "3.0.0" -/* NCZARRVERSION is independent of Zarr version, - but NCZARRVERSION => ZARRVERSION */ -#define NCZARRVERSION "2.0.0" +/* Allowed Zarr Formats */ +#define ZARRFORMAT2 2 +#define ZARRFORMAT3 3 + +/* Mode encoded formats */ +#define ZARRFORMAT2_STRING "v2" +#define ZARRFORMAT3_STRING "v3" + +/* Define the possible NCZarr format versions */ +/* These are independent of the Zarr specification version */ +#define NCZARRFORMAT0 0 /* if this is a pure zarr dataset */ +#define NCZARRFORMAT2 2 +#define NCZARRFORMAT3 3 + +/* Map the Zarr Format version to a string */ +#define ZARR_FORMAT_VERSION_TEMPLATE "%d" + +/* Map the NCZarr Format version to a string */ +#define NCZARR_FORMAT_VERSION_TEMPLATE "%d.0.0" + +/* The name of the env var for changing default zarr format */ +#define NCZARRDEFAULTFORMAT "NCZARRFORMAT" + +/* The name of the env var for controlling .zmetadata use*/ +#define NCZARRDEFAULTNOMETA "NCNOZMETADATA" /* These have to do with creating chunked datasets in ZARR. */ #define NCZ_CHUNKSIZE_FACTOR (10) @@ -38,77 +62,197 @@ # endif #endif -#define ZMETAROOT "/.zgroup" -#define ZMETAATTR "/.zattrs" -#define ZGROUP ".zgroup" -#define ZATTRS ".zattrs" -#define ZARRAY ".zarray" +/* V2 Reserved Objects */# +#define Z2METADATA "/.zmetadata" +#define Z2METAROOT "/.zgroup" +#define Z2ATTSROOT "/.zattrs" +#define Z2GROUP ".zgroup" +#define Z2ATTRS ".zattrs" +#define Z2ARRAY ".zarray" + +/* V3 Reserved Objects */ +#define Z3METADATA "/zarr.json" +#define Z3METAROOT Z3METADATA +#define Z3OBJECT "zarr.json" +#define Z3GROUP Z3OBJECT +#define Z3ARRAY Z3OBJECT +#define Z3CHUNK "c" + +/* Bytes codec name */ +#define ZBYTES3 "bytes" /* V2 Reserved Attributes */ /* -For nczarr version 2.x.x, the following (key,value) -pairs are stored in .zgroup and/or .zarray. +For nczarr versions 2.x.x, the following (key,value) +pairs are stored as if they were standard attributes. +The cost is that lazy attribute reading is no longer possible. -Inserted into /.zattrs in root group -_nczarr_superblock: {"version": "2.0.0"} +Inserted into /.zattrs in the root group +_nczarr_superblock: {"version": "3.0.0", "format=2"} -Inserted into any group level .zattrs +Inserted into any .zattrs (at group level) "_nczarr_group": "{ -\"dimensions\": [{name: , size: , unlimited: 1|0},...], +\"dimensions\": {: , : ,...} \"arrays\": [\"v1\", \"v2\", ...] \"groups\": [\"g1\", \"g2\", ...] }" -Inserted into any array level .zattrs +Optionally insert into any .zattrs (at root group level) +\"_nczarr_default_maxstrlen\": +This is needed only when writing a dataset. When reading, it should be redundant +vis-a-vis the actual length (e.g. "|S6") of the dtype of a string variable. + +Inserted into any .zattrs (at array level) "_nczarr_array": "{ -\"dimension_references\": [\"/g1/g2/d1\", \"/d2\",...] -\"storage\": \"scalar\"|\"contiguous\"|\"chunked\" +\"dimension_references\": [\"/g1/g2/d1\", \"/d2\",...], +\"storage\": \"contiguous\" | \"chunked\", +\"scalar\": 0 | 1 }" +Note that the storage key is probably irrelevant currently because +all data is stored in Zarr in the equivalent of "chunked". +Note also that if scalar is "1", then storage will still be chunked, +and the array will have shape of "[1]" and optionally, a dimension name +of "_scalar_". For external pure zarr datasets, there will not be enough +information to signal scalar, so such an array will be treated as a one +element chunk. + +If an array type is a string, the optionally insert into any .zattrs (at array level) +\"_nczarr_maxstrlen\": +This is needed only when writing a dataset. When reading, it should be redundant +vis-a-vis the length (e.g. "|S6") of the dtype of the variable. Inserted into any .zattrs -"_nczarr_attr": "{ +"_nczarr_attrs": "{ \"types\": {\"attr1\": \", size: , unlimited: 1|0},...], + The V2 format is also acceptable: {: , : ,...} +\"arrays\": ["",...], +\"subgroups\": ["",...] +}" + +Optionally inserted into any array zarr.json as an attribute: +```` +"_nczarr_array": "{ +\"dimension_references\": [\"/g1/g2/d1\", \"/d2\",...], +\"nczarr_type\": "" // optional +\"maxstrlen\": "" // optional +}" +```` +The *dimension_references* key is an expansion of the "dimensions" key +found in the *zarr.json* object for an array. +The problem with "dimensions" is that it specifies a simple name for each +dimension, whereas netcdf-4 requires that the array references dimension objects +that may appear in groups anywhere in the file. These references are encoded +as FQNs "pointing" to a specific dimension declaration (see *_nczarr_group* attribute +defined previously). + +FQN is an acronym for "Fully Qualified Name". +It is a series of names separated by the "/" character, much +like a file system path. +It identifies the group in which the dimension is ostensibly "defined" in the Netcdf sense. +For example ````/d1```` defines a dimension "d1" defined in the root group. +Similarly ````/g1/g2/d2```` defines a dimension "d2" defined in the +group g2, which in turn is a subgroup of group g1, which is a subgroup +of the root group. + +The *nczarr_type* key is used to annotate the type of an array +to allow use of netcdf-4 specific types not represented in the standard Zarr V3 types. +Specifically, there are three current cases: +| dtype | nczarr_type | +| ----- | ----------- | +| uint8 | char | +| rn | string | + +If, for example, an array's dtype is specified as *uint8*, then it may be that +it is actually of unsigned 8-bit integer type. But it may actually be of some +netcdf-4 type that is encoded as *uint8* in order to be recognized by other -- pure zarr-- +implementations. So, for example, if the netcdf-4 type is *char*, then the array's +dtype is *uint8*, but its nczarr type is *char*. + +Optionally Inserted into any group zarr.json or array zarr.json is the extra attribute. +"_nczarr_attrs": {\"attribute_types\": [{\"name\": \"attr1\", \"configuration\": {\"type\": \"\"}}, ...]} + */ -#define NCZ_V2_SUPERBLOCK "_nczarr_superblock" -#define NCZ_V2_GROUP "_nczarr_group" -#define NCZ_V2_ARRAY "_nczarr_array" -#define NCZ_V2_ATTR "_nczarr_attr" /* Must match value in include/nc4internal.h */ +/* Common constants for both V2 and V3 */ +/* Must match values in include/nc4internal.h */ +#define NCZ_PREFIX "_nczarr" +#define NCZ_ATTR_OLD_ATTR NC_NCZARR_ATTR_ATTR #define NCZARRCONTROL "nczarr" #define PUREZARRCONTROL "zarr" #define XARRAYCONTROL "xarray" #define NOXARRAYCONTROL "noxarray" #define XARRAYSCALAR "_scalar_" +#define DIMSCALAR "/_scalar_" +#define FORMAT2CONTROL "v2" +#define FORMAT3CONTROL "v3" +#define ZMETADATACONTROL "zmetadata" +#define NOZMETADATACONTROL "nozmetadata" -#define NC_NCZARR_MAXSTRLEN_ATTR "_nczarr_maxstrlen" -#define NC_NCZARR_DEFAULT_MAXSTRLEN_ATTR "_nczarr_default_maxstrlen" - -#define LEGAL_DIM_SEPARATORS "./" -#define DFALT_DIM_SEPARATOR '.' +#define LEGAL_DIM_SEPARATORS "/." +#define DFALT_DIM_SEPARATOR_V2 '.' +#define DFALT_DIM_SEPARATOR_V3 '/' #define islegaldimsep(c) ((c) != '\0' && strchr(LEGAL_DIM_SEPARATORS,(c)) != NULL) +/* Extend the type system */ +#define NC_JSON (NC_STRING+1) +#define N_NCZARR_TYPES (NC_JSON+1) +#define NC_JSON_DTYPE_V2 "|J0" +#define NC_JSON_DTYPE_V3 "json" + /* Default max string length for fixed length strings */ -#define NCZ_MAXSTR_DEFAULT 128 +#define NCZ_MAXSTR_DFALT 128 /* Mnemonics */ #define ZCLOSE 1 /* this is closeorabort as opposed to enddef */ #define ZREADING 1 /* this is reading data rather than writing */ +#define FIXATT 0 +#define FIXOBJ 1 + +//#define FORVAR 1 +//#define FORGRP 2 + +/* Track the possible cases where a field in some NC_XXX_INFO_T* + must be sync'd with corresponding Attribute +*/ +typedef enum DualAtt {DA_NOT, DA_FILLVALUE, DA_MAXSTRLEN, DA_DFALTSTRLEN, DA_SEP, DA_DFALTSEP, DA_QUANTIZE, DA_ALL} DualAtt; /* Useful macro */ #define ncidforx(file,grpid) ((file)->controller->ext_ncid | (grpid)) #define ncidfor(var) ncidforx((var)->container->nc4_info,(var)->container->hdr.id) /**************************************************/ -/* Forward */ +/* Opaque */ struct NClist; struct NCjson; struct NCauth; struct NCZMAP; struct NCZChunkCache; +struct NCZ_Formatter; +struct NCproplist; +struct NCZ_META_HDR; /**************************************************/ /* Define annotation data for NCZ objects */ @@ -123,32 +267,35 @@ typedef struct NCZ_FILE_INFO { NCZcommon common; struct NCZMAP* map; /* implementation */ struct NCauth* auth; - struct nczarr { - int zarr_version; - struct { - unsigned long major; - unsigned long minor; - unsigned long release; - } nczarr_version; + struct NCZ_Metadata metadata_handler; + struct Zarrformat { + int zarr_format; + int nczarr_format; + char dimension_separator; } zarr; int creating; /* 1=> created 0=>open */ int native_endianness; /* NC_ENDIAN_LITTLE | NC_ENDIAN_BIG */ - NClist* controllist; /* Envv format */ - struct Controls { - size64_t flags; -# define FLAG_PUREZARR 1 -# define FLAG_SHOWFETCH 2 -# define FLAG_LOGGING 4 -# define FLAG_XARRAYDIMS 8 -# define FLAG_NCZARR_KEY 16 /* _nczarr_xxx keys are stored in object and not in _nczarr_attrs */ - NCZM_IMPL mapimpl; - } controls; - int default_maxstrlen; /* default max str size for variables of type string */ + size_t default_maxstrlen; /* default max str size for variables of type string */ + NClist* urlcontrols; /* controls specified by the file url fragment */ + size64_t flags; +# define FLAG_PUREZARR 1 /* NCZarr metadata, if present, should be suppressed */ +# define FLAG_SHOWFETCH 2 /* [show=fetch] debug output requested */ +# define FLAG_LOGGING 4 /* [log] debug output requested */ +# define FLAG_XARRAYDIMS 8 /* Utilize the xarray _ARRAY_DIMENSIONS attribute */ +# define FLAG_NCZARR_KEY 16 /* _nczarr_xxx keys are stored in object and not in _nczarr_attrs */ +# define FLAG_NOCONSOLIDATED 32 /* Suppress consolidated metadata */ +#ifdef NETCDF_ENABLE_ZOH +# define FLAG_ZOH 32 /* Usind ZoH Protocol */ +#endif + NCZM_IMPL mapimpl; + struct NCZ_Formatter* dispatcher; + struct NCZ_META_HDR* metastate; /* Hold per-format state */ } NCZ_FILE_INFO_T; /* This is a struct to handle the dim metadata. */ typedef struct NCZ_DIM_INFO { NCZcommon common; + struct NCZ_META_HDR* metastate; /* Hold per-format state */ } NCZ_DIM_INFO_T; /** Struct to hold ZARR-specific info for attributes. */ @@ -159,13 +306,17 @@ typedef struct NCZ_ATT_INFO { /* Struct to hold ZARR-specific info for a group. */ typedef struct NCZ_GRP_INFO { NCZcommon common; - /* Read .zgroup and .zattrs once */ + struct NCZ_META_HDR* metastate; /* Hold per-format state */ + /* Read json group and json attrs once */ +#if 0 struct ZARROBJ { - char* prefix; /* prefix of .zgroup and .zattrs */ - NCjson* obj; /* .zgroup|.zarray */ - NCjson* atts; - int nczv1; /* 1 => _nczarr_xxx are in obj and not attributes */ + char* prefix; /* prefix of group and attrs */ + NCjson* obj; /* V2->.zgroup|.zarray, V3->zarr.json */ + NCjson* atts; /* V2->.zattrs, V3->attributes */ + int constatts; /* 1=>do not reclaim atts field */ + int nczkey; /* 1 => _nczarr_xxx are in obj as keys and not attributes */ } zgroup; +#endif } NCZ_GRP_INFO_T; /* Struct to hold ZARR-specific info for a variable. */ @@ -174,15 +325,13 @@ typedef struct NCZ_VAR_INFO { size64_t chunkproduct; /* product of chunksizes */ size64_t chunksize; /* chunkproduct * typesize */ int order; /* 1=>column major, 0=>row major (default); not currently enforced */ - size_t scalar; + int scalar; struct NCZChunkCache* cache; - struct NClist* xarray; /* names from _ARRAY_DIMENSIONS */ + struct NClist* dimension_names; /* names from _ARRAY_DIMENSIONS or dimension_names key */ char dimension_separator; /* '.' | '/' */ - NClist* incompletefilters; - int maxstrlen; /* max length of strings for this variable */ - /* Read .zarray and .zattrs once */ - struct ZARROBJ zarray; - struct ZARROBJ zattrs; + size_t maxstrlen; /* max length of strings for this variable */ + struct NCZ_META_HDR* metastate; /* Hold per-format state */ + int nonstdchunkkey; /* 1 => use a non-standard chunk key encoding; always 0 for V2. For V2, 1 => use V3 encoding */ } NCZ_VAR_INFO_T; /* Struct to hold ZARR-specific info for a field. */ @@ -195,28 +344,40 @@ typedef struct NCZ_TYPE_INFO { NCZcommon common; } NCZ_TYPE_INFO_T; -#if 0 -/* Define the contents of the .nczcontent object */ -/* The .nczcontent field stores the following: - 1. List of (name,length) for dims in the group - 2. List of (name,type) for user-defined types in the group - 3. List of var names in the group - 4. List of subgroups names in the group -*/ -typedef struct NCZCONTENT{ - NClist* dims; - NClist* types; /* currently not used */ - NClist* vars; - NClist* grps; -} NCZCONTENT; -#endif +/* Parsed dimension info */ +typedef struct NCZ_DimInfo { + char norm_name[NC_MAX_NAME+1]; + size64_t shape; + int unlimited; +} NCZ_DimInfo; + +/* Dimension declaration info */ +typedef struct NCZ_DimDecl { + char* fqn; + size64_t shape; +} NCZ_DimDecl; + +/* Parsed Attribute info */ +struct NCZ_AttrInfo { + const char* name; + nc_type nctype; + size_t typelen; + int endianness; + size_t datalen; + void* data; +}; + +EXTERNL struct NCZ_AttrInfo NC_emptyAttrInfo(); /**************************************************/ -extern int ncz_initialized; /**< True if initialization has happened. */ +/* Common property lists */ +EXTERNL const struct NCproplist* NCplistzarrv2; +EXTERNL const struct NCproplist* NCplistzarrv3; + +/**************************************************/ -/* Forward */ -struct NCZ_Filterspec; +extern int ncz_initialized; /**< True if initialization has happened. */ /* zinternal.c */ int NCZ_initialize(void); @@ -229,6 +390,10 @@ int ncz_find_grp_var_att(int ncid, int varid, const char *name, int attnum, NC_GRP_INFO_T** grp, NC_VAR_INFO_T** var, NC_ATT_INFO_T** att); int NCZ_set_log_level(void); +void zsetmaxstrlen(size_t maxstrlen, NC_VAR_INFO_T* var); +void zsetdfaltstrlen(size_t maxstrlen, NC_FILE_INFO_T* file); +void zsetdimsep(char dimsep, NC_VAR_INFO_T* var); +void zsetdfaltdimsep(char dimsep, NC_FILE_INFO_T* file); /* zcache.c */ int ncz_adjust_var_cache(NC_GRP_INFO_T* grp, NC_VAR_INFO_T* var); @@ -244,18 +409,29 @@ int NCZ_zclose_var1(NC_VAR_INFO_T* var); /* zattr.c */ int ncz_getattlist(NC_GRP_INFO_T *grp, int varid, NC_VAR_INFO_T **varp, NCindex **attlist); -int ncz_create_fillvalue(NC_VAR_INFO_T* var); -int ncz_makeattr(NC_OBJ*, NCindex* attlist, const char* name, nc_type typid, size_t len, void* values, NC_ATT_INFO_T**); +int NCZ_read_attrs(NC_FILE_INFO_T* file, NC_OBJ* container, const NCjson* jatts, const NCjson* jatypes); +int NCZ_attr_convert(const NCjson* src, nc_type typeid, size_t* countp, NCbytes* dst); +int ncz_makeattr(NC_FILE_INFO_T* file, NC_OBJ* container, struct NCZ_AttrInfo* ainfo, NC_ATT_INFO_T** attp); +int NCZ_attr_delete(NC_FILE_INFO_T* file, NCindex* attlist, const char* name); +int NCZ_getattr(NC_FILE_INFO_T* file, NC_OBJ* container, const char* aname, nc_type nctype, NC_ATT_INFO_T** attp, int* isnewp);; +int NCZ_reclaim_att_data(NC_FILE_INFO_T* file, NC_ATT_INFO_T* att); +int NCZ_set_dual_obj_data(NC_FILE_INFO_T* file, NC_OBJ* object, const char* name, DualAtt which, size_t len, const void* data); +int NCZ_set_att_data(NC_FILE_INFO_T* file, NC_ATT_INFO_T* att, size_t len, const void* data); +int NCZ_sync_dual_att(NC_FILE_INFO_T* file, NC_OBJ* container, const char* aname, DualAtt which, int direction); +int NCZ_ensure_dual_attributes(NC_FILE_INFO_T* file, NC_OBJ* container); +DualAtt NCZ_is_dual_att(const char* aname); +NC_SORT NCZ_dual_att_container(DualAtt da); /* zvar.c */ int ncz_gettype(NC_FILE_INFO_T*, NC_GRP_INFO_T*, int xtype, NC_TYPE_INFO_T** typep); int ncz_find_default_chunksizes2(NC_GRP_INFO_T *grp, NC_VAR_INFO_T *var); int NCZ_ensure_quantizer(int ncid, NC_VAR_INFO_T* var); +int NCZ_write_var_data(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var); +int NCZ_reclaim_dim(NC_DIM_INFO_T* dim); /* Undefined */ /* Find var, doing lazy var metadata read if needed. */ -int ncz_find_grp_file_var(int ncid, int varid, NC_FILE_INFO_T** file, - NC_GRP_INFO_T** grp, NC_VAR_INFO_T** var); +int ncz_find_file_grp_var(int ncid, int varid, NC_FILE_INFO_T** file, NC_GRP_INFO_T** grp, NC_VAR_INFO_T** var); #endif /* ZINTERNAL_H */ diff --git a/libnczarr/zmap.c b/libnczarr/zmap.c index e878904284..c0c8c9c906 100644 --- a/libnczarr/zmap.c +++ b/libnczarr/zmap.c @@ -4,13 +4,13 @@ */ #include "zincludes.h" -#include -#include #include "ncpathmgr.h" /**************************************************/ -/* Import the current implementations */ +typedef int (*NCZWALKFCN)(NCZMAP*,const char*,const char*,void*); +/**************************************************/ +/*Forward*/ /**************************************************/ @@ -22,8 +22,13 @@ nczmap_features(NCZM_IMPL impl) #ifdef NETCDF_ENABLE_NCZARR_ZIP case NCZM_ZIP: return zmap_zip.features; #endif + #ifdef NETCDF_ENABLE_S3 - case NCZM_S3: return zmap_s3sdk.features; + case NCZM_S3: case NCZM_GS3: +#endif +#ifdef NETCDF_ENABLE_ZOH + case NCZM_ZOH: + return zmap_s3sdk.features; #endif default: break; } @@ -31,7 +36,7 @@ nczmap_features(NCZM_IMPL impl) } int -nczmap_create(NCZM_IMPL impl, const char *path, int mode, size64_t flags, void* parameters, NCZMAP** mapp) +nczmap_create(NCZM_IMPL impl, const char *path, mode_t mode, size64_t flags, void* parameters, NCZMAP** mapp) { int stat = NC_NOERR; NCZMAP* map = NULL; @@ -60,6 +65,10 @@ nczmap_create(NCZM_IMPL impl, const char *path, int mode, size64_t flags, void* #endif #ifdef NETCDF_ENABLE_S3 case NCZM_S3: + case NCZM_GS3: +#ifdef NETCDF_ENABLE_ZOH + case NCZM_ZOH: +#endif stat = zmap_s3sdk.create(path, mode, flags, parameters, &map); if(stat) goto done; break; @@ -74,7 +83,7 @@ nczmap_create(NCZM_IMPL impl, const char *path, int mode, size64_t flags, void* } int -nczmap_open(NCZM_IMPL impl, const char *path, int mode, size64_t flags, void* parameters, NCZMAP** mapp) +nczmap_open(NCZM_IMPL impl, const char *path, mode_t mode, size64_t flags, void* parameters, NCZMAP** mapp) { int stat = NC_NOERR; NCZMAP* map = NULL; @@ -98,9 +107,16 @@ nczmap_open(NCZM_IMPL impl, const char *path, int mode, size64_t flags, void* pa #endif #ifdef NETCDF_ENABLE_S3 case NCZM_S3: + case NCZM_GS3: stat = zmap_s3sdk.open(path, mode, flags, parameters, &map); if(stat) goto done; break; +#ifdef NETCDF_ENABLE_ZOH + case NCZM_ZOH: + stat = zmap_zoh.open(path, mode, flags, parameters, &map); + if(stat) goto done; + break; +#endif #endif default: {stat = REPORT(NC_ENOTBUILT,"nczmap_open"); goto done;} @@ -129,8 +145,13 @@ nczmap_truncate(NCZM_IMPL impl, const char *path) #endif #ifdef NETCDF_ENABLE_S3 case NCZM_S3: + case NCZM_GS3: if((stat = zmap_s3sdk.truncate(path))) goto done; break; +#ifdef NETCDF_ENABLE_ZOH + case NCZM_ZOH: + {stat = REPORT(NC_EZAR,"nczmap_truncate not supported for Zarr-Over-HTTP"); goto done;} +#endif #endif default: {stat = REPORT(NC_ENOTBUILT,"nczmap_truncate"); goto done;} @@ -175,26 +196,25 @@ nczmap_write(NCZMAP* map, const char* key, size64_t count, const void* content) return map->api->write(map, key, count, content); } -/* Define a static qsort comparator for strings for use with qsort */ -static int -cmp_strings(const void* a1, const void* a2) +int +nczmap_list(NCZMAP* map, const char* prefix, NClist* matches) { - const char** s1 = (const char**)a1; - const char** s2 = (const char**)a2; - return strcmp(*s1,*s2); + int stat = NC_NOERR; + if((stat = map->api->list(map, prefix, matches)) == NC_NOERR) { + if((stat = NCZ_sortstringlist(nclistcontents(matches),nclistlength(matches)))) goto done; /* sort the list */ + } +done: + return stat; } int -nczmap_search(NCZMAP* map, const char* prefix, NClist* matches) +nczmap_listall(NCZMAP* map, const char* prefix, NClist* matches) { int stat = NC_NOERR; - if((stat = map->api->search(map, prefix, matches)) == NC_NOERR) { - /* sort the list */ - if(nclistlength(matches) > 1) { - void* base = nclistcontents(matches); - qsort(base, nclistlength(matches), sizeof(char*), cmp_strings); - } + if((stat = map->api->listall(map, prefix, matches)) == NC_NOERR) { + if((stat = NCZ_sortstringlist(nclistcontents(matches),nclistlength(matches)))) goto done; /* sort the list */ } +done: return stat; } @@ -255,7 +275,9 @@ nczm_concat(const char* prefix, const char* suffix, char** pathp) if(strlen(suffix) > 0 && suffix[0] != NCZM_SEP[0]) ncbytescat(buf,NCZM_SEP); ncbytescat(buf,suffix); +assert(buf->content != NULL); if(pathp) *pathp = ncbytesextract(buf); +assert(buf->content == NULL); ncbytesfree(buf); return NC_NOERR; } @@ -282,6 +304,10 @@ nczm_appendn(char** resultp, int n, ...) /* A segment is defined as a '/' plus characters following up to the end or upto the next '/' +@param key [in] to divide +@param nsegs [in] no. of segs in prefix; < 0 means count from right +@param prefixp [out] concat of prefix segs +@param suffixp [out] concat of suffix segs */ int nczm_divide_at(const char* key, int nsegs, char** prefixp, char** suffixp) @@ -289,7 +315,8 @@ nczm_divide_at(const char* key, int nsegs, char** prefixp, char** suffixp) int stat = NC_NOERR; char* prefix = NULL; char* suffix = NULL; - size_t len, i; + size_t len; + size_t i; ptrdiff_t delta; const char* p; size_t abssegs = (size_t)(nsegs >= 0 ?nsegs: -nsegs); @@ -339,7 +366,7 @@ nczm_divide_at(const char* key, int nsegs, char** prefixp, char** suffixp) int nczm_clear(NCZMAP* map) { - if(map) + if(map) nullfree(map->url); return NC_NOERR; } @@ -373,6 +400,8 @@ nczm_localize(const char* path, char** localpathp, int localize) #ifdef _MSC_VER forward = (localize?0:1); +#else + NC_UNUSED(localize); #endif /* If path comes from a url, then it may start with: /x:/... where x is a drive letter. If so, then remove leading / */ @@ -501,33 +530,27 @@ nczm_basename(const char* path, char** basep) return THROW(stat); } -/* bubble sort a list of strings */ -void -nczm_sortlist(NClist* l) -{ - nczm_sortenvv(nclistlength(l),(char**)nclistcontents(l)); -} -static int -nczm_compare(const void* arg1, const void* arg2) -{ - char* n1 = *((char**)arg1); - char* n2 = *((char**)arg2); - return strcmp(n1,n2); -} -/* quick sort a list of strings */ -void -nczm_sortenvv(size_t n, char** envv) +/* Remove a given prefix from the front of each given key */ +int +nczm_removeprefix(const char* prefix, size_t nkeys, char** keys) { - if(n <= 1) return; - qsort(envv, n, sizeof(char*), nczm_compare); -#if 0 -{int i; -for(i=0;i>> sorted: [%d] %s\n",i,(const char*)envv[i]); -} -#endif + int stat = NC_NOERR; + size_t i,prefixlen; + + if(nkeys == 0 || keys == NULL) return stat; + prefixlen = strlen(prefix); + for(i=0;i 0); + if(prefix[0] != '/') ncbytescat(path,"/"); + ncbytescat(path,prefix); + + /* get list of all keys below the prefix */ + if((stat=nczmap_listall(map,ncbytescontents(path),subtree))) goto done; + if(nclistlength(subtree) == 0) goto done; /* empty subtree */ + + /* Apply fcn to all paths in subtree */ + for(stop=0,i=0;!stop && i is the prefix path, -then search returns all such that / is itself a prefix of a "legal" key. -This could be used to implement glob style searches such as "/x/y/ *" or "/x/y/ **" - -This semantics was chosen because it appears to be the minimum required to implement -all other kinds of search using recursion. So for example -1. Avoid returning keys that are not a prefix of some legal key. -2. Avoid returning all the legal keys in the dataset because that set may be very large; - although the implementation may still have to examine all legal keys to get the desired subset. -3. Allow for use of partial read mechanisms such as iterators, if available. - This can support processing a limited set of keys for each iteration. This is a - straighforward tradeoff of space over time. - -This is doable in S3 search using common prefixes with a delimiter of '/', although -the implementation is a bit tricky. For the file system zmap implementation, the legal search keys can be obtained -one level at a time, which directly implements the search semantics. For the zip file implementation, -this semantics is not possible, so the whole tree must be obtained and searched. +List: +The list function takes a prefix path which has a key syntax (see +above). The set of legal keys is the set of keys such that the key +references a content-bearing object -- e.g. /x/y/.zarray or +/.zgroup or /g/.zgroup. Essentially this is the set of keys pointing +to the leaf objects of the tree of keys constituting a dataset. +This set potentially limits the set of keys that need to be examined +during search. + +The list function has two primary purposes: + 1. Support reading of pure zarr datasets (because they do not explicitly track their contents). + 2. Debugging to allow raw examination of the storage. See zdump for example. + +Secondarily, the list function can operate in one of two modes: +"shallow" or "deep". The produce the following different results. + * _shallow_ -- the list function returns the set of names that are immediate "children" (i.e.suffixes) of a given prefix path. That is, if \ is the prefix path, then list returns all \ such that \/\ is itself a prefix of a "legal" key. This functionality could be used to implement glob style searches such as "/x/y/\*" or "/x/y/\*\*" + * _deep_ -- the list function returns the set of all legal keys. + +The shallow case provided because it appears to be the minimum +required to implement all other kinds of search using recursion. So +for example: + 1. Avoid returning keys that are not a prefix of some legal key. + 2. Optionally avoid returning all the legal keys in the dataset because that set may be very large; although the implementation may still have to examine all legal keys to get the desired subset. + 3. Allow for the use of partial read mechanisms such as iterators, if available. This can support processing a limited set of keys for each iteration. This is a straighforward tradeoff of space over time. + +The deep case is useful in two cases: + 1. The underlying zmap implementation only supports the equivalent of "deep". Providing "shallow" when deep is all you have, requires caching of the deep listing and simulating the shallow case over that deep listing. + 2. Deep is useful when it is known that searching will eventually touch all the legal prefix keys, so it might as well be memo'ized into memory for speed. + +The current zmap implementations internally provide some combination of deep and shallow and simulate the other. + * File Map -- implements the shallow case and simulates the deep case. + * ZIP Map -- implements the deep case and simulates the shallow case. + * S3 Map -- implements both the shallow and deep cases. Issues: 1. S3 limits key lengths to 1024 bytes. Some deeply nested netcdf files @@ -112,17 +117,18 @@ always lead to something: a directory or a file. In any case, the zmap API returns three distinguished error code: 1. NC_NOERR if a operation succeeded -2. NC_EEMPTY is returned when accessing a key that has no content or does not exist. +2. NC_EOBJECT is returned when accessing a key that that does not exist. This does not preclude other errors being returned such NC_EACCESS or NC_EPERM or NC_EINVAL if there are permission errors or illegal function arguments, for example. It also does not preclude the use of other error codes internal to the zmap -implementation. So zmap_file, for example, uses NC_ENOTFOUND internally +implementation. So zmap_file, for example, uses NC_ENOTFOUND or NC_EMPTY internally because it is possible to detect the existence of directories and files. This does not propagate to the API. -Note that NC_EEMPTY is a new error code to signal to that the -caller asked for non-content-bearing key. +Note that NC_EEMPTY is a new error code to signal occurrence +of objects like directories that are needed for the implemenation, +but are not content-bearing. The current set of operations defined for zmaps are define with the generic nczm_xxx functions below. @@ -154,6 +160,10 @@ NCZM_UNDEF=0, /* In-memory implementation */ NCZM_FILE=1, /* File system directory-based implementation */ NCZM_ZIP=2, /* Zip-file based implementation */ NCZM_S3=3, /* Amazon S3 implementation */ +NCZM_GS3=4, /* Google S3 implementation */ +#ifdef NETCDF_ENABLE_ZOH +NCZM_ZOH=5, /* Zarr-Over-HTTP server; piggybacks on S3 implementation */ +#endif } NCZM_IMPL; /* Define the default map implementation */ @@ -165,6 +175,10 @@ typedef size64_t NCZM_FEATURES; #define NCZM_UNIMPLEMENTED 1 /* Unknown/ unimplemented */ #define NCZM_WRITEONCE 2 /* Objects can only be written once */ +#ifndef HAVE_MODE_T +typedef int mode_t; +#endif + /* For each dataset, we create what amounts to a class defining data and the API function implementations. @@ -177,10 +191,11 @@ so we can cast to this form; avoids need for a separate per-implementation malloc piece. */ + typedef struct NCZMAP { NCZM_IMPL format; char* url; - int mode; + mode_t mode; size64_t flags; /* Passed in by caller */ struct NCZMAP_API* api; } NCZMAP; @@ -201,15 +216,17 @@ struct NCZMAP_API { int (*len)(NCZMAP* map, const char* key, size64_t* sizep); int (*read)(NCZMAP* map, const char* key, size64_t start, size64_t count, void* content); int (*write)(NCZMAP* map, const char* key, size64_t count, const void* content); - int (*search)(NCZMAP* map, const char* prefix, struct NClist* matches); + /* List Operations */ + int (*list)(NCZMAP* map, const char* prefix, struct NClist* matches); /* shallow listing */ + int (*listall)(NCZMAP* map, const char* prefix, struct NClist* matches); /* deep listing */ }; /* Define the Dataset level API */ typedef struct NCZMAP_DS_API { int version; NCZM_FEATURES features; - int (*create)(const char *path, int mode, size64_t constraints, void* parameters, NCZMAP** mapp); - int (*open)(const char *path, int mode, size64_t constraints, void* parameters, NCZMAP** mapp); + int (*create)(const char *path, mode_t mode, size64_t constraints, void* parameters, NCZMAP** mapp); + int (*open)(const char *path, mode_t mode, size64_t constraints, void* parameters, NCZMAP** mapp); int (*truncate)(const char* url); } NCZMAP_DS_API; @@ -222,6 +239,9 @@ extern NCZMAP_DS_API zmap_zip; #endif #ifdef NETCDF_ENABLE_S3 extern NCZMAP_DS_API zmap_s3sdk; +#ifdef NETCDF_ENABLE_ZOH +extern NCZMAP_DS_API zmap_zoh; +#endif #endif #ifdef __cplusplus @@ -246,7 +266,7 @@ Check if a specified content-bearing object exists or not. @param map -- the containing map @param key -- the key specifying the content-bearing object @return NC_NOERR if the object exists -@return NC_EEMPTY if the object is not content bearing. +@return NC_ENOOBJECT if the object does not exist @return NC_EXXX if the operation failed for one of several possible reasons */ EXTERNL int nczmap_exists(NCZMAP* map, const char* key); @@ -299,7 +319,19 @@ next segment of legal objects that are immediately contained by the prefix key. @return NC_NOERR if the operation succeeded @return NC_EXXX if the operation failed for one of several possible reasons */ -EXTERNL int nczmap_search(NCZMAP* map, const char* prefix, struct NClist* matches); +EXTERNL int nczmap_list(NCZMAP* map, const char* prefix, struct NClist* matches); + +/** +Return a vector of keys representing the +list of all objects whose key is prefixed by the specified prefix arg. +In effect it returns the complete subtree below a specified prefix. +@param map -- the containing map +@param prefix -- the key into the tree whose subtree of keys is to be returned. +@param matches -- return the set of keys in this list; might be empty +@return NC_NOERR if the operation succeeded +@return NC_EXXX if the operation failed for one of several possible reasons +*/ +EXTERNL int nczmap_listall(NCZMAP* map, const char* prefix, struct NClist* matches); /** "Truncate" the storage associated with a map. Delete all contents except @@ -320,8 +352,8 @@ Close a map EXTERNL int nczmap_close(NCZMAP* map, int deleteit); /* Create/open and control a dataset using a specific implementation */ -EXTERNL int nczmap_create(NCZM_IMPL impl, const char *path, int mode, size64_t constraints, void* parameters, NCZMAP** mapp); -EXTERNL int nczmap_open(NCZM_IMPL impl, const char *path, int mode, size64_t constraints, void* parameters, NCZMAP** mapp); +EXTERNL int nczmap_create(NCZM_IMPL impl, const char *path, mode_t mode, size64_t constraints, void* parameters, NCZMAP** mapp); +EXTERNL int nczmap_open(NCZM_IMPL impl, const char *path, mode_t mode, size64_t constraints, void* parameters, NCZMAP** mapp); #ifdef NETCDF_ENABLE_S3 EXTERNL void NCZ_s3finalize(void); @@ -367,11 +399,29 @@ EXTERNL int nczm_canonicalpath(const char* path, char** cpathp); EXTERNL int nczm_basename(const char* path, char** basep); EXTERNL int nczm_segment1(const char* path, char** seg1p); EXTERNL int nczm_lastsegment(const char* path, char** lastp); +EXTERNL int nczm_removeprefix(const char* prefix, size_t nkeys, char** keys); -/* bubble sorts (note arguments) */ -EXTERNL void nczm_sortlist(struct NClist* l); -EXTERNL void nczm_sortenvv(size_t n, char** envv); EXTERNL void NCZ_freeenvv(int n, char** envv); +EXTERNL const char* NCZ_mapkind(NCZM_IMPL impl); + +/** +Walk a subtree of paths and invoke a function on each path. +The walk is breadth-first. + +The function signature is: +int (*fcn)(NCZMAP* map, const char* path, void* param); + +If the function returns NC_NOERR, then the walk continues. +If the function returns an error, then the walk terminates and returns the error. + +@param map -- the containing map +@param prefix -- the prefix key the tree where the search is to occur +@param fcn -- the function to invoke +@param param -- passed as extra argument to fcn +@return NC_NOERR if the operation succeeded +@return NC_EXXX if the operation failed for one of several possible reasons +*/ +EXTERNL int nczmap_walk(NCZMAP* map, const char* prefix, int (*fcn)(NCZMAP*,const char*,const char*,void*), void* param); #ifdef __cplusplus } diff --git a/libnczarr/zmap_file.c b/libnczarr/zmap_file.c index 4f798c4fde..a48454f2a3 100644 --- a/libnczarr/zmap_file.c +++ b/libnczarr/zmap_file.c @@ -36,12 +36,6 @@ #define S_ISDIR(mode) ((mode) & _S_IFDIR) #define S_ISREG(mode) ((mode) & _S_IFREG) #endif -#if 0 -#ifndef __cplusplus -#include -#include -#endif -#endif #endif #include "fbits.h" @@ -59,17 +53,18 @@ #define SKIPLAST 1 #define WHOLEPATH 0 +/* Current API struct versions */ #define NCZM_FILE_V1 1 #ifdef S_IRUSR -static int NC_DEFAULT_CREATE_PERMS = +static mode_t NC_DEFAULT_CREATE_PERMS = (S_IRUSR|S_IWUSR |S_IRGRP|S_IWGRP); -static int NC_DEFAULT_RWOPEN_PERMS = +static mode_t NC_DEFAULT_RWOPEN_PERMS = (S_IRUSR|S_IWUSR |S_IRGRP|S_IWGRP); -static int NC_DEFAULT_ROPEN_PERMS = +static mode_t NC_DEFAULT_ROPEN_PERMS = // (S_IRUSR |S_IRGRP); (S_IRUSR|S_IWUSR |S_IRGRP|S_IWGRP); -static int NC_DEFAULT_DIR_PERMS = +static mode_t NC_DEFAULT_DIR_PERMS = (S_IRUSR|S_IWUSR|S_IXUSR|S_IRGRP|S_IXGRP|S_IWGRP); #else static int NC_DEFAULT_CREATE_PERMS = 0660; @@ -127,6 +122,7 @@ static int zfparseurl(const char* path0, NCURI** urip); static int zffullpath(ZFMAP* zfmap, const char* key, char**); static void zfrelease(ZFMAP* zfmap, FD* fd); static void zfunlink(const char* canonpath); +static int zfile_listallR(ZFMAP* map, NCbytes* prefix, int depth, NClist* matches); static int platformerr(int err); static int platformcreatefile(mode_t mode, const char* truepath,FD*); @@ -153,7 +149,7 @@ zfileinitialize(void) if(!zfinitialized) { ZTRACE(5,NULL); const char* env = NULL; - int perms = 0; + mode_t perms = 0; env = getenv("NC_DEFAULT_CREATE_PERMS"); if(env != NULL && strlen(env) > 0) { if(sscanf(env,"%d",&perms) == 1) NC_DEFAULT_CREATE_PERMS = perms; @@ -179,7 +175,7 @@ zfileinitialize(void) */ static int -zfilecreate(const char *path, int mode, size64_t flags, void* parameters, NCZMAP** mapp) +zfilecreate(const char *path, mode_t mode, size64_t flags, void* parameters, NCZMAP** mapp) { int stat = NC_NOERR; char* canonpath = NULL; @@ -256,7 +252,7 @@ zfilecreate(const char *path, int mode, size64_t flags, void* parameters, NCZMAP */ static int -zfileopen(const char *path, int mode, size64_t flags, void* parameters, NCZMAP** mapp) +zfileopen(const char *path, mode_t mode, size64_t flags, void* parameters, NCZMAP** mapp) { int stat = NC_NOERR; char* canonpath = NULL; @@ -326,7 +322,7 @@ zfiletruncate(const char* surl) platformdelete(url->path,0); /* leave root; ignore errors */ done: ncurifree(url); - return stat; + return ZUNTRACE(stat); } /**************************************************/ @@ -342,8 +338,8 @@ zfileexists(NCZMAP* map, const char* key) ZTRACE(5,"map=%s key=%s",zfmap->map.url,key); switch(stat=zflookupobj(zfmap,key,&fd)) { case NC_NOERR: break; - case NC_ENOOBJECT: stat = NC_EEMPTY; - case NC_EEMPTY: break; + case NC_ENOOBJECT: break; + case NC_EEMPTY: stat = NC_ENOOBJECT; break; /* key refers to a directory */ default: break; } zfrelease(zfmap,&fd); @@ -365,8 +361,8 @@ zfilelen(NCZMAP* map, const char* key, size64_t* lenp) /* Get file size */ if((stat=platformseek(&fd, SEEK_END, &len))) goto done; break; - case NC_ENOOBJECT: stat = NC_EEMPTY; - case NC_EEMPTY: break; + case NC_ENOOBJECT: break; + case NC_EEMPTY: stat = NC_ENOOBJECT; break; /* key refers to a directory */ default: break; } zfrelease(zfmap,&fd); @@ -395,8 +391,8 @@ zfileread(NCZMAP* map, const char* key, size64_t start, size64_t count, void* co if((stat = platformseek(&fd, SEEK_SET, &start))) goto done; if((stat = platformread(&fd, count, content))) goto done; break; - case NC_ENOOBJECT: stat = NC_EEMPTY; - case NC_EEMPTY: break; + case NC_ENOOBJECT: break; + case NC_EEMPTY: stat = NC_ENOOBJECT; break; /* key refers to a directory */ default: break; } @@ -467,19 +463,69 @@ zfileclose(NCZMAP* map, int delete) } /* -Return a list of names immediately "below" a specified prefix key. +Return a list of simple names immediately "below" a specified prefix key +(i.e. shallow listing). In theory, the returned list should be sorted in lexical order, but it possible that it is not. The prefix key is not included. */ int -zfilesearch(NCZMAP* map, const char* prefixkey, NClist* matches) +zfilelist(NCZMAP* map, const char* prefixkey, NClist* matches) +{ + int retval = NC_NOERR; + ZFMAP* zfmap = (ZFMAP*)map; + char* fullpath = NULL; + NClist* nextlevel = nclistnew(); + NCbytes* path = ncbytesnew(); + + ZTRACE(5,"map=%s prefixkey=%s",map->url,prefixkey); + + /* Make the root path be true */ + if(prefixkey == NULL || strlen(prefixkey)==0 || strcmp(prefixkey,"/")==0) + fullpath = strdup(zfmap->root); + else if((retval = nczm_concat(zfmap->root,prefixkey,&fullpath))) goto done; + + /* get names of the next level path entries */ + retval = platformdircontent(fullpath, nextlevel); + if(retval == NC_NOERR) { + } else if(retval == NC_EEMPTY) { + retval = NC_NOERR; + goto done; + } else if(retval == NC_ENOOBJECT) { + goto done; + } else { + goto done; + } + + while(nclistlength(nextlevel) > 0) { + char* segment = nclistremove(nextlevel,0); + /* remove any leading '/' */ + if(segment[0] == '/') segment++; + nclistpush(matches,segment); + } + +done: + nclistfreeall(nextlevel); + ncbytesfree(path); + nullfree(fullpath); + return ZUNTRACEX(retval,"|matches|=%d",(int)nclistlength(matches)); +} + +/* +Return a list of all content-bearing keys beginning with specified prefix. +In theory, the returned list should be sorted in lexical order, +but it possible that it is not. +The returned keys have the prefix removed. +*/ +int +zfilelistall(NCZMAP* map, const char* prefixkey, NClist* matches) { int stat = NC_NOERR; ZFMAP* zfmap = (ZFMAP*)map; char* fullpath = NULL; NClist* nextlevel = nclistnew(); - NCbytes* buf = ncbytesnew(); + NCbytes* path = ncbytesnew(); + size_t prefixlen, pathlen; ZTRACE(5,"map=%s prefixkey=%s",map->url,prefixkey); @@ -488,27 +534,80 @@ zfilesearch(NCZMAP* map, const char* prefixkey, NClist* matches) fullpath = strdup(zfmap->root); else if((stat = nczm_concat(zfmap->root,prefixkey,&fullpath))) goto done; - /* get names of the next level path entries */ - switch (stat = platformdircontent(fullpath, nextlevel)) { + /* prime the key tracker */ + /* Ensure leading '/' */ +#ifndef _MSC_VER + if(fullpath[0] != '/') + ncbytescat(path,"/"); +#endif + ncbytescat(path,fullpath); + ncbytescat(path,prefixkey); + /* Ensure no trailing '/' */ + pathlen = ncbyteslength(path); + if(pathlen > 1 && ncbytesget(path,pathlen-1) == '/') { + pathlen--; + ncbytessetlength(path,pathlen); /* truncate last char */ + } + ncbytesnull(path); /* make nul terminated */ + prefixlen = ncbyteslength(path); /* remember the prefix string */ + + if((stat = zfile_listallR(zfmap,path,0,matches))) goto done; + + /* Remove prefix from all entries in matches */ + ncbytessetlength(path,prefixlen); /* restore */ + ncbytesnull(path); + if((stat = nczm_removeprefix(ncbytescontents(path),nclistlength(matches),(char**)nclistcontents(matches)))) goto done; + + /* Lexical sort the results */ + NCZ_sortstringlist(nclistcontents(matches),nclistlength(matches)); + +done: + nclistfreeall(nextlevel); + ncbytesfree(path); + nullfree(fullpath); + return ZUNTRACEX(stat,"|matches|=%d",(int)nclistlength(matches)); +} + +/* zfile_listall recursive helper */ +static int +zfile_listallR(ZFMAP* map, NCbytes* key, int depth, NClist* matches) +{ + int retval = NC_NOERR; + NClist* nextlevel = nclistnew(); + size_t i; + + /* get names of the next level path entries just below key */ + switch (retval = platformdircontent(ncbytescontents(key), nextlevel)) { case NC_NOERR: /* ok */ break; case NC_EEMPTY: /* not a dir */ - stat = NC_NOERR; + retval = NC_NOERR; goto done; case NC_ENOOBJECT: default: goto done; } - while(nclistlength(nextlevel) > 0) { - char* segment = nclistremove(nextlevel,0); - nclistpush(matches,segment); + /* Recurse to walk tree depth first; also record keys */ + for(i=0;imap.url,key,nskip); if((stat=nczm_split(key,segments))) goto done; - len = nclistlength(segments); + len = (int)nclistlength(segments); len -= nskip; /* leave off last nskip segments */ ncbytescat(path,zfmap->root); /* We need path to be absolute */ + /* open and optionally create the root directory */ + if((stat = platformcreatedir(zfmap->map.mode,ncbytescontents(path)))) goto done; + /* Create subsidary groups (if any) */ for(i=0;imap.url,(fd?fd->fd:-1)); platformrelease(fd); (void)ZUNTRACE(NC_NOERR); @@ -614,7 +717,8 @@ static NCZMAP_API zapi = { zfilelen, zfileread, zfilewrite, - zfilesearch, + zfilelist, + zfilelistall, }; static int @@ -687,7 +791,7 @@ platformtestcontentbearing(const char* canonpath) int ret = 0; struct stat buf; - ZTRACE(6,"map=%s canonpath=%s",zfmap->map.url,canonpath); + ZTRACE(6,"canonpath=%s",canonpath); errno = 0; ret = NCstat(canonpath, &buf); @@ -709,9 +813,9 @@ platformcreatefile(mode_t mode, const char* canonpath, FD* fd) int stat = NC_NOERR; int ioflags = 0; int createflags = 0; - int permissions = NC_DEFAULT_ROPEN_PERMS; + mode_t permissions = NC_DEFAULT_ROPEN_PERMS; - ZTRACE(6,"map=%s canonpath=%s",zfmap->map.url,canonpath); + ZTRACE(6,"mode=%d canonpath=%s",mode,canonpath); errno = 0; if(!fIsSet(mode, NC_WRITE)) @@ -738,6 +842,7 @@ platformcreatefile(mode_t mode, const char* canonpath, FD* fd) stat = platformerr(errno); goto done; /* could not open */ } + done: errno = 0; return ZUNTRACEX(stat,"fd=%d",(fd?fd->fd:-1)); @@ -749,9 +854,9 @@ platformopenfile(mode_t mode, const char* canonpath, FD* fd) { int stat = NC_NOERR; int ioflags = 0; - int permissions = 0; + mode_t permissions = 0; - ZTRACE(6,"map=%s canonpath=%s",zfmap->map.url,canonpath); + ZTRACE(6,"mode=%d canonpath=%s",mode,canonpath); errno = 0; if(!fIsSet(mode, NC_WRITE)) { @@ -785,7 +890,7 @@ platformcreatedir(mode_t mode, const char* canonpath) { int ret = NC_NOERR; - ZTRACE(6,"map=%s canonpath=%s",zfmap->map.url,canonpath); + ZTRACE(6,"mode=%d canonpath=%s",mode,canonpath); errno = 0; /* Try to access file as if it exists */ @@ -815,7 +920,9 @@ platformopendir(mode_t mode, const char* canonpath) { int ret = NC_NOERR; - ZTRACE(6,"map=%s canonpath=%s",zfmap->map.url,canonpath); + NC_UNUSED(mode); + + ZTRACE(6,"mode=%d canonpath=%s",mode,canonpath); errno = 0; /* Try to access file as if it exists */ @@ -853,7 +960,7 @@ platformdircontent(const char* canonpath, NClist* contents) size_t len; char* d = NULL; - ZTRACE(6,"map=%s canonpath=%s",zfmap->map.url,canonpath); + ZTRACE(6,"canonpath=%s",canonpath); switch (ret = platformtestcontentbearing(canonpath)) { case NC_EEMPTY: ret = NC_NOERR; break; /* directory */ @@ -861,7 +968,7 @@ platformdircontent(const char* canonpath, NClist* contents) default: goto done; } - /* We need to process the path to make it work with FindFirstFile */ + /* We need to process the path to make it work with Windows FindFirstFile */ len = strlen(canonpath); /* Need to terminate path with '/''*' */ ffpath = (char*)malloc(len+2+1); @@ -915,7 +1022,7 @@ platformdircontent(const char* canonpath, NClist* contents) errno = 0; DIR* dir = NULL; - ZTRACE(6,"map=%s canonpath=%s",zfmap->map.url,canonpath); + ZTRACE(6,"canonpath=%s",canonpath); switch (ret = platformtestcontentbearing(canonpath)) { case NC_EEMPTY: ret = NC_NOERR; break; /* directory */ @@ -956,7 +1063,7 @@ platformdeleter(NCbytes* canonpath, int depth) char* local = NULL; local = ncbytescontents(canonpath); - ZTRACE(6,"map=%s canonpath=%s delroot=%d depth=%d",zfmap->map.url,local,delroot,depth); + ZTRACE(6,"canonpath=%s depth=%d",canonpath,depth); ret = platformdircontent(local, subfiles); #ifdef DEBUG @@ -1026,7 +1133,7 @@ platformdelete(const char* rootpath, int delroot) int stat = NC_NOERR; NCbytes* canonpath = ncbytesnew(); - ZTRACE(6,"map=%s rootpath=%s delroot=%d",zfmap->map.url,rootpath,delroot); + ZTRACE(6,"rootpath=%s delroot=%d",rootpath,delroot); if(rootpath == NULL || strlen(rootpath) == 0) goto done; ncbytescat(canonpath,rootpath); @@ -1060,15 +1167,15 @@ platformseek(FD* fd, int pos, size64_t* sizep) assert(fd && fd->fd >= 0); - ZTRACE(6,"map=%s fd=%d pos=%d",zfmap->map.url,(fd?fd->fd:-1),pos); + ZTRACE(6,"fd=%d pos=%d",(fd?fd->fd:-1),pos); errno = 0; ret = NCfstat(fd->fd, &statbuf); if(ret < 0) {ret = platformerr(errno); goto done;} - if(sizep) size = *sizep; else size = 0; + if(sizep) size = (off_t)*sizep; else size = 0; newsize = lseek(fd->fd,size,pos); - if(sizep) *sizep = newsize; + if(sizep) *sizep = (size64_t)newsize; done: errno = 0; return ZUNTRACEX(ret,"sizep=%llu",*sizep); @@ -1083,13 +1190,13 @@ platformread(FD* fd, size64_t count, void* content) assert(fd && fd->fd >= 0); - ZTRACE(6,"map=%s fd=%d count=%llu",zfmap->map.url,(fd?fd->fd:-1),count); + ZTRACE(6,"fd=%d count=%llu",(fd?fd->fd:-1),count); while(need > 0) { ssize_t red; if((red = read(fd->fd,readpoint,need)) <= 0) {stat = errno; goto done;} - need -= red; + need -= (size_t)red; readpoint += red; } done: @@ -1106,60 +1213,31 @@ platformwrite(FD* fd, size64_t count, const void* content) assert(fd && fd->fd >= 0); - ZTRACE(6,"map=%s fd=%d count=%llu",zfmap->map.url,(fd?fd->fd:-1),count); + ZTRACE(6,"fd=%d count=%llu",(fd?fd->fd:-1),count); while(need > 0) { ssize_t red = 0; if((red = write(fd->fd,(void*)writepoint,need)) <= 0) {ret = NC_EACCESS; goto done;} - need -= red; + need -= (size_t)red; writepoint += red; } done: return ZUNTRACE(ret); } -#if 0 -static int -platformcwd(char** cwdp) -{ - char buf[4096]; - char* cwd = NULL; - cwd = NCcwd(buf,sizeof(buf)); - if(cwd == NULL) return errno; - if(cwdp) *cwdp = strdup(buf); - return NC_NOERR; -} -#endif - /* When we are finished accessing FD; essentially equivalent to closing the file descriptor. */ static void platformrelease(FD* fd) { - ZTRACE(6,"map=%s fd=%d",zfmap->map.url,(fd?fd->fd:-1)); + ZTRACE(6,"fd=%d",(fd?fd->fd:-1)); if(fd->fd >=0) NCclose(fd->fd); fd->fd = -1; (void)ZUNTRACE(NC_NOERR); } -#if 0 -/* Close FD => return typ to FDNONE */ -*/ -static void -platformclose(FD* fd) -{ - if(fd->typ == FDFILE) { - if(fd->fd >=0) close(fd->u,fd); - fd->fd = -1; - } else if(fd->type == FDDIR) { - if(fd->u.dir) NCclosedir(fd->u,dir); - } - fd->typ = FDNONE; -} -#endif - #ifdef VERIFY static int @@ -1197,38 +1275,3 @@ verifykey(const char* key, int isdir) } #endif -#if 0 -/* Return NC_EINVAL if path does not exist; els 1/0 in isdirp and local path in canonpathp */ -static int -testifdir(const char* path, int* isdirp, char** canonpathp) -{ - int ret = NC_NOERR; - char* tmp = NULL; - char* canonpath = NULL; - struct stat statbuf; - - /* Make path be windows compatible */ - if((ret = nczm_fixpath(path,&tmp))) goto done; - if((canonpath = NCpathcvt(tmp))==NULL) {ret = NC_ENOMEM; goto done;} - - errno = 0; - ret = NCstat(canonpath, &statbuf); - if(ret < 0) { - if(errno == ENOENT) - ret = NC_ENOTFOUND; /* path does not exist */ - else - ret = platformerr(errno); - goto done; - } - /* Check for being a directory */ - if(isdirp) { - if(S_ISDIR(statbuf.st_mode)) {*isdirp = 1;} else {*isdirp = 0;} - } - if(canonpathp) {*canonpathp = canonpath; canonpath = NULL;} -done: - errno = 0; - nullfree(tmp); - nullfree(canonpath); - return ZUNTRACE(ret); -} -#endif /* 0 */ diff --git a/libnczarr/zmap_s3sdk.c b/libnczarr/zmap_s3sdk.c index 552a73473d..9324b4721e 100644 --- a/libnczarr/zmap_s3sdk.c +++ b/libnczarr/zmap_s3sdk.c @@ -105,7 +105,7 @@ NCZ_s3finalize(void) } static int -zs3create(const char *path, int mode, size64_t flags, void* parameters, NCZMAP** mapp) +zs3create(const char *path, mode_t mode, size64_t flags, void* parameters, NCZMAP** mapp) { int stat = NC_NOERR; ZS3MAP* z3map = NULL; @@ -113,28 +113,31 @@ zs3create(const char *path, int mode, size64_t flags, void* parameters, NCZMAP** char* prefix = NULL; char* truekey = NULL; - NC_UNUSED(flags); NC_UNUSED(parameters); ZTRACE(6,"path=%s mode=%d flag=%llu",path,mode,flags); if(!zs3initialized) zs3initialize(); +#ifdef NETCDF_ENABLE_ZOH + if(flags & FLAG_ZOH) {stat = NC_EZARR; goto done;} +#endif + + /* Parse the URL */ + ncuriparse(path,&url); + if(url == NULL) + {stat = NC_EURL; goto done;} + /* Build the z3 state */ if((z3map = (ZS3MAP*)calloc(1,sizeof(ZS3MAP))) == NULL) {stat = NC_ENOMEM; goto done;} - z3map->map.format = NCZM_S3; + z3map->map.format = NCZM_S3; /* Even if NCZM_GS3 or NCZM_ZOH */ z3map->map.url = strdup(path); z3map->map.mode = mode; z3map->map.flags = flags; z3map->map.api = (NCZMAP_API*)&nczs3sdkapi; - /* Parse the URL */ - ncuriparse(path,&url); - if(url == NULL) - {stat = NC_EURL; goto done;} - /* Convert to canonical path-style */ if((stat = NC_s3urlprocess(url,&z3map->s3,NULL))) goto done; /* Verify the root path */ @@ -154,7 +157,8 @@ zs3create(const char *path, int mode, size64_t flags, void* parameters, NCZMAP** } /* The root object may or may not already exist */ switch (stat = NC_s3sdkinfo(z3map->s3client,z3map->s3.bucket,z3map->s3.rootkey,NULL,&z3map->errmsg)) { - case NC_EEMPTY: /* no such object */ + case NC_EEMPTY: /* fall thru */ + case NC_ENOOBJECT: /* no such object */ stat = NC_NOERR; /* which is what we want */ errclear(z3map); break; @@ -182,9 +186,16 @@ zs3create(const char *path, int mode, size64_t flags, void* parameters, NCZMAP** /* The problem with open is that there no obvious way to test for existence. So, we assume that the dataset must have -some content. We look for that */ +some content. We look for that +@param path Dataset URL +@param mode Mode flags from nc_open +@param flags Other flags (currently unused) +@param parameters (currently unused) +@param mapp Return the created map +@result NC_NOERR|NC_EXXX +*/ static int -zs3open(const char *path, int mode, size64_t flags, void* parameters, NCZMAP** mapp) +zs3open(const char *path, mode_t mode, size64_t flags, void* parameters, NCZMAP** mapp) { int stat = NC_NOERR; ZS3MAP* z3map = NULL; @@ -192,28 +203,31 @@ zs3open(const char *path, int mode, size64_t flags, void* parameters, NCZMAP** m NClist* content = NULL; size_t nkeys = 0; - NC_UNUSED(flags); NC_UNUSED(parameters); ZTRACE(6,"path=%s mode=%d flags=%llu",path,mode,flags); if(!zs3initialized) zs3initialize(); + /* Parse the URL */ + if((stat = ncuriparse(path,&url))) goto done; + if(url == NULL) + {stat = NC_EURL; goto done;} + /* Build the z3 state */ if((z3map = (ZS3MAP*)calloc(1,sizeof(ZS3MAP))) == NULL) {stat = NC_ENOMEM; goto done;} +#ifdef NETCDF_ENABLE_ZOH + z3map->map.format = ((flags & FLAG_ZOH)?NCZM_ZOH:NCZM_S3); +#else z3map->map.format = NCZM_S3; +#endif z3map->map.url = strdup(path); z3map->map.mode = mode; z3map->map.flags = flags; z3map->map.api = (NCZMAP_API*)&nczs3sdkapi; - /* Parse the URL */ - if((stat = ncuriparse(path,&url))) goto done; - if(url == NULL) - {stat = NC_EURL; goto done;} - /* Convert to canonical path-style */ if((stat = NC_s3urlprocess(url,&z3map->s3,NULL))) goto done; /* Verify root path */ @@ -222,15 +236,20 @@ zs3open(const char *path, int mode, size64_t flags, void* parameters, NCZMAP** m z3map->s3client = NC_s3sdkcreateclient(&z3map->s3); - /* Search the root for content */ - content = nclistnew(); - if((stat = NC_s3sdkgetkeys(z3map->s3client,z3map->s3.bucket,z3map->s3.rootkey,&nkeys,NULL,&z3map->errmsg))) - goto done; - if(nkeys == 0) { - /* dataset does not actually exist; we choose to return ENOOBJECT instead of EEMPTY */ - stat = NC_ENOOBJECT; - goto done; +#ifdef NETCDF_ENABLE_ZOH + if(!flags & FLAG_ZOH) +#endif + { + content = nclistnew(); + if((stat = NC_s3sdklist(z3map->s3client,z3map->s3.bucket,z3map->s3.rootkey,&nkeys,NULL,&z3map->errmsg))) + goto done; + if(nkeys == 0) { + /* dataset does not actually exist; we choose to return ENOOBJECT instead of EEMPTY */ + stat = NC_ENOOBJECT; + goto done; + } } + if(mapp) *mapp = (NCZMAP*)z3map; done: @@ -258,11 +277,11 @@ zs3truncate(const char *s3url) if((s3client = NC_s3sdkcreateclient(&info))==NULL) {stat = NC_ES3; goto done;} if((stat = s3clear(s3client,info.bucket,info.rootkey))) goto done; done: - if(s3client) {stat=NC_s3sdkclose(s3client,&info,1,NULL);} + if(s3client) {stat=NC_s3sdkclose(s3client,NULL);} ncurifree(url); ncurifree(purl); (void)NC_s3clear(&info); - return stat; + return ZUNTRACE(stat); } /**************************************************/ @@ -270,7 +289,7 @@ zs3truncate(const char *s3url) /* @return NC_NOERR if key points to a content-bearing object. -@return NC_EEMPTY if object at key has no content. +@return NC_ENOOBJECT if object at key does not exist @return NC_EXXX return true error */ static int @@ -284,7 +303,7 @@ zs3exists(NCZMAP* map, const char* key) /* @return NC_NOERR if key points to a content-bearing object. -@return NC_EEMPTY if object at key has no content. +@return NC_ENOOBJECT if object at key does not exist @return NC_EXXX return true error */ static int @@ -300,7 +319,39 @@ zs3len(NCZMAP* map, const char* key, size64_t* lenp) switch (stat = NC_s3sdkinfo(z3map->s3client,z3map->s3.bucket,truekey,lenp,&z3map->errmsg)) { case NC_NOERR: break; - case NC_EEMPTY: + case NC_EEMPTY: stat = NC_ENOOBJECT; /* fall thru */ + case NC_ENOOBJECT: + if(lenp) *lenp = 0; + goto done; + default: + goto done; + } +done: + nullfree(truekey); + reporterr(z3map); + return ZUNTRACE(stat); +} + +/* +@return NC_NOERR if key is a prefix for some existing object. +@return NC_EEMPTY if key is not such a prefix. +@return NC_EXXX return true error +*/ +static int +zs3keyexists(NCZMAP* map, const char* key) +{ + int stat = NC_NOERR; + ZS3MAP* z3map = (ZS3MAP*)map; + char* truekey = NULL; + + ZTRACE(6,"map=%s key=%s",map->url,key); + + if((stat = maketruekey(z3map->s3.rootkey,key,&truekey))) goto done; + + switch (stat = NC_s3sdkinfo(z3map->s3client,z3map->s3.bucket,truekey,lenp,&z3map->errmsg)) { + case NC_NOERR: break; + case NC_EEMPTY: stat = NC_ENOOBJECT; /* fall thru */ + case NC_ENOOBJECT: if(lenp) *lenp = 0; goto done; default: @@ -314,7 +365,7 @@ zs3len(NCZMAP* map, const char* key, size64_t* lenp) /* @return NC_NOERR if object at key was read -@return NC_EEMPTY if object at key has no content. +@return NC_ENOOBJECT if object at key does not exist @return NC_EXXX return true error */ static int @@ -331,7 +382,8 @@ zs3read(NCZMAP* map, const char* key, size64_t start, size64_t count, void* cont switch (stat=NC_s3sdkinfo(z3map->s3client, z3map->s3.bucket, truekey, &size, &z3map->errmsg)) { case NC_NOERR: break; - case NC_EEMPTY: goto done; + case NC_EEMPTY: stat = NC_ENOOBJECT; + case NC_ENOOBJECT: goto done; default: goto done; } /* Sanity checks */ @@ -349,7 +401,7 @@ zs3read(NCZMAP* map, const char* key, size64_t start, size64_t count, void* cont /* @return NC_NOERR if key content was written -@return NC_EEMPTY if object at key has no content. +@return NC_ENOOBJECT if object at key does not exist @return NC_EXXX return true error */ static int @@ -370,7 +422,7 @@ zs3write(NCZMAP* map, const char* key, size64_t count, const void* content) switch (stat=NC_s3sdkinfo(z3map->s3client, z3map->s3.bucket, truekey, &objsize, &z3map->errmsg)) { case NC_NOERR: /* Figure out the new size of the object */ break; - case NC_EEMPTY: + case NC_EEMPTY: case NC_ENOOBJECT: stat = NC_NOERR; /* reset */ break; default: reporterr(z3map); goto done; @@ -403,9 +455,8 @@ zs3close(NCZMAP* map, int deleteit) if(deleteit) s3clear(z3map->s3client,z3map->s3.bucket,z3map->s3.rootkey); - if(z3map->s3client && z3map->s3.bucket && z3map->s3.rootkey) { - NC_s3sdkclose(z3map->s3client, &z3map->s3, deleteit, &z3map->errmsg); - } + if(z3map->s3client && z3map->s3.bucket && z3map->s3.rootkey) + NC_s3sdkclose(z3map->s3client, &z3map->errmsg); reporterr(z3map); z3map->s3client = NULL; NC_s3clear(&z3map->s3); @@ -416,7 +467,7 @@ zs3close(NCZMAP* map, int deleteit) } /* -Return a list of full keys immediately "below" a specified prefix, +Return a list of all key segments immediately "below" a specified prefix, but not including the prefix. In theory, the returned list should be sorted in lexical order, but it possible that it is not. @@ -424,9 +475,10 @@ but it possible that it is not. @return NC_EXXX return true error */ static int -zs3search(NCZMAP* map, const char* prefix, NClist* matches) +zs3list(NCZMAP* map, const char* prefix, NClist* matches) { - int i,stat = NC_NOERR; + int stat = NC_NOERR; + size_t i; ZS3MAP* z3map = (ZS3MAP*)map; char** list = NULL; size_t nkeys; @@ -439,8 +491,8 @@ zs3search(NCZMAP* map, const char* prefix, NClist* matches) if((stat = maketruekey(z3map->s3.rootkey,prefix,&trueprefix))) goto done; - if(*trueprefix != '/') return NC_EINTERNAL; - if((stat = NC_s3sdkgetkeys(z3map->s3client,z3map->s3.bucket,trueprefix,&nkeys,&list,&z3map->errmsg))) + if(trueprefix[0] != '/') return NC_EINTERNAL; + if((stat = NC_s3sdklist(z3map->s3client,z3map->s3.bucket,trueprefix,&nkeys,&list,&z3map->errmsg))) goto done; if(nkeys > 0) { size_t tplen = strlen(trueprefix); @@ -452,17 +504,20 @@ zs3search(NCZMAP* map, const char* prefix, NClist* matches) p = l+tplen; /* Point to start of suffix */ /* If the key is same as trueprefix, ignore it */ if(*p == '\0') continue; + /* Also check for trailing '/' */ + if(strcmp(p,"/")==0) continue; if(nczm_segment1(p,&newkey)) goto done; +assert(newkey[0] != '/'); nclistpush(tmp,newkey); newkey = NULL; } } /* Now remove duplicates */ for(i=0;iurl,prefix); + + if((stat = maketruekey(z3map->s3.rootkey,prefix,&trueprefix))) goto done; + + if(*trueprefix != '/') return NC_EINTERNAL; + if((stat = NC_s3sdklistall(z3map->s3client,z3map->s3.bucket,trueprefix,&nkeys,&list,&z3map->errmsg))) + goto done; + if(nkeys > 0) { + /* remove duplicates and prefix */ + for(i=0;i 0) { + if(key[0] != '/') /* force '/' separator */ + ncbytescat(truekey,"/"); + ncbytescat(truekey,key); + ncbytesnull(truekey); + } + /* Ensure no trailing '/' */ + if(ncbytesget(truekey,ncbyteslength(truekey)-1) == '/') + ncbytessetlength(truekey,ncbyteslength(truekey)-1); + ncbytesnull(truekey); + if(truekeyp) *truekeyp = ncbytesextract(truekey); done: - nullfree(truekey); + ncbytesfree(truekey); return stat; } @@ -567,8 +675,40 @@ freevector(size_t nkeys, char** list) } /**************************************************/ +/* no-op functions for ZOH + +#ifdef NETCDF_ENABLE_ZOH + +static int +zs3create(const char *path, mode_t mode, size64_t flags, void* parameters, NCZMAP** mapp) +{ + return NC_EZARR; +} + +static int +zohtruncate(const char *s3url) +{ + return NC_EZARR; +} + +static int +zohlist(NCZMAP* map, const char* prefix, NClist* matches) +{ + return NC_EZARR; +} + +static int +zohlistall(NCZMAP* map, const char* prefix, NClist* matches) +{ + return NC_EZARR; +} + +#endif +/**************************************************/ + /* External API objects */ +/* Dispatcher for S3/GS3 */ NCZMAP_DS_API zmap_s3sdk; NCZMAP_DS_API zmap_s3sdk = { NCZM_S3SDK_V1, @@ -586,5 +726,30 @@ nczs3sdkapi = { zs3len, zs3read, zs3write, - zs3search, + zs3list, + zs3listall }; + +#ifdef NETCDF_ENABLE_ZOH +/* Dispatcher for ZOH */ +NCZMAP_DS_API zmap_zoh; +NCZMAP_DS_API zmap_zoh = { + NCZM_ZOH_V1, + ZOH_PROPERTIES, + zohcreate, + zs3open, + zohtruncate, +}; + +static NCZMAP_API +nczzohapi = { + NCZM_ZOH_V1, + zs3close, + zs3exists, + zs3len, + zs3read, + zohwrite, + zohlist, + zohlistall, +}; +#endif diff --git a/libnczarr/zmap_zip.c b/libnczarr/zmap_zip.c index 465fc3b982..13a3cb2999 100644 --- a/libnczarr/zmap_zip.c +++ b/libnczarr/zmap_zip.c @@ -98,7 +98,7 @@ zipinitialize(void) */ static int -zipcreate(const char *path, int mode, size64_t flags, void* parameters, NCZMAP** mapp) +zipcreate(const char *path, mode_t mode, size64_t flags, void* parameters, NCZMAP** mapp) { int stat = NC_NOERR; ZZMAP* zzmap = NULL; @@ -158,7 +158,7 @@ zipcreate(const char *path, int mode, size64_t flags, void* parameters, NCZMAP** #ifdef VERIFY zipflags |= ZIP_CHECKCONS; #endif - if((zzmap->archive = zip_open(zzmap->root,zipflags,&zerrno))==NULL) + if((zzmap->archive = zip_open(zzmap->root,(int)zipflags,&zerrno))==NULL) {stat = ziperrno(zerrno); goto done;} /* Tell it about the dataset as a dir */ @@ -186,7 +186,7 @@ zipcreate(const char *path, int mode, size64_t flags, void* parameters, NCZMAP** */ static int -zipopen(const char *path, int mode, size64_t flags, void* parameters, NCZMAP** mapp) +zipopen(const char *path, mode_t mode, size64_t flags, void* parameters, NCZMAP** mapp) { int stat = NC_NOERR; ZZMAP* zzmap = NULL; @@ -238,7 +238,7 @@ zipopen(const char *path, int mode, size64_t flags, void* parameters, NCZMAP** m zipflags |= ZIP_CHECKCONS; #endif /* Open the file */ - if((zzmap->archive = zip_open(zzmap->root,zipflags,&zerrno))==NULL) + if((zzmap->archive = zip_open(zzmap->root,(int)zipflags,&zerrno))==NULL) {stat = ziperrno(zerrno); goto done;} /* Use entry 0 to obtain the dataset name */ @@ -284,7 +284,7 @@ ziptruncate(const char* surl) zip_close(zip); done: ncurifree(url); - return stat; + return ZUNTRACE(stat); } /**************************************************/ @@ -334,8 +334,8 @@ zipexists(NCZMAP* map, const char* key) ZTRACE(6,"map=%s key=%s",map->url,key); switch(stat=zzlookupobj(zzmap,key,&zindex)) { case NC_NOERR: break; - case NC_ENOOBJECT: stat = NC_EEMPTY; break; - case NC_EEMPTY: break; + case NC_EEMPTY: stat = NC_ENOOBJECT; break; + case NC_ENOOBJECT: break; default: break; } return ZUNTRACE(stat); @@ -355,8 +355,8 @@ ziplen(NCZMAP* map, const char* key, size64_t* lenp) case NC_NOERR: if((stat = zzlen(zzmap,zindex,&len))) goto done; break; - case NC_ENOOBJECT: stat = NC_EEMPTY; len = 0; break; - case NC_EEMPTY: len = 0; break; /* |dir|==0 */ + case NC_EEMPTY: stat = NC_ENOOBJECT; len = 0; break; + case NC_ENOOBJECT: len = 0; break; /* |dir|==0 */ default: goto done; } @@ -375,7 +375,7 @@ zipread(NCZMAP* map, const char* key, size64_t start, size64_t count, void* cont ZINDEX zindex = -1; zip_flags_t zipflags = 0; int zerrno; - size64_t endpoint; + zip_int64_t endpoint; char* buffer = NULL; char* truekey = NULL; zip_int64_t red = 0; @@ -384,8 +384,8 @@ zipread(NCZMAP* map, const char* key, size64_t start, size64_t count, void* cont switch(stat = zzlookupobj(zzmap,key,&zindex)) { case NC_NOERR: break; - case NC_ENOOBJECT: stat = NC_EEMPTY; /* fall thru */ - case NC_EEMPTY: /* its a dir; fall thru*/ + case NC_EEMPTY: stat = NC_ENOOBJECT; /* fall thru */ + case NC_ENOOBJECT: /* its a dir; fall thru*/ default: goto done; } @@ -406,10 +406,10 @@ zipread(NCZMAP* map, const char* key, size64_t start, size64_t count, void* cont if(start == 0) { /*optimize to read directly into content */ if((red = zip_fread(zfile, content, (zip_uint64_t)count)) < 0) {stat = (zipmaperr(zzmap)); goto done;} - if(red < count) {stat = NC_EINTERNAL; goto done;} + if(red < (zip_int64_t)count) {stat = NC_EINTERNAL; goto done;} } else { - endpoint = start + count; - if((buffer = malloc(endpoint))==NULL) /* consider caching this */ + endpoint = (zip_int64_t)(start + count); + if((buffer = malloc((size_t)endpoint))==NULL) /* consider caching this */ {stat = NC_ENOMEM; goto done;} if((red = zip_fread(zfile, buffer, (zip_uint64_t)endpoint)) < 0) {stat = (zipmaperr(zzmap)); goto done;} @@ -451,8 +451,8 @@ zipwrite(NCZMAP* map, const char* key, size64_t count, const void* content) stat = NC_EOBJECT; //goto done; /* Zip files are write once */ zflags |= ZIP_FL_OVERWRITE; break; - case NC_ENOOBJECT: stat = NC_NOERR; break; case NC_EEMPTY: /* its a dir; fall thru */ + case NC_ENOOBJECT: stat = NC_NOERR; break; default: goto done; } @@ -481,7 +481,7 @@ zipwrite(NCZMAP* map, const char* key, size64_t count, const void* content) zs = NULL; localbuffer = NULL; - if(zip_set_file_compression(zzmap->archive, zindex, compression, 0) < 0) + if(zip_set_file_compression(zzmap->archive, (zip_uint64_t)zindex, compression, 0) < 0) {stat = zipmaperr(zzmap); goto done;} freesearchcache(zzmap->searchcache); zzmap->searchcache = NULL; @@ -495,7 +495,7 @@ zipwrite(NCZMAP* map, const char* key, size64_t count, const void* content) } /* -Return a list of full keys immediately under a specified prefix key. +Return a list of all keys immediately under a specified prefix key. In theory, the returned list should be sorted in lexical order, but it possible that it is not. Note that for zip, it is not possible to get just the keys of length n+1, @@ -504,13 +504,14 @@ so, this search must get all keys and process them one by one. @return NC_EXXX return true error */ int -zipsearch(NCZMAP* map, const char* prefix0, NClist* matches) +ziplist(NCZMAP* map, const char* prefix0, NClist* matches) { int stat = NC_NOERR; ZZMAP* zzmap = (ZZMAP*)map; char* trueprefix = NULL; size_t truelen; - zip_int64_t num_entries, i; + zip_int64_t num_entries; + size_t i; char** cache = NULL; size_t prefixlen; NClist* tmp = NULL; @@ -574,7 +575,7 @@ zipsearch(NCZMAP* map, const char* prefix0, NClist* matches) tmp = nclistnew(); /* Walk cache looking for names with prefix plus exactly one other segment */ - for(i=0;i < num_entries; i++) { + for(i=0;i < (size_t)num_entries; i++) { /* get ith entry */ #ifdef CACHESEARCH key = zzmap->searchcache[i]; @@ -595,11 +596,11 @@ zipsearch(NCZMAP* map, const char* prefix0, NClist* matches) } /* Now remove duplicates */ for(i=0;iurl,prefix0); + + /* prefix constraints: + 1. prefix is "/" + 2. or prefix has leading '/' and no trailing '/' + */ + + /* Fix up the prefix; including adding the dataset name to the front */ + if(prefix0 == NULL || strlen(prefix0)==0) + prefix0 = "/"; + /* make sure that prefix0 has leading '/' */ + if(prefix0[0] != '/') + {stat = NC_EINVAL; goto done;} + prefixlen = strlen(prefix0); + truelen = prefixlen+strlen(zzmap->dataset)+1; /* possible trailing '/'*/ + if((trueprefix = (char*)malloc(truelen+1+1))==NULL) /* nul term */ + {stat = NC_ENOMEM; goto done;} + /* Build the true prefix */ + trueprefix[0] = '\0'; + strlcat(trueprefix,zzmap->dataset,truelen+1); + strlcat(trueprefix,prefix0,truelen+1); /* recall prefix starts with '/' */ + /* If the prefix did not end in '/', then add it */ + if(prefixlen > 1 && prefix0[prefixlen-1] != '/') + strlcat(trueprefix,"/",truelen+1); + truelen = strlen(trueprefix); + + /* Get number of entries */ + num_entries = zip_get_num_entries(zzmap->archive, (zip_flags_t)0); +#ifdef CACHESEARCH + if(num_entries > 0 && zzmap->searchcache == NULL) { + /* Release the current cache */ + freesearchcache(zzmap->searchcache); + zzmap->searchcache = NULL; + /* Re-build the searchcache */ + if((cache = calloc(sizeof(char*),num_entries+1))==NULL) + {stat = NC_ENOMEM; goto done;} + for(i=0;i < num_entries; i++) { + const char *name = NULL; + /* get ith entry */ + name = zip_get_name(zzmap->archive, i, (zip_flags_t)0); + /* Add to cache */ + if((cache[i] = strdup(name))==NULL) + {stat = NC_ENOMEM; goto done;} + } + cache[num_entries] = NULL; + zzmap->searchcache = cache; cache = NULL; + } +#endif +#ifdef CACHESEARCH + if(zzmap->searchcache != NULL) +#endif + { + const char *key = NULL; + size_t keylen = 0; + + /* Walk cache looking for names with prefix */ + for(i=0;i < (size_t)num_entries; i++) { + /* get ith entry */ +#ifdef CACHESEARCH + key = zzmap->searchcache[i]; +#else + key = zip_get_name(zzmap->archive, i, (zip_flags_t)0); +#endif + keylen = strlen(key); + /* Suppress any key that ends in '/' */ + if(key[keylen-1] == '/') continue; + nclistpush(matches,nulldup(key)); + } + /* Now remove later duplicates */ + for(i=0;i 0) + for(j=nclistlength(matches)-1;j>i;j--) { + char* js = (char*)nclistget(matches,(size_t)j); + if(strcmp(js,is)==0) { + /* reclaim the duplicate */ + nclistremove(matches,(size_t)j); + nullfree(js); + } + } + } + } + + /* Remove prefix from all entries in matches. */ + if(trueprefix[strlen(trueprefix)-1] == '/') + {trueprefix[strlen(trueprefix)-1] = '\0'; truelen--;} + + if((stat = nczm_removeprefix(trueprefix,nclistlength(matches),(char**)nclistcontents(matches)))) goto done; + + /* Lexical sort the results */ + NCZ_sortstringlist(nclistcontents(matches),nclistlength(matches)); + +done: + if(cache != NULL) freesearchcache(cache); + nullfree(trueprefix); + return ZUNTRACEX(stat,"|matches|=%d",(int)nclistlength(matches)); +} + /**************************************************/ /* Utilities */ @@ -622,7 +741,7 @@ static int zzcreategroup(ZZMAP* zzmap, const char* key, int nskip) { int stat = NC_NOERR; - int i, len; + int i,len; char* fullpath = NULL; NCbytes* path = ncbytesnew(); NClist* segments = nclistnew(); @@ -632,12 +751,12 @@ zzcreategroup(ZZMAP* zzmap, const char* key, int nskip) ZTRACE(7,"map=%s key=%s nskip=%d",zzmap->map.url,key,nskip); if((stat=nczm_split(key,segments))) goto done; - len = nclistlength(segments); + len = (int)nclistlength(segments); len -= nskip; /* leave off last nskip segments */ /* Start with the dataset */ ncbytescat(path,zzmap->dataset); for(i=0;iarchive,zindex,zipflags,&statbuf) < 0) + if(zip_stat_index(zzmap->archive,(zip_uint64_t)zindex,zipflags,&statbuf) < 0) {stat = (zipmaperr(zzmap)); goto done;} assert(statbuf.valid & ZIP_STAT_SIZE); len = statbuf.size; /* Always return uncompressed size */ @@ -747,7 +866,8 @@ static NCZMAP_API zapi = { ziplen, zipread, zipwrite, - zipsearch, + ziplist, + ziplistall }; static int diff --git a/libnczarr/zmetadata.c b/libnczarr/zmetadata.c new file mode 100644 index 0000000000..b9e3698a06 --- /dev/null +++ b/libnczarr/zmetadata.c @@ -0,0 +1,411 @@ +/********************************************************************* + * Copyright 2018, UCAR/Unidata + * See netcdf/COPYRIGHT file for copying and redistribution conditions. + *********************************************************************/ + +#include "zincludes.h" + +/**************************************************/ + +extern int NCZMD2_initialize(void); +extern int NCZMD2_finalize(void); + +/**************************************************/ +//////////////////////////////////////////////////// + +int NCZMD_initialize(void) +{ + int stat = NC_NOERR; + if((stat=NCZMD2_initialize())) goto done; +done: + return THROW(stat); +} + +int NCZMD_finalize(void) +{ + + int stat = NC_NOERR; + if((stat=NCZMD2_finalize())) goto done; +done: + return THROW(stat); +} + +///////////////////////////////////////////////////////////////////// +// Fetch list of subnodes of .zmetadata or storage +///////////////////////////////////////////////////////////////////// + +/** +Return a vector of names (not keys) representing the +next segment of legal objects that are immediately contained by the prefix key. +@param file -- the containing file +@param prefix -- the key into the tree where the search is to occur +@param matches -- return the set of names in this list; might be empty +@return NC_NOERR if the operation succeeded +@return NC_EXXX if the operation failed for one of several possible reasons +*/ +int +NCZMD_list(NC_FILE_INFO_T* file, const char* prefix, struct NClist* matches) +{ + NCZ_FILE_INFO_T* zfile = (NCZ_FILE_INFO_T*)file->format_file_info; + return zfile->metadata_handler.dispatcher->list(file, prefix, matches); +} + +/** +Return a vector of keys representing the +list of all objects whose key is prefixed by the specified prefix arg. +In effect it returns the complete subtree below a specified prefix. +@param file -- the containing file +@param prefix -- the key into the tree whose subtree of keys is to be returned. +@param matches -- return the set of keys in this list; might be empty +@return NC_NOERR if the operation succeeded +@return NC_EXXX if the operation failed for one of several possible reasons +*/ +int +NCZMD_listall(NC_FILE_INFO_T* file, const char* prefix, struct NClist* matches) +{ + NCZ_FILE_INFO_T* zfile = (NCZ_FILE_INFO_T*)file->format_file_info; + return zfile->metadata_handler.dispatcher->listall(file, prefix, matches); +} + +/** +Test if key refers to content-bearing object. +@param file -- the containing file +@param key -- the key into the tree to test +@return NC_NOERR if the key exists +@return NC_ENOOBJECT if the key does not exists +@return NC_EXXX if the operation failed for one of several possible reasons +*/ +int +NCZMD_exists(NC_FILE_INFO_T* file, const char* key) +{ + NCZ_FILE_INFO_T* zfile = (NCZ_FILE_INFO_T*)file->format_file_info; + return zfile->metadata_handler.dispatcher->exists(file, key); +} + +/** +Open a consolidated metadata object +@param file -- the containing file +@return NC_NOERR if the operation succeeded +@return NC_EXXX if the operation failed for one of several possible reasons +*/ +int +NCZMD_open(NC_FILE_INFO_T* file) +{ + NCZ_FILE_INFO_T* zfile = (NCZ_FILE_INFO_T*)file->format_file_info; + return zfile->metadata_handler.dispatcher->open(file); +} + +/** +Create a consolidated metadata object +@param file -- the containing file +@return NC_NOERR if the operation succeeded +@return NC_EXXX if the operation failed for one of several possible reasons +*/ +int +NCZMD_create(NC_FILE_INFO_T* file) +{ + NCZ_FILE_INFO_T* zfile = (NCZ_FILE_INFO_T*)file->format_file_info; + return zfile->metadata_handler.dispatcher->create(file); +} + +/** +Close a consolidated metadata object +@param file -- the containing file +@return NC_NOERR if the operation succeeded +@return NC_EXXX if the operation failed for one of several possible reasons +*/ +int +NCZMD_close(NC_FILE_INFO_T* file) +{ + NCZ_FILE_INFO_T* zfile = (NCZ_FILE_INFO_T*)file->format_file_info; + return zfile->metadata_handler.dispatcher->close(file); +} + +/** +Write a consolidated metadata object +@param file -- the containing file +@return NC_NOERR if the operation succeeded +@return NC_EXXX if the operation failed for one of several possible reasons +*/ +int +NCZMD_consolidate(NC_FILE_INFO_T* file) +{ + NCZ_FILE_INFO_T* zfile = (NCZ_FILE_INFO_T*)file->format_file_info; + return zfile->metadata_handler.dispatcher->consolidate(file); +} + +///////////////////////////////////////////////////////////////////// +// Fetch JSON content from .zmetadata or storage +///////////////////////////////////////////////////////////////////// + +int +NCZMD_fetch_json_content(NC_FILE_INFO_T *file, NCZMD_MetadataType mdtype, const char *key, NCjson **jobjp) +{ + int stat = NC_NOERR; + NCZ_FILE_INFO_T* zfile = (NCZ_FILE_INFO_T*)file->format_file_info; + if((stat = zfile->metadata_handler.dispatcher->fetch_json_content(file, mdtype, key, jobjp))) goto done; + +done: + return THROW(stat); +} + +#if 0 +int +NCZMD_fetch_json_group(NC_FILE_INFO_T *file, NC_GRP_INFO_T *grp, const char *name, NCjson **jgroup) +{ + int stat = NC_NOERR; + char *group= NULL; + char *key = NULL; + NCZ_FILE_INFO_T* zfile = (NCZ_FILE_INFO_T*)file->format_file_info; + + if (grp && ((stat = NCZ_grpkey(grp, &group)) != NC_NOERR)) + goto done; + if ((stat = nczm_concat(group, name, &key))) + goto done; + if((stat = zfile->metadata_handler.dispatcher->fetch_json_content(file, NCZMD_GROUP, key, jgroup))) + goto done; + +done: + nullfree(group); + nullfree(key); + return stat; +} + +int +NCZMD_fetch_json_attrs(NC_FILE_INFO_T *file, NC_GRP_INFO_T *grp, const char *name, NCjson **jattrs) +{ + int stat = NC_NOERR; + char *group= NULL; + char *key = NULL; + NCZ_FILE_INFO_T* zfile = (NCZ_FILE_INFO_T*)file->format_file_info; + + if (grp && ((stat = NCZ_grpkey(grp, &group)) != NC_NOERR)) + goto done; + if ((stat = nczm_concat(group, name, &key))) + goto done; + if((stat = zfile->metadata_handler.dispatcher->fetch_json_content(file, NCZMD_ATTRS, key , jattrs))) + goto done; + +done: + nullfree(group); + nullfree(key); + return stat; +} + +int +NCZMD_fetch_json_array(NC_FILE_INFO_T *file, NC_GRP_INFO_T *grp, const char *name, NCjson **jarray) +{ + int stat = NC_NOERR; + char *group= NULL; + char *key = NULL; + NCZ_FILE_INFO_T* zfile = (NCZ_FILE_INFO_T*)file->format_file_info; + + if (grp && ((stat = NCZ_grpkey(grp, &group)) != NC_NOERR)) + goto done; + + if ((stat = nczm_concat(group, name, &key))) + goto done; + if((stat = zfile->metadata_handler.dispatcher->fetch_json_content(file, NCZMD_ARRAY, key, jarray))) + goto done; + +done: + nullfree(group); + nullfree(key); + return stat; +} +#endif /*0*/ + +//////////////////////////////////////////////////////////////////////////////// +// Update in-memory + storage JSON content +//////////////////////////////////////////////////////////////////////////////// + +int +NCZMD_update_json_content(NC_FILE_INFO_T *file, NCZMD_MetadataType mdtype, const char *key, const NCjson* jobj) +{ + int stat = NC_NOERR; + NCZ_FILE_INFO_T* zfile = (NCZ_FILE_INFO_T*)file->format_file_info; + if((stat = zfile->metadata_handler.dispatcher->update_json_content(file, mdtype, key, jobj))) goto done; + +done: + return THROW(stat); +} + +#if 0 +int +NCZMD_update_json_group(NC_FILE_INFO_T *file, NC_GRP_INFO_T *grp, const char *name, const NCjson *jgroup) +{ + int stat = NC_NOERR; + char *group= NULL; + char *key = NULL; + NCZ_FILE_INFO_T* zfile = (NCZ_FILE_INFO_T*)file->format_file_info; + + if (grp && ((stat = NCZ_grpkey(grp, &group)) != NC_NOERR)) + goto done; + if ((stat = nczm_concat(group, name, &key))) + goto done; + if((stat = zfile->metadata_handler.dispatcher->update_json_content(file, NCZMD_GROUP, key, jgroup))) + goto done; + +done: + nullfree(group); + nullfree(key); + return stat; +} + +int +NCZMD_update_json_attrs(NC_FILE_INFO_T *file, NC_GRP_INFO_T *grp, const char *name, const NCjson *jattrs) +{ + int stat = NC_NOERR; + NCZ_FILE_INFO_T* zfile = (NCZ_FILE_INFO_T*)file->format_file_info; + char *group= NULL; + char *key = NULL; + + if (grp && ((stat = NCZ_grpkey(grp, &group)) != NC_NOERR)) + goto done; + if ((stat = nczm_concat(group, name, &key))) + goto done; + if((stat = zfile->metadata_handler.dispatcher->update_json_content(file, NCZMD_ATTRS, key , jattrs))) + goto done; + +done: + nullfree(group); + nullfree(key); + return stat; +} + +int +NCZMD_update_json_array(NC_FILE_INFO_T *file, NC_GRP_INFO_T *grp, const char *name, const NCjson *jarray) +{ + int stat = NC_NOERR; + NCZ_FILE_INFO_T* zfile = (NCZ_FILE_INFO_T*)file->format_file_info; + char *group= NULL; + char *key = NULL; + + if (grp && ((stat = NCZ_grpkey(grp, &group)) != NC_NOERR)) + goto done; + if ((stat = nczm_concat(group, name, &key))) + goto done; + if((stat = zfile->metadata_handler.dispatcher->update_json_content(file, NCZMD_ARRAY, key, jarray))) + goto done; + +done: + nullfree(group); + nullfree(key); + return stat; +} +#endif /*0*/ + +//////////////////////////////////////////////////////////////////////////// + +int +NCZMD_is_metadata_consolidated(NC_FILE_INFO_T* file) +{ + int stat = NC_NOERR; + NCZ_FILE_INFO_T* zfile = (NCZ_FILE_INFO_T*)file->format_file_info; + NCZ_Metadata *zmd = &zfile->metadata_handler; + if(zmd->jcsl == NULL || + NCJsort(zmd->jcsl) != NCJ_DICT || + (zfile->flags & FLAG_NOCONSOLIDATED)) + {stat = NC_ENOOBJECT; goto done;} +done: + return stat; +} + +int +NCZMD_get_metadata_format(NC_FILE_INFO_T* file, int* zarrformat) +{ + // Only pure Zarr is determined + int stat = NC_NOERR; + NCZ_FILE_INFO_T* zfile = (NCZ_FILE_INFO_T*)file->format_file_info; + NCZ_Metadata *zmd = &zfile->metadata_handler; + + if(!zmd->dispatcher ) {stat = NC_EFILEMETA; goto done;} + if(zmd->dispatcher->zarr_format >= ZARRFORMAT2) { + *zarrformat = zmd->dispatcher->zarr_format; + goto done; + } + /* Last thing to do is to look for: + .zattrs, .zgroup or .zarray + */ + if(!nczmap_exists(zfile->map, "/" Z2ATTRS) + && !nczmap_exists(zfile->map, "/" Z2GROUP) + && !nczmap_exists(zfile->map, "/" Z2ARRAY)) + goto done; + *zarrformat = ZARRFORMAT2; +done: + return THROW(stat); +} + +/* Inference of the metadata handler */ +int +NCZMD_set_metadata_handler(NC_FILE_INFO_T *file) +{ + int stat = NC_NOERR; + NCZ_FILE_INFO_T* zfile = (NCZ_FILE_INFO_T*)file->format_file_info; + NCZ_Metadata* zmd = &zfile->metadata_handler; + const NCZ_Metadata_Dispatcher *zmd_dispatcher = NULL; + int disallowzmetadata = 0; + NCjson* jcsl = NULL; + const NCjson* jmeta = NULL; + + /* Override from env var or mode flag */ + if((zfile->flags & FLAG_NOCONSOLIDATED) || getenv(NCZARRDEFAULTNOMETA) != NULL) disallowzmetadata = 1; + + /* First, figure out and set the dispatcher */ + if(disallowzmetadata) { + zmd_dispatcher = NCZ_metadata_handler; + } else if(zfile->creating) { + switch (zfile->zarr.zarr_format) { + case 2: + zmd_dispatcher = NCZ_csl_metadata_handler2; + break; + case 3: + zmd_dispatcher = NCZ_csl_metadata_handler3; + break; + default: + zmd_dispatcher = NCZ_metadata_handler; + break; + } + } else if(zmd_dispatcher == NULL) { /* opening a file */ + /* See if /.zmetadata exists */ + switch (zfile->zarr.zarr_format) { + case 2: /* Try to download .zmetadata */ + if((stat = NCZ_downloadjson(zfile->map,Z2METADATA,&jcsl))) goto done; + if(jcsl != NULL) + NCJcheck(NCJdictget(jcsl,"metadata",(NCjson**)&jmeta)); + break; +#ifdef NETCDF_ENABLE_NCZARR_V3 + case 3: /* For V3, we need to look inside the root group's zarr.json */ + if((stat = NCZ_downloadjson(zfile->map,Z3METADATA,&jcsl))) goto done; + if(jcsl != NULL) + NCJcheck(NCJdictget(jcsl,"metadata",(NCjson**)&jmeta)); + break; +#endif + default: + break; + } + if(jmeta != NULL && zfile->zarr.zarr_format == 2) + zmd_dispatcher = NCZ_csl_metadata_handler2; +#ifdef NETCDF_ENABLE_NCZARR_V3 + else if(jmeta && zfile->zarr.zarr_format == 3) + zmd_dispatcher = NCZ_csl_metadata_handler3; +#endif + else + zmd_dispatcher = NCZ_metadata_handler; + } + zmd->dispatcher = zmd_dispatcher; + assert(zmd->dispatcher != NULL); + + zmd->jcsl = jcsl; jcsl = NULL; + zmd->jmeta = jmeta; jmeta = NULL; + + /* Now open/create the consolidated metadata object */ + if(zfile->creating) { + if((stat=NCZMD_create(file))) goto done; + } else { + if((stat=NCZMD_open(file))) goto done; + } + +done: + return THROW(stat); +} diff --git a/libnczarr/zmetadata.h b/libnczarr/zmetadata.h new file mode 100644 index 0000000000..3dee5bf26c --- /dev/null +++ b/libnczarr/zmetadata.h @@ -0,0 +1,111 @@ +/* Copyright 2018-2018 University Corporation for Atmospheric + Research/Unidata. */ + +/* +Zarr Metadata Handling + +Encapsulates Zarr metadata operations across versions, supporting both +consolidated access and per-file access. Provides a common interface +for metadata operations. + +The dispatcher is defined by the type NCZ_Metadata_Dispatcher. +It offers several types of operations that allow decoupling/abstract +filesystem access, content reading of the JSON metadata files +1. Retrieve JSON representation of (sub)groups, arrays and attributes. + Directly read from filesystem/objectstore or retrieve the JSON + object from the consolidated view respective to the group or variable +2. Wrappers for selected zmap operations that are key based. + +Note: This will also be the case of zarr v3 +(the elements will be extracted from zarr.json instead) +*/ + +#ifndef ZMETADATA_H +#define ZMETADATA_H + +#if defined(__cplusplus) +extern "C" +{ +#endif +/* This is the version of the metadata table. It should be changed + * when new functions are added to the metadata table. */ +#ifndef NCZ_METADATA_VERSION +#define NCZ_METADATA_VERSION 1 +#endif /*NCZ_METADATA_VERSION*/ + +/* The keys in this dict are all stored in root group's container for consolidated metadata */ +#define MINIMIM_CSL_REP2_RAW "{\"zarr_consolidated_format\":1, \"metadata\":{}}" +#define MINIMIM_CSL_REP3_RAW "{\"zarr_format\": 3, \"node_type\": \"group\", \"attributes\": {}, \"kind\": \"inline\", \"must_understand\": False, \"metadata\": {}}" + +typedef enum NCZMD_MetadataType { + NCZMD_NULL, + NCZMD_GROUP, + NCZMD_ATTRS, + NCZMD_ARRAY +} NCZMD_MetadataType; + +typedef struct NCZ_Metadata_Dispatcher +{ + int zarr_format; /* Zarr format version */ + int dispatch_version; /* Dispatch table version*/ + size64_t flags; /* Metadata handling flags */ +#define ZARR_NOT_CONSOLIDATED 0 +#define ZARR_CONSOLIDATED 1 + int (*open)(NC_FILE_INFO_T* file); + int (*create)(NC_FILE_INFO_T* file); + int (*close)(NC_FILE_INFO_T* file); + int (*consolidate)(NC_FILE_INFO_T* file); + int (*fetch_json_content)(NC_FILE_INFO_T *, NCZMD_MetadataType, const char* key, NCjson** jobj); + int (*update_json_content)(NC_FILE_INFO_T *, NCZMD_MetadataType, const char *key, const NCjson *jobj); + + /* zmap wrappers */ + int (*list)(NC_FILE_INFO_T*, const char* prefix, NClist* matches); + int (*listall)(NC_FILE_INFO_T*, const char* prefix, NClist* matches); + int (*exists)(NC_FILE_INFO_T* file, const char* prefix); +} NCZ_Metadata_Dispatcher; + +typedef struct NCZ_Metadata +{ + NCjson *jcsl; /* Consolidated JSON container: .zmetadata for V2, + or root group zarr.json (minus "metadata" dict) for V3 */ + const NCjson *jmeta; /* "metadata" dict from jcsl (or NULL) */ + int dirty; /* The consolidated metadata was modified */ + const NCZ_Metadata_Dispatcher *dispatcher; +} NCZ_Metadata; + +/* Handler when not using consolidated metadata */ +extern const NCZ_Metadata_Dispatcher *NCZ_metadata_handler; +/* Consolidated metadata handler for Zarr version 2*/ +extern const NCZ_Metadata_Dispatcher *NCZ_csl_metadata_handler2; +/* Consolidated metadata handler for Zarr version 3*/ +extern const NCZ_Metadata_Dispatcher *NCZ_csl_metadata_handler3; + +/* Called by nc_initialize and nc_finalize respectively */ +extern int NCZMD_initialize(void); +extern int NCZMD_finalize(void); + +extern int NCZMD_open(NC_FILE_INFO_T *file); +extern int NCZMD_create(NC_FILE_INFO_T *file); +extern int NCZMD_close(NC_FILE_INFO_T *file); +extern int NCZMD_consolidate(NC_FILE_INFO_T *file); + +extern int NCZMD_fetch_json_content(NC_FILE_INFO_T *, NCZMD_MetadataType, const char* key, NCjson** jobjp); +extern int NCZMD_update_json_content(NC_FILE_INFO_T *, NCZMD_MetadataType, const char *key, const NCjson *jobj); + +extern int NCZMD_list(NC_FILE_INFO_T*, const char* prefix, struct NClist* matches); +extern int NCZMD_listall(NC_FILE_INFO_T*, const char* prefix, struct NClist* matches); +extern int NCZMD_exists(NC_FILE_INFO_T* file, const char* prefix); + +/**************************************************/ + +/* Inference for the Metadata handler */ +extern int NCZMD_is_metadata_consolidated(NC_FILE_INFO_T* file); +extern int NCZMD_get_metadata_format(NC_FILE_INFO_T *zfile, int *zarrformat); /* Only pure Zarr is determined */ +extern int NCZMD_set_metadata_handler(NC_FILE_INFO_T *zfile); +extern void NCZMD_clear_metadata_handler(NCZ_Metadata * zmd); + +#if defined(__cplusplus) +} +#endif + +#endif /* ZMETADATA_H */ diff --git a/libnczarr/zmetadata0.c b/libnczarr/zmetadata0.c new file mode 100644 index 0000000000..6637077afa --- /dev/null +++ b/libnczarr/zmetadata0.c @@ -0,0 +1,147 @@ +/********************************************************************* + * Copyright 2018, UCAR/Unidata + * See netcdf/COPYRIGHT file for copying and redistribution conditions. + *********************************************************************/ + +#include "zincludes.h" + +/**************************************************/ + +extern int NCZF0_initialize(void); +extern int NCZF0_finalize(void); + +static int list_nodes(NC_FILE_INFO_T*, const char* prefix, struct NClist* matches); +static int listall_nodes(NC_FILE_INFO_T*, const char* prefix, struct NClist* matches); +static int exists_nodes(NC_FILE_INFO_T* file, const char* prefix); + +static int fetch_json_content(NC_FILE_INFO_T* file, NCZMD_MetadataType zarr_obj_type, const char *key, NCjson **jobj); +static int update_json_content(NC_FILE_INFO_T* file, NCZMD_MetadataType zobj_t, const char *prefix, const NCjson *jobj); + +static int open_noop(NC_FILE_INFO_T* file); +static int create_noop(NC_FILE_INFO_T* file); +static int close_noop(NC_FILE_INFO_T* file); +static int consolidate_noop(NC_FILE_INFO_T*); + +/**************************************************/ + +static const NCZ_Metadata_Dispatcher NCZ_md_table = { /* Common to V2 and V3 */ + ZARRFORMAT2, + NCZ_METADATA_VERSION, /* Version of the dispatch table */ + ZARR_NOT_CONSOLIDATED, /* Flags*/ + .list = list_nodes, + .listall = listall_nodes, + .exists = exists_nodes, + .open = open_noop, + .create = create_noop, + .close = close_noop, + .consolidate = consolidate_noop, + .fetch_json_content = fetch_json_content, + .update_json_content = update_json_content, +}; +const NCZ_Metadata_Dispatcher *NCZ_metadata_handler= &NCZ_md_table; + +/******************************************************/ + +int +NCZMD0_initialize(void) +{ + return NC_NOERR; +} + +int +NCZMD0_finalize(void) +{ + return NC_NOERR; +} + +/*/////////////////////////////////////////////////// +// .zmetadata dispatch functions +///////////////////////////////////////////////////*/ + +static int +list_nodes(NC_FILE_INFO_T* file, const char* prefix, NClist* matches) +{ + int stat = NC_NOERR; + NCZ_FILE_INFO_T* zfile = (NCZ_FILE_INFO_T*)file->format_file_info; + if((stat = nczmap_list(zfile->map,prefix,matches))) goto done; +done: + return stat; +} + +static int +listall_nodes(NC_FILE_INFO_T* file, const char* prefix, NClist* matches) +{ + int stat = NC_NOERR; + NCZ_FILE_INFO_T* zfile = (NCZ_FILE_INFO_T*)file->format_file_info; + if((stat = nczmap_listall(zfile->map,prefix,matches))) goto done; +done: + return stat; +} + +static int +exists_nodes(NC_FILE_INFO_T* file, const char* prefix) +{ + int stat = NC_NOERR; + NCZ_FILE_INFO_T* zfile = (NCZ_FILE_INFO_T*)file->format_file_info; + if((stat = nczmap_exists(zfile->map,prefix))) goto done; +done: + return stat; +} + +static int +fetch_json_content(NC_FILE_INFO_T* file, NCZMD_MetadataType zobj_t, const char *key, NCjson **jobjp) +{ + int stat = NC_NOERR; + NCZ_FILE_INFO_T* zfile = (NCZ_FILE_INFO_T*)file->format_file_info; + NCjson* jobj = NULL; + + NC_UNUSED(zobj_t); + if((stat = NCZ_downloadjson(zfile->map, key, &jobj))) goto done; + if(jobjp) {*jobjp = jobj; jobj = NULL;} +done: + NCJreclaim(jobj); + return stat; +} + +static int +update_json_content(NC_FILE_INFO_T* file, NCZMD_MetadataType zobj_t, const char* key, const NCjson *jobj) +{ + int stat = NC_NOERR; + NCZ_FILE_INFO_T* zfile = (NCZ_FILE_INFO_T*)file->format_file_info; + + NC_UNUSED(zobj_t); + if((stat = NCZ_uploadjson(zfile->map, key, jobj))) goto done; + zfile->metadata_handler.dirty = 0; /* after writing */ + +done: + return stat; +} + + +static int +open_noop(NC_FILE_INFO_T* file) +{ + NC_UNUSED(file); + return NC_NOERR; +} + +static int +create_noop(NC_FILE_INFO_T* file) +{ + NC_UNUSED(file); + return NC_NOERR; +} + +static int +close_noop(NC_FILE_INFO_T* file) +{ + NC_UNUSED(file); + return NC_NOERR; +} + +static int +consolidate_noop(NC_FILE_INFO_T* file) +{ + NC_UNUSED(file); + return NC_NOERR; +} diff --git a/libnczarr/zmetadata2.c b/libnczarr/zmetadata2.c new file mode 100644 index 0000000000..cdae7d9c84 --- /dev/null +++ b/libnczarr/zmetadata2.c @@ -0,0 +1,273 @@ +/********************************************************************* + * Copyright 2018, UCAR/Unidata + * See netcdf/COPYRIGHT file for copying and redistribution conditions. + *********************************************************************/ + +#include "zincludes.h" + +/**************************************************/ + +extern int NCZF2_initialize(void); +extern int NCZF2_finalize(void); + +static int list_nodes_csl_v2(NC_FILE_INFO_T*, const char* prefix, struct NClist* matches); +static int listall_nodes_csl_v2(NC_FILE_INFO_T*, const char* prefix, struct NClist* matches); +static int exists_key_csl_v2(NC_FILE_INFO_T* file, const char* prefix); + +static int fetch_csl_json_content_v2(NC_FILE_INFO_T* file, NCZMD_MetadataType zarr_obj_type, const char *key, NCjson **jobj); +static int update_csl_json_content_v2(NC_FILE_INFO_T* file, NCZMD_MetadataType zobj_t, const char *prefix, const NCjson *jobj); + +static int open_csl_v2(NC_FILE_INFO_T* file); +static int create_csl_v2(NC_FILE_INFO_T* file); +static int close_csl_v2(NC_FILE_INFO_T* file); +static int consolidate_csl_v2(NC_FILE_INFO_T*); + +/**************************************************/ +static const NCZ_Metadata_Dispatcher NCZ_csl_md2_table = { + ZARRFORMAT2, + NCZ_METADATA_VERSION, /* Version of the dispatch table */ + ZARR_CONSOLIDATED, /* Flags*/ + .list = list_nodes_csl_v2, + .listall = listall_nodes_csl_v2, + .exists = exists_key_csl_v2, + .consolidate = consolidate_csl_v2, + .close = close_csl_v2, + .open = open_csl_v2, + .create = create_csl_v2, + .fetch_json_content = fetch_csl_json_content_v2, + .update_json_content = update_csl_json_content_v2, +}; +const NCZ_Metadata_Dispatcher *NCZ_csl_metadata_handler2 = &NCZ_csl_md2_table; + +/******************************************************/ + +int +NCZMD2_initialize(void) +{ + return NC_NOERR; +} + +int +NCZMD2_finalize(void) +{ + return NC_NOERR; +} + +/*/////////////////////////////////////////////////// +// .zmetadata dispatch functions +///////////////////////////////////////////////////*/ + +static int +list_nodes_csl_v2(NC_FILE_INFO_T* file, const char* prefix, NClist* matches) +{ + int stat = NC_NOERR; + NCZ_FILE_INFO_T* zfile = (NCZ_FILE_INFO_T*)file->format_file_info; + size_t i,plen; + NCZ_Metadata* zmd = &zfile->metadata_handler; + char* seg1 = NULL; + NClist* segments = nclistnew(); + + assert(zmd->jmeta != NULL && NCJsort(zmd->jmeta)==NCJ_DICT); + if(prefix[0] == '/') prefix++; /* drop leading '/' for search purposes */ + plen = strlen(prefix); + /* Walk the metadata nodes and collect the matches */ + for(i=0;ijmeta);i++) { + const NCjson* jkey = NCJdictkey(zmd->jmeta,i); + const char* skey = NCJstring(jkey); + size_t slen = strlen(skey); + size_t j, found; + /* Check for prefix taking root key into acct. */ + if((plen == 0 && slen > 0) || strncmp(skey,prefix,plen) == 0) { + const char* suffix = NULL; + /* This is a match */ + suffix = &skey[plen]; /* point past the prefix */ + assert(strlen(suffix) > 0); + nclistclearall(segments); + ncz_splitkey(suffix,segments); + if(nclistlength(segments) > 0) { /* test if just the prefix */ + seg1 = (char*)nclistremove(segments,0); + /* suppress duplicates */ + for(found=0,j=0;jformat_file_info; + size_t i,plen; + NCZ_Metadata* zmd = &zfile->metadata_handler; + NCbytes* key = ncbytesnew(); + + assert(zmd->jmeta != NULL && NCJsort(zmd->jmeta)==NCJ_DICT); + if(prefix[0] == '/') prefix++; /* drop leading '/' for search purposes */ + plen = strlen(prefix); + ncbytescat(key,"/"); + /* Walk the metadata nodes and collect the matches (with leading '/') */ + for(i=0;ijmeta);i++) { + NCjson* jkey = NCJdictkey(zmd->jmeta,i); + const char* skey = NCJstring(jkey); + if(strncmp(skey,prefix,plen) > 0) { + /* This is a match and is not just the prefix*/ + ncbytessetlength(key,1); + ncbytescat(key,prefix); /* add leading '/' */ + nclistpush(matches,strdup(ncbytescontents(key))); + } + } + return stat; +} + +static int +exists_key_csl_v2(NC_FILE_INFO_T* file, const char* prefix) +{ + int stat = NC_NOERR; + NCZ_FILE_INFO_T* zfile = (NCZ_FILE_INFO_T*)file->format_file_info; + size_t i,plen; + NCZ_Metadata* zmd = &zfile->metadata_handler; + + assert(zmd->jmeta != NULL && NCJsort(zmd->jmeta)==NCJ_DICT); + if(prefix[0] == '/') prefix++; /* drop leading '/' for search purposes */ + plen = strlen(prefix); + /* Walk the metadata nodes and see if there is a prefix match */ + for(i=0;ijmeta);i++) { + NCjson* jkey = NCJdictkey(zmd->jmeta,i); + const char* skey = NCJstring(jkey); + if(strncmp(skey,prefix,plen) == 0) {stat = NC_NOERR; goto done;} + } + stat = NC_ENOOBJECT; +done: + return stat; +} + +static int +fetch_csl_json_content_v2(NC_FILE_INFO_T* file, NCZMD_MetadataType zobj_t, const char* key, NCjson **jobjp) +{ + int stat = NC_NOERR; + NCZ_FILE_INFO_T* zfile = (NCZ_FILE_INFO_T*)file->format_file_info; + NCjson* jobj = NULL; + NCjson *jkey = NULL; + NCZ_Metadata* zmd = &zfile->metadata_handler; + + NC_UNUSED(zobj_t); + assert(zmd->jmeta != NULL); + if(key[0] == '/') key++; /* remove any leading key */ + /* Meta-data is stored a mostly flat format using the whole key (with leading / removed) */ + if ((stat = NCJdictget(zmd->jmeta, key, (NCjson**)&jkey))) goto done; + NCJcheck(NCJclone(jkey, &jobj)); + if(jobj != NULL) + {if(jobjp) {*jobjp = jobj; jobj = NULL;}} +done: + return stat; +} + +static int +update_csl_json_content_v2(NC_FILE_INFO_T* file, NCZMD_MetadataType zobj_t, const char *key, const NCjson *jobj) +{ + int stat = NC_NOERR; + NCZ_FILE_INFO_T* zfile = (NCZ_FILE_INFO_T*)file->format_file_info; + NCjson* jclone = NULL; + NCZ_Metadata* zmd = &zfile->metadata_handler; + + NC_UNUSED(zobj_t); + if(key[0] == '/') key++; + + /* Create the metadata skeleton if it does not exist */ + if(zmd->jcsl == NULL) { + if((stat = NCJparse(MINIMIM_CSL_REP2_RAW,0,&zmd->jcsl))) goto done; + NCJcheck(NCJdictget(zmd->jcsl,"metadata",(NCjson**)&zmd->jmeta)); + } + /* Insert/overwrite the key+value */ + NCJcheck(NCJclone(jobj,&jclone)); + NCJcheck(NCJinsert((NCjson*)zmd->jmeta,key,jclone)); + zmd->dirty = 1; + +#if 0 /* Do we need this? */ + // Allocating representation if doesn't exist + // Updating the internal JSON representation to be synced later + NCjson * jrep = NULL; + if ((stat = NCJdictget(zfile->metadata_handler.jcsl,"metadata", (NCjson**)&jrep)) || jrep == NULL) { + goto done; + } +#endif /*0*/ + +done: + return stat; +} + +//////////////////////////////////////////////////////////////////////////// +/* Writes .zmetadata file into storage */ + +static int +consolidate_csl_v2(NC_FILE_INFO_T* file) +{ + int stat = NC_NOERR; + NCZ_FILE_INFO_T* zfile = (NCZ_FILE_INFO_T*)file->format_file_info; + NCZ_Metadata* zmd = &zfile->metadata_handler; + + if (zmd->dirty) { + if(zmd->jcsl == NULL) {stat = NC_EZARRMETA; goto done;} + stat = NCZ_uploadjson(zfile->map, Z2METADATA ,zmd->jcsl); + zmd->dirty = 0; + } +done: + return stat; +} + +static int +close_csl_v2(NC_FILE_INFO_T* file) +{ + int stat = NC_NOERR; + NCZ_FILE_INFO_T* zfile = (NCZ_FILE_INFO_T*)file->format_file_info; + NCZ_Metadata* zmd = &zfile->metadata_handler; + NCJreclaim(zmd->jcsl); + zmd->jcsl = NULL; + zmd->jmeta = NULL; + return stat; +} + +static int +open_csl_v2(NC_FILE_INFO_T* file) +{ + int stat = NC_NOERR; + NCZ_FILE_INFO_T* zfile = (NCZ_FILE_INFO_T*)file->format_file_info; + NCZ_Metadata* zmd = &zfile->metadata_handler; + + /* Read /.zmetadata */ + if(zmd->jcsl == NULL) { + if((stat = NCZ_downloadjson(zfile->map,Z2METADATA,&zmd->jcsl))) goto done; + } + if(zmd->jcsl == NULL || NCJsort(zmd->jcsl) != NCJ_DICT) {stat = NC_EZARRMETA; goto done;} + /* Pull out the "metadata" key and save it */ + NCJcheck(NCJdictget(zmd->jcsl,"metadata",(NCjson**)&zmd->jmeta)); + if(zmd->jmeta == NULL || NCJsort(zmd->jmeta) != NCJ_DICT) {stat = NC_EZARRMETA; goto done;} +done: + return stat; +} + +static int +create_csl_v2(NC_FILE_INFO_T* file) +{ + int stat = NC_NOERR; + NCZ_FILE_INFO_T* zfile = (NCZ_FILE_INFO_T*)file->format_file_info; + NCZ_Metadata* zmd = &zfile->metadata_handler; + + /* Create the JSON skeleton */ + if((stat = NCJparse(MINIMIM_CSL_REP2_RAW,0,&zmd->jcsl))) goto done; /* Create the metadata skeleton */ + /* Pull out the "metadata" key and save it */ + NCJcheck(NCJdictget(zmd->jcsl,"metadata",(NCjson**)&zmd->jmeta)); + +done: + return stat; +} + diff --git a/libnczarr/zmetadata3.c b/libnczarr/zmetadata3.c new file mode 100644 index 0000000000..0d3a5298e5 --- /dev/null +++ b/libnczarr/zmetadata3.c @@ -0,0 +1,282 @@ +/********************************************************************* + * Copyright 2018, UCAR/Unidata + * See netcdf/COPYRIGHT file for copying and redistribution conditions. + *********************************************************************/ + +#include "zincludes.h" + +/**************************************************/ + +extern int NCZF3_initialize(void); +extern int NCZF3_finalize(void); + +static int list_nodes_csl_v3(NC_FILE_INFO_T*, const char* prefix, struct NClist* matches); +static int listall_nodes_csl_v3(NC_FILE_INFO_T*, const char* prefix, struct NClist* matches); +static int exists_key_csl_v3(NC_FILE_INFO_T* file, const char* prefix); + +static int fetch_csl_json_content_v3(NC_FILE_INFO_T* file, NCZMD_MetadataType zarr_obj_type, const char *key, NCjson **jobj); +static int update_csl_json_content_v3(NC_FILE_INFO_T* file, NCZMD_MetadataType zobj_t, const char *prefix, const NCjson *jobj); + +static int open_csl_v3(NC_FILE_INFO_T* file); +static int create_csl_v3(NC_FILE_INFO_T* file); +static int close_csl_v3(NC_FILE_INFO_T* file); +static int consolidate_csl_v3(NC_FILE_INFO_T*); + +/**************************************************/ + +static const NCZ_Metadata_Dispatcher NCZ_csl_md3_table = { + ZARRFORMAT3, + NCZ_METADATA_VERSION, /* Version of the dispatch table */ + ZARR_CONSOLIDATED, /* Flags*/ + .list = list_nodes_csl_v3, + .listall = listall_nodes_csl_v3, + .exists = exists_key_csl_v3, + .fetch_json_content = fetch_csl_json_content_v3, + .update_json_content = update_csl_json_content_v3, + .open = open_csl_v3, + .create = create_csl_v3, + .close = close_csl_v3, + .consolidate = consolidate_csl_v3, +}; + +const NCZ_Metadata_Dispatcher *NCZ_csl_metadata_handler3 = &NCZ_csl_md3_table; + +/******************************************************/ + +int +NCZMD3_initialize(void) +{ + return NC_NOERR; +} + +int +NCZMD3_finalize(void) +{ + return NC_NOERR; +} + +/*/////////////////////////////////////////////////// +// .zmetadata dispatch functions +///////////////////////////////////////////////////*/ + +static int +list_nodes_csl_v3(NC_FILE_INFO_T* file, const char* prefix, NClist* matches) +{ + int stat = NC_NOERR; + NCZ_FILE_INFO_T* zfile = (NCZ_FILE_INFO_T*)file->format_file_info; + size_t i,plen; + NCZ_Metadata* zmd = &zfile->metadata_handler; + char* seg1 = NULL; + NClist* segments = nclistnew(); + + assert(zmd->jmeta != NULL && NCJsort(zmd->jmeta)==NCJ_DICT); + if(prefix[0] == '/') prefix++; /* drop leading '/' for search purposes */ + plen = strlen(prefix); + /* Walk the metadata nodes and collect the matches */ + for(i=0;ijmeta);i++) { + const NCjson* jkey = NCJdictkey(zmd->jmeta,i); + const char* skey = NCJstring(jkey); + size_t slen = strlen(skey); + size_t j, found; + /* Check for prefix taking root key into acct. */ + if((plen == 0 && slen > 0) || strncmp(skey,prefix,plen) > 0) { + const char* suffix = NULL; + /* This is a match and is not just the prefix*/ + /* truncate any segments beyond the first */ + suffix = &skey[plen]; + assert(strlen(suffix) > 0); + nclistclearall(segments); + ncz_splitkey(suffix,segments); + if(nclistlength(segments) > 0) { + seg1 = (char*)nclistremove(segments,0); + /* suppress duplicates */ + for(found=0,j=0;jformat_file_info; + size_t i,plen; + NCZ_Metadata* zmd = &zfile->metadata_handler; + NCbytes* key = ncbytesnew(); + + assert(zmd->jmeta != NULL && NCJsort(zmd->jmeta)==NCJ_DICT); + if(prefix[0] == '/') prefix++; /* drop leading '/' for search purposes */ + plen = strlen(prefix); + ncbytescat(key,"/"); + /* Walk the metadata nodes and collect the matches (with leading '/') */ + for(i=0;ijmeta);i++) { + NCjson* jkey = NCJdictkey(zmd->jmeta,i); + const char* skey = NCJstring(jkey); + if(strncmp(skey,prefix,plen) > 0) { + /* This is a match and is not just the prefix*/ + ncbytessetlength(key,1); + ncbytescat(key,prefix); /* add leading '/' */ + nclistpush(matches,strdup(ncbytescontents(key))); + } + } + return stat; +} + +static int +fetch_csl_json_content_v3(NC_FILE_INFO_T* file, NCZMD_MetadataType zobj_t, const char* key, NCjson **jobjp) +{ + int stat = NC_NOERR; + NCZ_FILE_INFO_T* zfile = (NCZ_FILE_INFO_T*)file->format_file_info; + NCjson* jobj = NULL; + NCjson *jkey = NULL; + NCZ_Metadata* zmd = &zfile->metadata_handler; + + NC_UNUSED(zobj_t); + assert(zmd->jmeta != NULL); + /* If asking for /zarr.json, then short circuit to read from metadata handler */ + if(strcmp(key,Z2METAROOT)==0) { + jobj = zmd->jcsl; + goto retval; + } + if(key[0] == '/') key++; /* remove any leading key */ + /* Meta-data is stored a mostly flat format using the whole key (with leading / removed) */ + if ((stat = NCJdictget(zmd->jmeta, key, (NCjson**)&jkey))) goto done; + NCJcheck(NCJclone(jkey, &jobj)); +retval: + if(jobj != NULL) + {if(jobjp) {*jobjp = jobj; jobj = NULL;}} +done: + return stat; +} + +/*/////////////////////////////////////////////////////////////////////////// +// Write to internal JSON pointer and/or directly to storage +///////////////////////////////////////////////////////////////////////////*/ + +static int +update_csl_json_content_v3(NC_FILE_INFO_T* file, NCZMD_MetadataType zobj_t, const char *key, const NCjson *jobj) +{ + int stat = NC_NOERR; + NCZ_FILE_INFO_T* zfile = (NCZ_FILE_INFO_T*)file->format_file_info; + NCjson* jclone = NULL; + NCZ_Metadata* zmd = &zfile->metadata_handler; + + NC_UNUSED(zobj_t); + + assert(zmd->jcsl != NULL && zmd->jmeta != NULL); + /* clone the key value */ + NCJcheck(NCJclone(jobj,&jclone)); + + /* If writing to /zarr.json, then short circuit */ + if(strcmp(key,Z2METAROOT)==0) { + NCJcheck(NCJinsert((NCjson*)zmd->jcsl,key,jclone)); + } else { + if(key[0] == '/') key++; + NCJcheck(NCJinsert((NCjson*)zmd->jmeta,key,jclone)); + } + zmd->dirty = 1; + +#if 0 /* Do we need this? */ + // Allocating representation if doesn't exist + // Updating the internal JSON representation to be synced later + NCjson * jrep = NULL; + if ((stat = NCJdictget(zfile->metadata_handler.jcsl,"metadata", (NCjson**)&jrep)) || jrep == NULL) { + goto done; + } +#endif /*0*/ + +done: + return stat; +} + +static int +consolidate_csl_v3(NC_FILE_INFO_T* file) +{ + int stat = NC_NOERR; + NCZ_FILE_INFO_T* zfile = (NCZ_FILE_INFO_T*)file->format_file_info; + NCZ_Metadata* zmd = &zfile->metadata_handler; + if (zmd->dirty) { + if(zmd->jcsl == NULL) {stat = NC_EZARRMETA; goto done;} + if(zmd->jmeta == NULL) {stat = NC_EZARRMETA; goto done;} + stat = NCZ_uploadjson(zfile->map, Z3METADATA ,zmd->jcsl); + zmd->dirty = 0; + } +done: + return stat; +} + +static int +close_csl_v3(NC_FILE_INFO_T* file) +{ + int stat = NC_NOERR; + NCZ_FILE_INFO_T* zfile = (NCZ_FILE_INFO_T*)file->format_file_info; + NCZ_Metadata* zmd = &zfile->metadata_handler; + NCJreclaim(zmd->jcsl); zmd->jcsl = NULL; + zmd->jmeta = NULL; + return stat; +} + +static int +open_csl_v3(NC_FILE_INFO_T* file) +{ + int stat = NC_NOERR; + NCZ_FILE_INFO_T* zfile = (NCZ_FILE_INFO_T*)file->format_file_info; + NCZ_Metadata* zmd = &zfile->metadata_handler; + + /* Read /zarr.json */ + if(zmd->jcsl == NULL) { + if((stat = NCZ_downloadjson(zfile->map,Z3METADATA,&zmd->jcsl))) goto done; + } + if(zmd->jcsl == NULL || NCJsort(zmd->jcsl) != NCJ_DICT) {stat = NC_EZARRMETA; goto done;} + /* Pull out the "metadata" key and save it */ + NCJcheck(NCJdictget(zmd->jcsl,"metadata",(NCjson**)&zmd->jmeta)); + if(zmd->jmeta == NULL || NCJsort(zmd->jmeta) != NCJ_DICT) {stat = NC_EZARRMETA; goto done;} +done: + return stat; +} + +static int +create_csl_v3(NC_FILE_INFO_T* file) +{ + int stat = NC_NOERR; + NCZ_FILE_INFO_T* zfile = (NCZ_FILE_INFO_T*)file->format_file_info; + NCZ_Metadata* zmd = &zfile->metadata_handler; + + /* Create the JSON skeleton */ + NCJcheck(NCJparse(MINIMIM_CSL_REP3_RAW,0,&zmd->jcsl)); /* Create the metadata skeleton */ + /* Pull out the "metadata" key and save it */ + NCJcheck(NCJdictget(zmd->jcsl,"metadata",(NCjson**)&zmd->jmeta)); +done: + return stat; +} + +static int +exists_key_csl_v3(NC_FILE_INFO_T* file, const char* prefix) +{ + int stat = NC_NOERR; + NCZ_FILE_INFO_T* zfile = (NCZ_FILE_INFO_T*)file->format_file_info; + size_t i,plen; + NCZ_Metadata* zmd = &zfile->metadata_handler; + + assert(zmd->jmeta != NULL && NCJsort(zmd->jmeta)==NCJ_DICT); + if(prefix[0] == '/') prefix++; /* drop leading '/' for search purposes */ + plen = strlen(prefix); + /* Walk the metadata nodes and see if there is a prefix match */ + for(i=0;ijmeta);i++) { + NCjson* jkey = NCJdictkey(zmd->jmeta,i); + const char* skey = NCJstring(jkey); + if(strncmp(skey,prefix,plen) == 0) {stat = NC_NOERR; goto done;} + } + stat = NC_ENOOBJECT; +done: + return stat; +} diff --git a/libnczarr/znc4.c b/libnczarr/znc4.c new file mode 100644 index 0000000000..a51871f29e --- /dev/null +++ b/libnczarr/znc4.c @@ -0,0 +1,122 @@ +/********************************************************************* + * Copyright 2018, UCAR/Unidata + * See netcdf/COPYRIGHT file for copying and redistribution conditions. + *********************************************************************/ + +#include "zincludes.h" +#include "znc4.h" +#include "zfill.h" +#include "zformat.h" +#ifdef NETCDF_ENABLE_NCZARR_FILTERS +#include "zfilter.h" +#endif + +/**************************************************/ + +/* Forward */ + +/**************************************************/ + +int +ncz4_create_file(NC_FILE_INFO_T** filep) +{ + int stat = NC_NOERR; + NC_UNUSED(filep); + return THROW(stat); +} + +int +ncz4_create_grp(NC_FILE_INFO_T* file, NC_GRP_INFO_T* parent, const char* name, NC_GRP_INFO_T** grpp) +{ + int stat = NC_NOERR; + char norm_name[NC_MAX_NAME]; + NC_GRP_INFO_T* grp = NULL; + NCZ_GRP_INFO_T* zgrp = NULL; + + /* Check and normalize the name. */ + if((stat = nc4_check_name(name, norm_name))) goto done; + if((stat = nc4_grp_list_add(file, parent, norm_name, &grp))) goto done; + if((zgrp = calloc(1, sizeof(NCZ_GRP_INFO_T)))==NULL) {stat = NC_ENOMEM; goto done;} + zgrp->common.file = file; + grp->format_grp_info = zgrp; + zgrp = NULL; + grp->nc4_info = file; + if(grpp) *grpp = grp; +done: + return THROW(stat); +} + +int +ncz4_create_var(NC_FILE_INFO_T* file, NC_GRP_INFO_T* parent, const char* name, NC_VAR_INFO_T** varp) +{ + int stat = NC_NOERR; + char norm_name[NC_MAX_NAME]; + NC_VAR_INFO_T* var = NULL; + NCZ_VAR_INFO_T* zvar = NULL; + + /* Check and normalize the name. */ + if((stat = nc4_check_name(name, norm_name))) goto done; + if((stat = nc4_var_list_add2(parent, norm_name, &var))) goto done; + if((zvar = calloc(1, sizeof(NCZ_VAR_INFO_T)))==NULL) {stat = NC_ENOMEM; goto done;} + zvar->common.file = file; + var->format_var_info = zvar; + zvar = NULL; + if(varp) *varp = var; +done: + return THROW(stat); +} + + +int +ncz4_create_dim(NC_FILE_INFO_T* file, NC_GRP_INFO_T* parent, const struct NCZ_DimInfo* dimdef, NC_DIM_INFO_T** dimp) +{ + int stat = NC_NOERR; + NC_DIM_INFO_T* dim = NULL; + NCZ_DIM_INFO_T* zdim = NULL; + if((stat = nc4_dim_list_add(parent, dimdef->norm_name, (size_t)dimdef->shape, -1, &dim))) goto done; + dim->unlimited = (dimdef->unlimited ? 1 : 0); + if((zdim = calloc(1,sizeof(NCZ_DIM_INFO_T))) == NULL) {stat = NC_ENOMEM; goto done;} + zdim->common.file = file; + dim->format_dim_info = zdim; + if(dimp) *dimp = dim; +done: + return THROW(stat); +} + +#if 0 +/* This is an abbreviated form of ncz_put_att */ +int +ncz4_create_attr(NC_FILE_INFO_T* file, NC_OBJ* container, const char* name, + nc_type typeid, size_t len, void* values, + NC_ATT_INFO_T** attp) +{ + /* Defer to zattr.c */ + return ncz_makeattr(file,container,name,typeid,len,values,attp); +??? return ncz_makeattr(file,container,name,typeid,len,values,attp); +} +#endif /*0*/ + +#if 0 +#ifdef NETCDF_ENABLE_NCZARR_FILTERS +int +ncz4_create_filter(NC_FILE_INFO_T* file, + NCZ_HDF5* hdf5, + NCZ_Codec* codec, + NCZ_Filter** filterp) +{ + int stat = NC_NOERR; + NCZ_Filter* filter = NULL; + + if((filter=(NCZ_Filter*)calloc(1,sizeof(NCZ_Filter)))==NULL) {stat = NC_ENOMEM; goto done;} +?????? + + if((stat = NCZ_addfilter(file,fvar,&filter))) goto done; + + if(filterp) {*filterp = filter; filter = NULL;} + +done: + NCZ_filter_free(filter); + return THROW(stat); +} +#endif /*NETCDF_ENABLE_NCZARR_FILTERS*/ +#endif /*0*/ diff --git a/libnczarr/znc4.h b/libnczarr/znc4.h new file mode 100644 index 0000000000..f36549b614 --- /dev/null +++ b/libnczarr/znc4.h @@ -0,0 +1,49 @@ +/* Copyright 2018-2018 University Corporation for Atmospheric + Research/Unidata. */ +/** + * @file + * + * @author Dennis Heimbigner + */ + +#ifndef ZNC4_H +#define ZNC4_H + +/**************************************************/ +#if defined(__cplusplus) +extern "C" { +#endif + +/* Wrap the code for creating the netcdf-4 data model objects from nc4internal.h */ + +extern int ncz4_create_file(NC_FILE_INFO_T** filep); +extern int ncz4_create_grp(NC_FILE_INFO_T* file, NC_GRP_INFO_T* parent, const char* name, + NC_GRP_INFO_T** grpp); +extern int ncz4_create_var(NC_FILE_INFO_T* file, NC_GRP_INFO_T* parent, const char* name, NC_VAR_INFO_T** varp); +extern int ncz4_build_var(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, + nc_type nctype, + int storage, + int scalar, + int endianness, + size_t maxstrlen, + int dimsep, + char order, + size_t ndims, + size64_t* shape, + size64_t* chunksizes, + int* dimids, + NClist* filters, + int no_fill, + void* fill_value); +extern int ncz4_create_dim(NC_FILE_INFO_T* file, NC_GRP_INFO_T* parent, const struct NCZ_DimInfo* dimdef, NC_DIM_INFO_T** dimp); +extern int ncz4_create_filter(NC_FILE_INFO_T* file, + NCZ_HDF5* hdf5, + NCZ_Codec* codec, + NClist* filterlist, + NCZ_Filter** filterp); + +#if defined(__cplusplus) +} +#endif + +#endif /* ZNC4_H */ diff --git a/libnczarr/zodom.c b/libnczarr/zodom.c index 21e9204125..9f7fe1a3f8 100644 --- a/libnczarr/zodom.c +++ b/libnczarr/zodom.c @@ -5,20 +5,20 @@ #include "zincludes.h" /*Forward*/ -static int buildodom(int rank, NCZOdometer** odomp); +static int buildodom(size_t rank, NCZOdometer** odomp); void nczodom_reset(NCZOdometer* odom) { - int r; + size_t r; for(r=0;rrank;r++) odom->index[r] = odom->start[r]; } NCZOdometer* -nczodom_new(int rank, const size64_t* start, const size64_t* stop, const size64_t* stride, const size64_t* len) +nczodom_new(size_t rank, const size64_t* start, const size64_t* stop, const size64_t* stride, const size64_t* len) { - int i; + size_t i; NCZOdometer* odom = NULL; if(buildodom(rank,&odom)) return NULL; odom->properties.stride1 = 1; /* assume */ @@ -38,7 +38,7 @@ nczodom_new(int rank, const size64_t* start, const size64_t* stop, const size64_ } NCZOdometer* -nczodom_fromslices(int rank, const NCZSlice* slices) +nczodom_fromslices(size_t rank, const NCZSlice* slices) { size_t i; NCZOdometer* odom = NULL; @@ -83,8 +83,8 @@ nczodom_more(const NCZOdometer* odom) void nczodom_next(NCZOdometer* odom) { - int i; - int rank; + size_t i; + size_t rank; rank = odom->rank; for(i=rank-1;i>=0;i--) { odom->index[i] += odom->stride[i]; @@ -106,9 +106,9 @@ nczodom_indices(const NCZOdometer* odom) size64_t nczodom_offset(const NCZOdometer* odom) { - int i; + size_t i; size64_t offset; - int rank = odom->rank; + size_t rank = odom->rank; offset = 0; for(i=0;irank,nczodom_offset(odom),nczodom_avail(odom)); + fprintf(stderr,"odom{rank=%zu offset=%llu avail=%llu",odom->rank,nczodom_offset(odom),nczodom_avail(odom)); fprintf(stderr," start=("); for(i=0;irank;i++) {fprintf(stderr,"%s%llu",(i==0?"":" "),(unsigned long long)odom->start[i]);} fprintf(stderr,")"); diff --git a/libnczarr/zodom.h b/libnczarr/zodom.h index 36c52c0158..d5792fc85e 100644 --- a/libnczarr/zodom.h +++ b/libnczarr/zodom.h @@ -9,7 +9,7 @@ struct NCZSlice; typedef struct NCZOdometer { - int rank; /*rank */ + size_t rank; /*rank */ size64_t* start; size64_t* stride; size64_t* stop; /* start + (count*stride) */ @@ -24,8 +24,8 @@ typedef struct NCZOdometer { /**************************************************/ /* From zodom.c */ -extern NCZOdometer* nczodom_new(int rank, const size64_t*, const size64_t*, const size64_t*, const size64_t*); -extern NCZOdometer* nczodom_fromslices(int rank, const struct NCZSlice* slices); +extern NCZOdometer* nczodom_new(size_t rank, const size64_t*, const size64_t*, const size64_t*, const size64_t*); +extern NCZOdometer* nczodom_fromslices(size_t rank, const struct NCZSlice* slices); extern int nczodom_more(const NCZOdometer*); extern void nczodom_next(NCZOdometer*); extern size64_t* nczodom_indices(const NCZOdometer*); diff --git a/libnczarr/zopen.c b/libnczarr/zopen.c index 2171f7b23d..e1599c9c44 100644 --- a/libnczarr/zopen.c +++ b/libnczarr/zopen.c @@ -74,7 +74,7 @@ ncz_open_file(const char *path, int mode, NClist* controls, int ncid) LOG((3, "%s: path %s mode %d", __func__, path, mode)); assert(path); - ZTRACE(2,"path=%s,mode=%d,ncid=%d,controls=%s)",path,mode,ncid,(controls?nczprint_envv(controls):"null")); + ZTRACE(2,"path=%s,mode=%d,ncid=%d,controls=%s)",path,mode,ncid,(controls?nczprint_envlist(controls):"null")); /* Convert ncid to an NC* structure pointer */ if((stat = NC_check_id(ncid,&nc))) goto exit; @@ -99,16 +99,9 @@ ncz_open_file(const char *path, int mode, NClist* controls, int ncid) if((stat = ncz_open_dataset(h5,controls))) goto exit; - /* Now read in all the metadata. Some types - * information may be difficult to resolve here, if, for example, a - * dataset of user-defined type is encountered before the - * definition of that type. */ - if((stat = ncz_read_file(h5))) - goto exit; - /* We must read in the attributes of the root group to get e.g. provenance and classic model attribute */ - if((stat = ncz_read_atts(h5,(NC_OBJ*)h5->root_grp))) goto exit; + if((stat = ncz_getattlist(h5->root_grp,NC_GLOBAL,NULL,NULL))) goto exit; /* Check for classic model attribute. */ if ((stat = check_for_classic_model(h5->root_grp, &is_classic))) @@ -151,6 +144,9 @@ NCZ_open(const char *path, int mode, int basepe, size_t *chunksizehintp, int stat = NC_NOERR; NCURI* uri = NULL; + NC_UNUSED(basepe); + NC_UNUSED(chunksizehintp); + ZTRACE(0,"path=%s,mode=%d,ncid=%d)",path,mode,ncid); NC_UNUSED(parameters); diff --git a/libnczarr/zplugins.c b/libnczarr/zplugins.c index bc24ad041b..23bdc06294 100644 --- a/libnczarr/zplugins.c +++ b/libnczarr/zplugins.c @@ -82,8 +82,10 @@ int NCZ_plugin_path_finalize(void) { int stat = NC_NOERR; - size_t i; struct NCglobalstate* gs = NC_getglobalstate(); +#ifdef NETCDF_ENABLE_NCZARR_FILTERS + size_t i; +#endif #ifdef NETCDF_ENABLE_NCZARR_FILTERS /* Reclaim all loaded filters */ @@ -135,7 +137,6 @@ NCZ_plugin_path_ndirs(size_t* ndirsp) ndirs = nclistlength(gs->zarr.pluginpaths); if(ndirsp) *ndirsp = ndirs; -done: return THROW(stat); } @@ -243,20 +244,28 @@ NCZ_load_all_plugins(void) /* Try to load plugins from this directory */ if((ret = NCZ_load_plugin_dir(dir))) goto done; } - if(nclistlength(gs->zarr.codec_defaults)) { /* Try to provide default for any HDF5 filters without matching Codec. */ - /* Search the defaults */ - for(j=0;jzarr.codec_defaults);j++) { - struct CodecAPI* dfalt = (struct CodecAPI*)nclistget(gs->zarr.codec_defaults,j); - if(dfalt->codec != NULL) { - const NCZ_codec_t* codec = dfalt->codec; - size_t hdf5id = codec->hdf5id; - NCZ_Plugin* p = NULL; - if(hdf5id <= 0 || hdf5id > gs->zarr.loaded_plugins_max) {ret = NC_EFILTER; goto done;} - p = gs->zarr.loaded_plugins[hdf5id]; /* get candidate */ - if(p != NULL && p->hdf5.filter != NULL - && p->codec.codec == NULL) { - p->codec.codec = codec; - p->codec.codeclib = dfalt->codeclib; + + /* Try to provide default for any HDF5 filters without matching Codec. */ + if(nclistlength(gs->zarr.codec_defaults)) { + /* Search the loaded_plugins */ + for(i=1;i<=gs->zarr.loaded_plugins_max;i++) { + int matched = 0; + NCZ_Plugin* p = (NCZ_Plugin*)gs->zarr.loaded_plugins[i]; + /* Check if plugin has codec */ + if(p != NULL && p->hdf5.filter != NULL && p->codec.codec == NULL) { + /* Find for a default for this */ + matched = 0; + for(j=0;!matched && jzarr.codec_defaults);j++) { + struct CodecAPI* dfalt = (struct CodecAPI*)nclistget(gs->zarr.codec_defaults,j); + if(dfalt->codec->hdf5id == (unsigned)p->hdf5.filter->id && !dfalt->ishdf5raw) { + p->codec = *dfalt; + p->codec.defaulted = 1; + matched = 1; + } + } + /* Last chance: use hdfraw */ + if(!matched && gs->zarr.hdf5raw != NULL) { + p->codec = *gs->zarr.hdf5raw; p->codec.defaulted = 1; } } @@ -276,19 +285,21 @@ NCZ_load_all_plugins(void) } } } - /* Iniitalize all remaining plugins */ + /* Initalize all remaining plugins */ { size_t i; NCZ_Plugin* p; + NCproplist* props = ncproplistnew(); for(i=1;izarr.loaded_plugins_max;i++) { if((p = gs->zarr.loaded_plugins[i]) != NULL) { if(p->incomplete) continue; if(p->hdf5.filter != NULL && p->codec.codec != NULL) { if(p->codec.codec && p->codec.codec->NCZ_codec_initialize) - p->codec.codec->NCZ_codec_initialize(); + p->codec.codec->NCZ_codec_initialize(props); } } } + ncproplistfree(props); } done: @@ -399,30 +410,29 @@ NCZ_load_plugin(const char* path, struct NCZ_Plugin** plugp) /* We can have cpd or we can have (gpt && gpi && npi) but not both sets */ if(cpd != NULL) { + /* Get vector of default codecs */ cp = (const NCZ_codec_t**)cpd(); - } else {/* cpd => !gpt && !gpi && !npi */ - if(gpt != NULL && gpi != NULL) { /* get HDF5 info */ - h5type = gpt(); - h5class = gpi(); - /* Verify */ - if(h5type != H5PL_TYPE_FILTER) {stat = NC_EPLUGIN; goto done;} - if(h5class->version != H5Z_CLASS_T_VERS) {stat = NC_EFILTER; goto done;} - } - if(npi != NULL) {/* get Codec info */ - codec = npi(); - /* Verify */ - if(codec->version != NCZ_CODEC_CLASS_VER) {stat = NC_EPLUGIN; goto done;} - if(codec->sort != NCZ_CODEC_HDF5) {stat = NC_EPLUGIN; goto done;} + if(cp != NULL) { + int used = 0; + if((stat = loadcodecdefaults(path,cp,lib,&used))) goto done; + if(used) lib = NULL; } + goto done; + } + /* else !cpd => (gpt && gpi && npi) */ + if(gpt != NULL && gpi != NULL) { /* get HDF5 info */ + h5type = gpt(); + h5class = gpi(); + /* Verify */ + if(h5type != H5PL_TYPE_FILTER) {stat = NC_EPLUGIN; goto done;} + if(h5class->version != H5Z_CLASS_T_VERS) {stat = NC_EFILTER; goto done;} + } + if(npi != NULL) {/* get Codec info */ + codec = npi(); + /* Verify */ + if(codec->version != NCZ_CODEC_CLASS_VER) {stat = NC_EPLUGIN; goto done;} + if(codec->sort != NCZ_CODEC_HDF5) {stat = NC_EPLUGIN; goto done;} } - } - - /* Handle defaults separately */ - if(cp != NULL) { - int used = 0; - if((stat = loadcodecdefaults(path,cp,lib,&used))) goto done; - if(used) lib = NULL; - goto done; } if(h5class != NULL && codec != NULL) { @@ -482,13 +492,15 @@ NCZ_unload_plugin(NCZ_Plugin* plugin) ZTRACE(9,"plugin=%p",plugin); if(plugin) { + NCproplist* props = ncproplistnew(); if(plugin->codec.codec && plugin->codec.codec->NCZ_codec_finalize) - plugin->codec.codec->NCZ_codec_finalize(); + plugin->codec.codec->NCZ_codec_finalize(props); if(plugin->hdf5.filter != NULL) gs->zarr.loaded_plugins[plugin->hdf5.filter->id] = NULL; if(plugin->hdf5.hdf5lib != NULL) (void)ncpsharedlibfree(plugin->hdf5.hdf5lib); if(!plugin->codec.defaulted && plugin->codec.codeclib != NULL) (void)ncpsharedlibfree(plugin->codec.codeclib); memset(plugin,0,sizeof(NCZ_Plugin)); free(plugin); + ncproplistfree(props); } return ZUNTRACE(NC_NOERR); } @@ -553,15 +565,34 @@ loadcodecdefaults(const char* path, const NCZ_codec_t** cp, NCPSharedLib* lib, i int lib_used = 0; struct NCglobalstate* gs = NC_getglobalstate(); + NC_UNUSED(path); + nclistpush(gs->zarr.default_libs,lib); for(;*cp;cp++) { + size_t i; struct CodecAPI* c0; c0 = (struct CodecAPI*)calloc(1,sizeof(struct CodecAPI)); if(c0 == NULL) {stat = NC_ENOMEM; goto done;} c0->codec = *cp; c0->codeclib = lib; lib_used = 1; /* remember */ + /* Replace duplicates */ + for(i=0;izarr.codec_defaults);i++) { + struct CodecAPI* cold = (struct CodecAPI*)nclistget(gs->zarr.codec_defaults,i); + if(cold != NULL && strcmp(c0->codec->codecid,cold->codec->codecid)==0) { + cold = (struct CodecAPI*)nclistremove(gs->zarr.codec_defaults,i); + nullfree(cold); + break; + } + } nclistpush(gs->zarr.codec_defaults,c0); c0 = NULL; + /* Was this the hdf5raw codec? */ + for(i=0;izarr.codec_defaults);i++) { + struct CodecAPI* codec = (struct CodecAPI*)nclistget(gs->zarr.codec_defaults,i); + if(codec->codec->hdf5id == H5Z_FILTER_RAW && strcasecmp(codec->codec->codecid,H5Z_CODEC_RAW)==0) { + gs->zarr.hdf5raw = codec; /* Overwrite any previous */ + } + } } done: if(lib_usedp) *lib_usedp = lib_used; diff --git a/libnczarr/zplugins.h b/libnczarr/zplugins.h index 3ad26abd77..d43b344193 100644 --- a/libnczarr/zplugins.h +++ b/libnczarr/zplugins.h @@ -31,7 +31,7 @@ typedef struct NCZ_Plugin { } hdf5; struct CodecAPI { int defaulted; /* codeclib was a defaulting library */ - int ishdf5raw; /* The codec is the hdf5raw codec */ + int ishdf5raw; /* The codec uses the hdf5raw format */ const struct NCZ_codec_t* codec; struct NCPSharedLib* codeclib; /* of the codec; null if same as hdf5 */ } codec; diff --git a/libnczarr/zprov.c b/libnczarr/zprov.c index a56d32368b..b73fd479fb 100644 --- a/libnczarr/zprov.c +++ b/libnczarr/zprov.c @@ -237,12 +237,15 @@ NCZ_write_provenance(NC_FILE_INFO_T* file) /* ZARR Specific attribute read/write of _NCProperties */ static int -NCZ_read_ncproperties(NC_FILE_INFO_T* h5, const char* value, char** propstring) +NCZ_read_ncproperties(NC_FILE_INFO_T* file, const char* value, char** propstring) { int stat = NC_NOERR; char* text = NULL; size_t len; + NC_UNUSED(file); + + LOG((5, "%s", __func__)); /* NCPROPS Attribute exists, make sure it is legitimate */ @@ -274,7 +277,8 @@ NCZ_write_ncproperties(NC_FILE_INFO_T* h5) #ifdef SUPPRESSNCPROPERTY return NC_NOERR; #else /*!SUPPRESSNCPROPERTY*/ - int i,stat = NC_NOERR; + int stat = NC_NOERR; + size_t i; NC4_Provenance* prov = &h5->provenance; NC_ATT_INFO_T* ncprops = NULL; NCindex* attlist = NULL; @@ -366,282 +370,3 @@ NCZ_clear_provenance(NC4_Provenance* prov) memset(prov,0,sizeof(NC4_Provenance)); return NC_NOERR; } - -#if 0 -/* Unused functions */ - -/** - * @internal Parse file properties. - * - * @param text0 Text properties. - * @param pairs list of parsed (key,value) pairs - * - * @return ::NC_NOERR No error. - * @author Dennis Heimbigner - */ -static int -properties_parse(const char* text0, NClist* pairs) -{ - int ret = NC_NOERR; - char* p; - char* q; - char* text = NULL; - - if(text0 == NULL || strlen(text0) == 0) - goto done; - - text = strdup(text0); - if(text == NULL) return NC_ENOMEM; - - /* For back compatibility with version 1, translate '|' -> ',' */ - for(p=text;*p;p++) { - if(*p == NCPROPSSEP1) - *p = NCPROPSSEP2; - } - - /* Walk and fill in ncinfo */ - p = text; - while(*p) { - char* name = p; - char* value = NULL; - char* next = NULL; - - /* Delimit whole (key,value) pair */ - q = locate(p,NCPROPSSEP2); - if(*q != '\0') /* Never go beyond the final nul term */ - *q++ = '\0'; - next = q; - /* split key and value */ - q = locate(p,'='); - name = p; - *q++ = '\0'; - value = q; - /* Set up p for next iteration */ - p = next; - nclistpush(pairs,strdup(name)); - nclistpush(pairs,strdup(value)); - } -done: - if(text) free(text); - return ret; -} - -/* Locate a specific character and return its pointer - or EOS if not found - take \ escapes into account */ -static char* -locate(char* p, char tag) -{ - char* next; - int c; - assert(p != NULL); - for(next = p;(c = *next);next++) { - if(c == tag) - return next; - else if(c == '\\' && next[1] != '\0') - next++; /* skip escaped char */ - } - return next; /* not found */ -} - -/* Utility to transfer a string to a buffer with escaping */ -static void -escapify(NCbytes* buffer, const char* s) -{ - const char* p; - for(p=s;*p;p++) { - if(strchr(ESCAPECHARS,*p) != NULL) - ncbytesappend(buffer,'\\'); - ncbytesappend(buffer,*p); - } -} - -/** - * @internal - * - * Clear and Free the NC4_Provenance object - * @param prov Pointer to provenance object - * - * @return ::NC_NOERR No error. - * @author Dennis Heimbigner - */ -static int -NCZ_free_provenance(NC4_Provenance* prov) -{ - LOG((5, "%s", __func__)); - - if(prov == NULL) return NC_NOERR; - NCZ_clear_provenance(prov); - free(prov); - return NC_NOERR; -} - -/** - * @internal Build _NCProperties attribute value. - * - * Convert a NCPROPINFO instance to a single string. - * Will always convert to current format - * - * @param version - * @param list Properties list - * @param spropp Pointer that gets properties string. - * @return ::NC_NOERR No error. - * @return ::NC_EINVAL failed. - * @author Dennis Heimbigner - */ -static int -build_propstring(int version, NClist* list, char** spropp) -{ - int stat = NC_NOERR; - int i; - NCbytes* buffer = NULL; - char sversion[64]; - - LOG((5, "%s version=%d", __func__, version)); - - if(spropp != NULL) *spropp = NULL; - - if(version == 0 || version > NCPROPS_VERSION) /* unknown case */ - goto done; - if(list == NULL) - {stat = NC_EINVAL; goto done;} - - if((buffer = ncbytesnew()) == NULL) - {stat = NC_ENOMEM; goto done;} - - /* start with version */ - ncbytescat(buffer,NCPVERSION); - ncbytesappend(buffer,'='); - /* Use current version */ - snprintf(sversion,sizeof(sversion),"%d",version); - ncbytescat(buffer,sversion); - - for(i=0;i we closing file as opposed to sync'ing it. * * @return ::NC_NOERR No error. * @author Dennis Heimbigner */ int -ncz_sync_file(NC_FILE_INFO_T* file, int isclose) +ncz_encode_file(NC_FILE_INFO_T* file, int isclose) { int stat = NC_NOERR; - NCjson* json = NULL; NC_UNUSED(isclose); @@ -79,63 +67,13 @@ ncz_sync_file(NC_FILE_INFO_T* file, int isclose) ZTRACE(3,"file=%s isclose=%d",file->controller->path,isclose); /* Write out root group recursively */ - if((stat = ncz_sync_grp(file, file->root_grp, isclose))) + if((stat = ncz_encode_grp(file, file->root_grp))) goto done; -done: - NCJreclaim(json); - return ZUNTRACE(stat); -} - -/** - * @internal Synchronize dimension data from memory to map. - * - * @param grp Pointer to grp struct containing the dims. - * - * @return ::NC_NOERR No error. - * @author Dennis Heimbigner - */ -static int -ncz_collect_dims(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NCjson** jdimsp) -{ - int stat=NC_NOERR; - size_t i; - NCjson* jdims = NULL; - NCjson* jdimsize = NULL; - NCjson* jdimargs = NULL; - - LOG((3, "%s: ", __func__)); - ZTRACE(3,"file=%s grp=%s",file->controller->path,grp->hdr.name); + if((stat = NCZMD_consolidate(file))) goto done; - NCJnew(NCJ_DICT,&jdims); - for(i=0; idim); i++) { - NC_DIM_INFO_T* dim = (NC_DIM_INFO_T*)ncindexith(grp->dim,i); - char slen[128]; - - snprintf(slen,sizeof(slen),"%llu",(unsigned long long)dim->len); - NCJnewstring(NCJ_INT,slen,&jdimsize); - - /* If dim is not unlimited, then write in the old format to provide - maximum back compatibility. - */ - if(dim->unlimited) { - NCJnew(NCJ_DICT,&jdimargs); - if((stat = NCJaddstring(jdimargs,NCJ_STRING,"size"))<0) {stat = NC_EINVAL; goto done;} - if((stat = NCJappend(jdimargs,jdimsize))<0) {stat = NC_EINVAL; goto done;} - jdimsize = NULL; - if((stat = NCJaddstring(jdimargs,NCJ_STRING,"unlimited"))<0) {stat = NC_EINVAL; goto done;} - if((stat = NCJaddstring(jdimargs,NCJ_INT,"1"))<0) {stat = NC_EINVAL; goto done;} - } else { /* !dim->unlimited */ - jdimargs = jdimsize; - jdimsize = NULL; - } - if((stat = NCJaddstring(jdims,NCJ_STRING,dim->hdr.name))<0) {stat = NC_EINVAL; goto done;} - if((stat = NCJappend(jdims,jdimargs))<0) {stat = NC_EINVAL; goto done;} - } - if(jdimsp) {*jdimsp = jdims; jdims = NULL;} done: - NCJreclaim(jdims); - return ZUNTRACE(THROW(stat)); + return ZUNTRACE(stat); } /** @@ -147,141 +85,62 @@ ncz_collect_dims(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NCjson** jdimsp) * @return ::NC_NOERR No error. * @author Dennis Heimbigner */ -int -ncz_sync_grp(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, int isclose) +static int +ncz_encode_grp(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp) { int stat = NC_NOERR; size_t i; NCZ_FILE_INFO_T* zinfo = NULL; - char version[1024]; int purezarr = 0; - NCZMAP* map = NULL; - char* fullpath = NULL; - char* key = NULL; - NCjson* json = NULL; - NCjson* jgroup = NULL; - NCjson* jdims = NULL; - NCjson* jvars = NULL; - NCjson* jsubgrps = NULL; NCjson* jnczgrp = NULL; NCjson* jsuper = NULL; - NCjson* jtmp = NULL; - NCjson* jatts = NULL; NCjson* jtypes = NULL; + struct ZOBJ zobj = NCZ_emptyzobj(); - LOG((3, "%s: dims: %s", __func__, key)); - ZTRACE(3,"file=%s grp=%s isclose=%d",file->controller->path,grp->hdr.name,isclose); + ZTRACE(3,"file=%s grp=%s",file->controller->path,grp->hdr.name); zinfo = file->format_file_info; - map = zinfo->map; - - purezarr = (zinfo->controls.flags & FLAG_PUREZARR)?1:0; + TESTPUREZARR; - /* Construct grp key */ - if((stat = NCZ_grpkey(grp,&fullpath))) - goto done; - - /* build ZGROUP contents */ - NCJnew(NCJ_DICT,&jgroup); - snprintf(version,sizeof(version),"%d",zinfo->zarr.zarr_version); - if((stat = NCJaddstring(jgroup,NCJ_STRING,"zarr_format"))<0) {stat = NC_EINVAL; goto done;} - if((stat = NCJaddstring(jgroup,NCJ_INT,version))<0) {stat = NC_EINVAL; goto done;} - /* build ZGROUP path */ - if((stat = nczm_concat(fullpath,ZGROUP,&key))) - goto done; - /* Write to map */ - if((stat=NCZ_uploadjson(map,key,jgroup))) goto done; - nullfree(key); key = NULL; + /* Create|Update the dual attributes */ + if((stat = NCZ_ensure_dual_attributes(file,(NC_OBJ*)grp))) goto done; if(!purezarr) { if(grp->parent == NULL) { /* Root group */ - /* create superblock */ - snprintf(version,sizeof(version),"%lu.%lu.%lu", - zinfo->zarr.nczarr_version.major, - zinfo->zarr.nczarr_version.minor, - zinfo->zarr.nczarr_version.release); - NCJnew(NCJ_DICT,&jsuper); - if((stat = NCJinsertstring(jsuper,"version",version))<0) {stat = NC_EINVAL; goto done;} + if((stat=NCZF_encode_superblock(file,&jsuper))) goto done; } - /* Create dimensions dict */ - if((stat = ncz_collect_dims(file,grp,&jdims))) goto done; - - /* Create vars list */ - NCJnew(NCJ_ARRAY,&jvars); - for(i=0; ivars); i++) { - NC_VAR_INFO_T* var = (NC_VAR_INFO_T*)ncindexith(grp->vars,i); - if((stat = NCJaddstring(jvars,NCJ_STRING,var->hdr.name))<0) {stat = NC_EINVAL; goto done;} - } - - /* Create subgroups list */ - NCJnew(NCJ_ARRAY,&jsubgrps); - for(i=0; ichildren); i++) { - NC_GRP_INFO_T* g = (NC_GRP_INFO_T*)ncindexith(grp->children,i); - if((stat = NCJaddstring(jsubgrps,NCJ_STRING,g->hdr.name))<0) {stat = NC_EINVAL; goto done;} - } - /* Create the "_nczarr_group" dict */ - NCJnew(NCJ_DICT,&jnczgrp); - /* Insert the various dicts and arrays */ - if((stat = NCJinsert(jnczgrp,"dimensions",jdims))<0) {stat = NC_EINVAL; goto done;} - jdims = NULL; /* avoid memory problems */ - if((stat = NCJinsert(jnczgrp,"arrays",jvars))<0) {stat = NC_EINVAL; goto done;} - jvars = NULL; /* avoid memory problems */ - if((stat = NCJinsert(jnczgrp,"groups",jsubgrps))<0) {stat = NC_EINVAL; goto done;} - jsubgrps = NULL; /* avoid memory problems */ + /* encode _nczarr_group */ + if((stat=NCZF_encode_nczarr_group(file,grp,&jnczgrp))) goto done; } - /* Build the .zattrs object */ - assert(grp->att); - NCJnew(NCJ_DICT,&jatts); - NCJnew(NCJ_DICT,&jtypes); - if((stat = ncz_sync_atts(file, (NC_OBJ*)grp, grp->att, jatts, jtypes, isclose))) goto done; - - if(!purezarr && jnczgrp != NULL) { - /* Insert _nczarr_group */ - if((stat=insert_attr(jatts,jtypes,NCZ_V2_GROUP,jnczgrp,"|J0"))) goto done; - jnczgrp = NULL; - } - if(!purezarr && jsuper != NULL) { - /* Insert superblock */ - if((stat=insert_attr(jatts,jtypes,NCZ_V2_SUPERBLOCK,jsuper,"|J0"))) goto done; - jsuper = NULL; - } + /* Assemble group JSON object */ + if((stat=NCZF_encode_group(file,grp,&zobj.jobj))) goto done; - /* As a last mod to jatts, insert the jtypes as an attribute */ - if(!purezarr && jtypes != NULL) { - if((stat = insert_nczarr_attr(jatts,jtypes))) goto done; - jtypes = NULL; - } + /* Assemble JSON'ized attributes: Optionally uses _nczarr_group &/or _nczarr_superblock */ + if((stat = NCZF_encode_attributes(file,(NC_OBJ*)grp,&jnczgrp,&jsuper,&zobj.jatts))) goto done; - /* Write out the .zattrs */ - if((stat = upload_attrs(file,(NC_OBJ*)grp,jatts))) goto done; + /* upload group json and (depending on version) the group attributes */ + if((stat = NCZF_upload_grp(file,grp,&zobj))) goto done; - /* Now synchronize all the variables */ - for(i=0; ivars); i++) { - NC_VAR_INFO_T* var = (NC_VAR_INFO_T*)ncindexith(grp->vars,i); - if((stat = ncz_sync_var(file,var,isclose))) goto done; + /* encode and upload the vars in this group and sync the data */ + for(i=0;ivars);i++) { + NC_VAR_INFO_T* var = (NC_VAR_INFO_T*)ncindexith(grp->vars,i); + if((stat = ncz_encode_var_meta(file,var))) goto done; + if((stat = ncz_flush_var(file,var))) goto done; } - - /* Now recurse to synchronize all the subgrps */ - for(i=0; ichildren); i++) { - NC_GRP_INFO_T* g = (NC_GRP_INFO_T*)ncindexith(grp->children,i); - if((stat = ncz_sync_grp(file,g,isclose))) goto done; + + /* encode and upload the sub-groups in this group */ + for(i=0;ichildren);i++) { + NC_GRP_INFO_T* subgrp = (NC_GRP_INFO_T*)ncindexith(grp->children,i); + if((stat = ncz_encode_grp(file,subgrp))) goto done; } - + done: - NCJreclaim(jtmp); - NCJreclaim(jsuper); - NCJreclaim(json); - NCJreclaim(jgroup); - NCJreclaim(jdims); - NCJreclaim(jvars); - NCJreclaim(jsubgrps); - NCJreclaim(jnczgrp); - NCJreclaim(jtypes); - NCJreclaim(jatts); - nullfree(fullpath); - nullfree(key); + NCZ_clear_zobj(&zobj); + NCZ_reclaim_json(jnczgrp); + NCZ_reclaim_json(jtypes); + NCZ_reclaim_json(jsuper); return ZUNTRACE(THROW(stat)); } @@ -290,47 +149,28 @@ ncz_sync_grp(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, int isclose) * * @param file Pointer to file struct * @param var Pointer to var struct - * @param isclose If this called as part of nc_close() as opposed to nc_enddef(). * * @return ::NC_NOERR No error. * @author Dennis Heimbigner */ static int -ncz_sync_var_meta(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, int isclose) +ncz_encode_var_meta(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var) { - size_t i; int stat = NC_NOERR; NCZ_FILE_INFO_T* zinfo = NULL; - char number[1024]; - NCZMAP* map = NULL; - char* fullpath = NULL; - char* key = NULL; - char* dimpath = NULL; - NClist* dimrefs = NULL; - NCjson* jvar = NULL; - NCjson* jncvar = NULL; - NCjson* jdimrefs = NULL; - NCjson* jtmp = NULL; - NCjson* jfill = NULL; - NCjson* jatts = NULL; - NCjson* jtypes = NULL; - char* dtypename = NULL; + NCjson* jnczvar = NULL; int purezarr = 0; - size64_t shape[NC_MAX_VAR_DIMS]; NCZ_VAR_INFO_T* zvar = var->format_var_info; -#ifdef NETCDF_ENABLE_NCZARR_FILTERS - NClist* filterchain = NULL; - NCjson* jfilter = NULL; -#endif + NClist* filtersj = nclistnew(); + struct ZOBJ zobj = NCZ_emptyzobj(); - ZTRACE(3,"file=%s var=%s isclose=%d",file->controller->path,var->hdr.name,isclose); + ZTRACE(3,"file=%s var=%s",file->controller->path,var->hdr.name); zinfo = file->format_file_info; - map = zinfo->map; - - purezarr = (zinfo->controls.flags & FLAG_PUREZARR)?1:0; + TESTPUREZARR; /* Make sure that everything is established */ + /* ensure the fill value */ if((stat = NCZ_ensure_fill_value(var))) goto done; /* ensure var->fill_value is set */ assert(var->no_fill || var->fill_value != NULL); @@ -340,279 +180,93 @@ ncz_sync_var_meta(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, int isclose) if((stat = NCZ_ensure_fill_chunk(zvar->cache))) goto done; #ifdef NETCDF_ENABLE_NCZARR_FILTERS /* Build the filter working parameters for any filters */ - if((stat = NCZ_filter_setup(var))) goto done; -#endif - - /* Construct var path */ - if((stat = NCZ_varkey(var,&fullpath))) - goto done; - - /* Create the zarray json object */ - NCJnew(NCJ_DICT,&jvar); - - /* zarr_format key */ - snprintf(number,sizeof(number),"%d",zinfo->zarr.zarr_version); - if((stat = NCJaddstring(jvar,NCJ_STRING,"zarr_format"))<0) {stat = NC_EINVAL; goto done;} - if((stat = NCJaddstring(jvar,NCJ_INT,number))<0) {stat = NC_EINVAL; goto done;} - - /* Collect the shape vector */ - for(i=0;indims;i++) { - NC_DIM_INFO_T* dim = var->dim[i]; - shape[i] = dim->len; - } - /* but might be scalar */ - if(var->ndims == 0) - shape[0] = 1; - - /* shape key */ - /* Integer list defining the length of each dimension of the array.*/ - /* Create the list */ - NCJnew(NCJ_ARRAY,&jtmp); - if(zvar->scalar) { - NCJaddstring(jtmp,NCJ_INT,"1"); - } else for(i=0;indims;i++) { - snprintf(number,sizeof(number),"%llu",shape[i]); - NCJaddstring(jtmp,NCJ_INT,number); - } - if((stat = NCJinsert(jvar,"shape",jtmp))<0) {stat = NC_EINVAL; goto done;} - jtmp = NULL; - - /* dtype key */ - /* A string or list defining a valid data type for the array. */ - if((stat = NCJaddstring(jvar,NCJ_STRING,"dtype"))<0) {stat = NC_EINVAL; goto done;} - { /* Add the type name */ - int endianness = var->type_info->endianness; - int atomictype = var->type_info->hdr.id; - assert(atomictype > 0 && atomictype <= NC_MAX_ATOMIC_TYPE); - if((stat = ncz_nctype2dtype(atomictype,endianness,purezarr,NCZ_get_maxstrlen((NC_OBJ*)var),&dtypename))) goto done; - if((stat = NCJaddstring(jvar,NCJ_STRING,dtypename))<0) {stat = NC_EINVAL; goto done;} - nullfree(dtypename); dtypename = NULL; - } - - /* chunks key */ - /* The zarr format does not support the concept - of contiguous (or compact), so it will never appear in the read case. - */ - /* list of chunk sizes */ - if((stat = NCJaddstring(jvar,NCJ_STRING,"chunks"))<0) {stat = NC_EINVAL; goto done;} - /* Create the list */ - NCJnew(NCJ_ARRAY,&jtmp); - if(zvar->scalar) { - NCJaddstring(jtmp,NCJ_INT,"1"); /* one chunk of size 1 */ - } else for(i=0;indims;i++) { - size64_t len = var->chunksizes[i]; - snprintf(number,sizeof(number),"%lld",len); - NCJaddstring(jtmp,NCJ_INT,number); - } - if((stat = NCJappend(jvar,jtmp))<0) {stat = NC_EINVAL; goto done;} - jtmp = NULL; - - /* fill_value key */ - if(var->no_fill) { - NCJnew(NCJ_NULL,&jfill); - } else {/*!var->no_fill*/ - int atomictype = var->type_info->hdr.id; - if(var->fill_value == NULL) { - if((stat = NCZ_ensure_fill_value(var))) goto done; - } - /* Convert var->fill_value to a string */ - if((stat = NCZ_stringconvert(atomictype,1,var->fill_value,&jfill))) goto done; - assert(jfill->sort != NCJ_ARRAY); - } - if((stat = NCJinsert(jvar,"fill_value",jfill))<0) {stat = NC_EINVAL; goto done;} - jfill = NULL; - - /* order key */ - if((stat = NCJaddstring(jvar,NCJ_STRING,"order"))<0) {stat = NC_EINVAL; goto done;} - /* "C" means row-major order, i.e., the last dimension varies fastest; - "F" means column-major order, i.e., the first dimension varies fastest.*/ - /* Default to C for now */ - if((stat = NCJaddstring(jvar,NCJ_STRING,"C"))<0) {stat = NC_EINVAL; goto done;} - - /* Compressor and Filters */ - /* compressor key */ - /* From V2 Spec: A JSON object identifying the primary compression codec and providing - configuration parameters, or ``null`` if no compressor is to be used. */ - if((stat = NCJaddstring(jvar,NCJ_STRING,"compressor"))<0) {stat = NC_EINVAL; goto done;} -#ifdef NETCDF_ENABLE_NCZARR_FILTERS - filterchain = (NClist*)var->filters; - if(nclistlength(filterchain) > 0) { - struct NCZ_Filter* filter = (struct NCZ_Filter*)nclistget(filterchain,nclistlength(filterchain)-1); - /* encode up the compressor */ - if((stat = NCZ_filter_jsonize(file,var,filter,&jtmp))) goto done; - } else -#endif - { /* no filters at all */ - /* Default to null */ - NCJnew(NCJ_NULL,&jtmp); - } - if(jtmp && (stat = NCJappend(jvar,jtmp))<0) {stat = NC_EINVAL; goto done;} - jtmp = NULL; - - /* filters key */ - /* From V2 Spec: A list of JSON objects providing codec configurations, - or null if no filters are to be applied. Each codec configuration - object MUST contain a "id" key identifying the codec to be used. */ - /* A list of JSON objects providing codec configurations, or ``null`` - if no filters are to be applied. */ - if((stat = NCJaddstring(jvar,NCJ_STRING,"filters"))<0) {stat = NC_EINVAL; goto done;} -#ifdef NETCDF_ENABLE_NCZARR_FILTERS - if(nclistlength(filterchain) > 1) { - size_t k; - /* jtmp holds the array of filters */ - NCJnew(NCJ_ARRAY,&jtmp); - for(k=0;kdimension_separator != DFALT_DIM_SEPARATOR) { - char sep[2]; - sep[0] = zvar->dimension_separator;/* make separator a string*/ - sep[1] = '\0'; - NCJnewstring(NCJ_STRING,sep,&jtmp); - if((stat = NCJinsert(jvar,"dimension_separator",jtmp))<0) {stat = NC_EINVAL; goto done;} - jtmp = NULL; - } - /* build .zarray path */ - if((stat = nczm_concat(fullpath,ZARRAY,&key))) - goto done; - - /* Write to map */ - if((stat=NCZ_uploadjson(map,key,jvar))) - goto done; - nullfree(key); key = NULL; - - /* Capture dimref names as FQNs */ - if(var->ndims > 0) { - if((dimrefs = nclistnew())==NULL) {stat = NC_ENOMEM; goto done;} - for(i=0;indims;i++) { - NC_DIM_INFO_T* dim = var->dim[i]; - if((stat = NCZ_dimkey(dim,&dimpath))) goto done; - nclistpush(dimrefs,dimpath); - dimpath = NULL; - } + /* There is a sort of cycle between _nczarr_array and the attributes in that + the attributes must contain _nczarr_array as an attribute and _nczar_array + must contain the attribute types (including _nczarr_array). + We break this by building _nczarr_array first, then building the attributes. + */ + /* Build the _nczarr_array object */ + if(!purezarr) { + if((stat=NCZF_encode_nczarr_array(file,var,&jnczvar))) goto done; } - /* Build the NCZ_V2_ARRAY object */ - { - /* Create the dimrefs json object */ - NCJnew(NCJ_ARRAY,&jdimrefs); - for(i=0;indims == 0) { - NCJnewstring(NCJ_INT,"1",&jtmp); - if((stat = NCJinsert(jncvar,"scalar",jtmp))<0) {stat = NC_EINVAL; goto done;} - jtmp = NULL; - } - /* everything looks like it is chunked */ - NCJnewstring(NCJ_STRING,"chunked",&jtmp); - if((stat = NCJinsert(jncvar,"storage",jtmp))<0) {stat = NC_EINVAL; goto done;} - jtmp = NULL; - } + /* Some attributes need to be computed because they are not stored in the NC_XXX_INFO_T structs. + * The current such attributes are: + * 1. xarray attribute + */ + if((stat = ncz_create_computed_var_attributes(file,var))) goto done; - /* Build .zattrs object */ - assert(var->att); - NCJnew(NCJ_DICT,&jatts); - NCJnew(NCJ_DICT,&jtypes); - if((stat = ncz_sync_atts(file,(NC_OBJ*)var, var->att, jatts, jtypes, isclose))) goto done; + /* Create|Update the dual attributes */ + if((stat = NCZ_ensure_dual_attributes(file,(NC_OBJ*)var))) goto done; - if(!purezarr && jncvar != NULL) { - /* Insert _nczarr_array */ - if((stat=insert_attr(jatts,jtypes,NCZ_V2_ARRAY,jncvar,"|J0"))) goto done; - jncvar = NULL; - } + /* Convert to JSON */ + if((stat=NCZF_encode_attributes(file,(NC_OBJ*)var,&jnczvar,NULL,&zobj.jatts))) goto done; - /* As a last mod to jatts, optionally insert the jtypes as an attribute and add _nczarr_attr as attribute*/ - if(!purezarr && jtypes != NULL) { - if((stat = insert_nczarr_attr(jatts,jtypes))) goto done; - jtypes = NULL; - } + /* Encode the filters */ + if((stat=ncz_encode_filters(file,var,filtersj))) goto done; - /* Write out the .zattrs */ - if((stat = upload_attrs(file,(NC_OBJ*)var,jatts))) goto done; + /* encode the var JSON including (optionally) the attributes */ + if((stat=NCZF_encode_var(file,var,filtersj,&zobj.jobj))) goto done; + /* Write out the the var JSON and the corresponding attributes and chunks */ + if((stat = NCZF_upload_var(file,var,&zobj))) goto done; var->created = 1; done: - nclistfreeall(dimrefs); - nullfree(fullpath); - nullfree(key); - nullfree(dtypename); - nullfree(dimpath); - NCJreclaim(jvar); - NCJreclaim(jncvar); - NCJreclaim(jtmp); - NCJreclaim(jfill); - NCJreclaim(jatts); - NCJreclaim(jtypes); + NCZ_clear_zobj(&zobj); + NCZ_reclaim_json(jnczvar); + NCZ_reclaim_json_list(filtersj); return ZUNTRACE(THROW(stat)); } -/** - * @internal Synchronize variable meta data and data from memory to map. - * - * @param file Pointer to file struct - * @param var Pointer to var struct - * @param isclose If this called as part of nc_close() as opposed to nc_enddef(). - * - * @return ::NC_NOERR No error. - * @author Dennis Heimbigner +/* Some attributes need to be computed because they are not stored in the NC_XXX_INFO_T structs. + * The current such attributes are: + * 1. xarray attribute */ static int -ncz_sync_var(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, int isclose) +ncz_create_computed_var_attributes(NC_FILE_INFO_T* file,NC_VAR_INFO_T* var) { int stat = NC_NOERR; - NCZ_VAR_INFO_T* zvar = var->format_var_info; - - ZTRACE(3,"file=%s var=%s isclose=%d",file->controller->path,var->hdr.name,isclose); + NCZ_FILE_INFO_T* zinfo = (NCZ_FILE_INFO_T*)file->format_file_info; + NC_GRP_INFO_T* parent = var->container; + NC_ATT_INFO_T* special = NULL; + char* xarraydims = NULL; + int isnew = 0; - if(isclose) { - if((stat = ncz_sync_var_meta(file,var,isclose))) goto done; - } + if(parent->parent != NULL) goto done; /* Only do this for root group */ - /* flush only chunks that have been written */ - if(zvar->cache) { - if((stat = NCZ_flush_chunk_cache(zvar->cache))) - goto done; + if(zinfo->flags & FLAG_XARRAYDIMS) { /* test if we should generate xarray dimensions */ + special = NULL; + isnew = 0; + /* get/create the xarray attribute as type NC_CHAR */ + if((stat = NCZ_getattr(file,(NC_OBJ*)var,NC_XARRAY_DIMS,NC_CHAR,&special,&isnew))) goto done; + if(isnew) { + size_t zarr_rank; + if((stat = NCZF_encode_xarray(file,var->ndims,var->dim,&xarraydims,&zarr_rank))) goto done; + if((stat = NCZ_set_att_data(file,special,strlen(xarraydims),xarraydims))) goto done; + } } done: - return ZUNTRACE(THROW(stat)); + nullfree(xarraydims); + return THROW(stat); } - /* -Flush all chunks to disk. Create any that are missing +Flush all modified chunks to disk. Create any that are missing and fill as needed. */ -int -ncz_write_var(NC_VAR_INFO_T* var) +static int +ncz_flush_var(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var) { int stat = NC_NOERR; NCZ_VAR_INFO_T* zvar = (NCZ_VAR_INFO_T*)var->format_var_info; + NC_UNUSED(file); + ZTRACE(3,"var=%s",var->hdr.name); /* Flush the cache */ @@ -658,9 +312,9 @@ ncz_write_var(NC_VAR_INFO_T* var) size64_t* indices = nczodom_indices(chunkodom); /* Convert to key */ if((stat = NCZ_buildchunkpath(zvar->cache,indices,&key))) goto done; - switch (stat = nczmap_exists(map,key)) { + switch (stat = NCZMD_exists(file,key)) { case NC_NOERR: goto next; /* already exists */ - case NC_EEMPTY: break; /* does not exist, create it with fill */ + case NC_ENOOBJECT: break; /* does not exist, create it with fill */ default: goto done; /* some other error */ } /* If we reach here, then chunk does not exist, create it with fill */ @@ -680,642 +334,334 @@ ncz_write_var(NC_VAR_INFO_T* var) return ZUNTRACE(THROW(stat)); } +/**************************************************/ +/* + * @internal pull storage structures and create corresponding nc4internal.h structures + */ + /** - * @internal Synchronize attribute data from memory to map. - * - * @param file - * @param container Pointer to grp|var struct containing the attributes - * @param attlist - * @param jattsp - * @param jtypesp + + * @param file Pointer to file info struct. * * @return ::NC_NOERR No error. * @author Dennis Heimbigner */ int -ncz_sync_atts(NC_FILE_INFO_T* file, NC_OBJ* container, NCindex* attlist, NCjson* jatts, NCjson* jtypes, int isclose) +ncz_decode_file(NC_FILE_INFO_T* file) { int stat = NC_NOERR; - size_t i; - NCZ_FILE_INFO_T* zinfo = NULL; - NCjson* jdimrefs = NULL; - NCjson* jdict = NULL; - NCjson* jint = NULL; - NCjson* jdata = NULL; - char* fullpath = NULL; - char* key = NULL; - char* content = NULL; - char* dimpath = NULL; - int isxarray = 0; - int inrootgroup = 0; - NC_VAR_INFO_T* var = NULL; - NC_GRP_INFO_T* grp = NULL; - char* tname = NULL; - int purezarr = 0; - int endianness = (NC_isLittleEndian()?NC_ENDIAN_LITTLE:NC_ENDIAN_BIG); + NCZ_FILE_INFO_T* zinfo = file->format_file_info; + NC_GRP_INFO_T* root = NULL; + const NCjson* jsuper = NULL; + NClist* varnames = nclistnew(); + NClist* subgroupnames = nclistnew(); + struct ZOBJ zobj = NCZ_emptyzobj(); - NC_UNUSED(isclose); - - LOG((3, "%s", __func__)); - ZTRACE(3,"file=%s container=%s |attlist|=%u",file->controller->path,container->name,(unsigned)ncindexsize(attlist)); + LOG((3, "%s: file: %s", __func__, file->controller->path)); + ZTRACE(3,"file=%s",file->controller->path); - if(container->sort == NCVAR) { - var = (NC_VAR_INFO_T*)container; - if(var->container && var->container->parent == NULL) - inrootgroup = 1; - } else if(container->sort == NCGRP) { - grp = (NC_GRP_INFO_T*)container; - } + /* Download the root group object and associated attributes */ + root = file->root_grp; + if((stat = NCZF_download_grp(file, root, &zobj))) goto done; - zinfo = file->format_file_info; - purezarr = (zinfo->controls.flags & FLAG_PUREZARR)?1:0; - if(zinfo->controls.flags & FLAG_XARRAYDIMS) isxarray = 1; - - if(ncindexsize(attlist) > 0) { - /* Walk all the attributes convert to json and collect the dtype */ - for(i=0;inc_typeid > NC_MAX_ATOMIC_TYPE) - {stat = (THROW(NC_ENCZARR)); goto done;} - if(a->nc_typeid == NC_STRING) - typesize = (size_t)NCZ_get_maxstrlen(container); - else - {if((stat = NC4_inq_atomic_type(a->nc_typeid,NULL,&typesize))) goto done;} - /* Convert to storable json */ - if((stat = NCZ_stringconvert(a->nc_typeid,a->len,a->data,&jdata))) goto done; - - /* Collect the corresponding dtype */ - if((stat = ncz_nctype2dtype(a->nc_typeid,endianness,purezarr,typesize,&tname))) goto done; - - /* Insert the attribute; consumes jdata */ - if((stat = insert_attr(jatts,jtypes,a->hdr.name, jdata, tname))) goto done; - - /* cleanup */ - nullfree(tname); tname = NULL; - jdata = NULL; - - } - } - - /* Construct container path */ - if(container->sort == NCGRP) - stat = NCZ_grpkey(grp,&fullpath); - else - stat = NCZ_varkey(var,&fullpath); - if(stat) - goto done; - - if(container->sort == NCVAR) { - if(inrootgroup && isxarray) { - int dimsinroot = 1; - /* Insert the XARRAY _ARRAY_ATTRIBUTE attribute */ - NCJnew(NCJ_ARRAY,&jdimrefs); - /* Fake the scalar case */ - if(var->ndims == 0) { - NCJaddstring(jdimrefs,NCJ_STRING,XARRAYSCALAR); - } else /* Walk the dimensions and capture the names */ - for(i=0;indims;i++) { - NC_DIM_INFO_T* dim = var->dim[i]; - /* Verify that the dimension is in the root group */ - if(dim->container && dim->container->parent != NULL) { - dimsinroot = 0; /* dimension is not in root */ - break; - } - } - if(dimsinroot) { - /* Walk the dimensions and capture the names */ - for(i=0;indims;i++) { - char* dimname; - NC_DIM_INFO_T* dim = var->dim[i]; - dimname = strdup(dim->hdr.name); - if(dimname == NULL) {stat = NC_ENOMEM; goto done;} - NCJaddstring(jdimrefs,NCJ_STRING,dimname); - nullfree(dimname); dimname = NULL; - } - /* Add the _ARRAY_DIMENSIONS attribute */ - if((stat = NCJinsert(jatts,NC_XARRAY_DIMS,jdimrefs))<0) {stat = NC_EINVAL; goto done;} - jdimrefs = NULL; - } - } - } - /* Add Quantize Attribute */ - if(container->sort == NCVAR && var && var->quantize_mode > 0) { - char mode[64]; - snprintf(mode,sizeof(mode),"%d",var->nsd); - NCJnewstring(NCJ_INT,mode,&jint); - /* Insert the quantize attribute */ - switch (var->quantize_mode) { - case NC_QUANTIZE_BITGROOM: - if((stat = NCJinsert(jatts,NC_QUANTIZE_BITGROOM_ATT_NAME,jint))<0) {stat = NC_EINVAL; goto done;} - jint = NULL; - break; - case NC_QUANTIZE_GRANULARBR: - if((stat = NCJinsert(jatts,NC_QUANTIZE_GRANULARBR_ATT_NAME,jint))<0) {stat = NC_EINVAL; goto done;} - jint = NULL; - break; - case NC_QUANTIZE_BITROUND: - if((stat = NCJinsert(jatts,NC_QUANTIZE_BITROUND_ATT_NAME,jint))<0) {stat = NC_EINVAL; goto done;} - jint = NULL; - break; - default: break; - } +#if 0 +Is this code needed? + switch(stat = NCZMD_is_metadata_consolidated(file)) { + case NC_NOERR: break; + case NC_ENOOBJECT: stat = NC_NOERR; break; + default: goto done; } +#endif -done: - nullfree(fullpath); - nullfree(key); - nullfree(content); - nullfree(dimpath); - nullfree(tname); - NCJreclaim(jdimrefs); - NCJreclaim(jdict); - NCJreclaim(jint); - NCJreclaim(jdata); - return ZUNTRACE(THROW(stat)); -} - - -/**************************************************/ - -/** -@internal Extract attributes from a group or var and return -the corresponding NCjson dict. -@param map - [in] the map object for storage -@param container - [in] the containing object -@param jattsp - [out] the json for .zattrs || NULL if not found -@param jtypesp - [out] the json attribute type dict || NULL -@param jnczgrp - [out] the json for _nczarr_group || NULL -@param jnczarray - [out] the json for _nczarr_array || NULL -@return NC_NOERR -@return NC_EXXX -@author Dennis Heimbigner -*/ -static int -download_jatts(NC_FILE_INFO_T* file, NC_OBJ* container, const NCjson** jattsp, const NCjson** jtypesp) -{ - int stat = NC_NOERR; - const NCjson* jatts = NULL; - const NCjson* jtypes = NULL; - const NCjson* jnczattr = NULL; - NC_GRP_INFO_T* grp = NULL; - NC_VAR_INFO_T* var = NULL; - NCZ_GRP_INFO_T* zgrp = NULL; - NCZ_VAR_INFO_T* zvar = NULL; - NCZ_FILE_INFO_T* zinfo = (NCZ_FILE_INFO_T*)file->format_file_info; - int purezarr = 0; - int zarrkey = 0; - - ZTRACE(3,"map=%p container=%s ",map,container->name); - - purezarr = (zinfo->controls.flags & FLAG_PUREZARR)?1:0; - zarrkey = (zinfo->controls.flags & FLAG_NCZARR_KEY)?1:0; + /* Decode the group metadata to get only the superblock */ + if((stat = NCZF_decode_group(file,root,&zobj,NULL,(NCjson**)&jsuper))) goto done; - if(container->sort == NCGRP) { - grp = (NC_GRP_INFO_T*)container; - zgrp = (NCZ_GRP_INFO_T*)grp->format_grp_info; - jatts = zgrp->zgroup.atts; + if(jsuper != NULL) { + /* Ok, process superblock */ + if((stat = NCZF_decode_superblock(file,jsuper,NULL,NULL))) goto done; } else { - var = (NC_VAR_INFO_T*)container; - zvar = (NCZ_VAR_INFO_T*)var->format_var_info; - jatts = zvar->zarray.atts; - } - assert(purezarr || zarrkey || jatts != NULL); + zinfo->flags |= FLAG_PUREZARR; + } + + /* Fill in the root object ignoring any superblock */ + if((stat = ncz_decode_grp(file,root,&zobj))) goto done; - if(jatts != NULL) { - /* Get _nczarr_attr from .zattrs */ - if((stat = NCJdictget(jatts,NCZ_V2_ATTR,&jnczattr))<0) {stat = NC_EINVAL; goto done;} - if(jnczattr != NULL) { - /* jnczattr attribute should be a dict */ - if(NCJsort(jnczattr) != NCJ_DICT) {stat = (THROW(NC_ENCZARR)); goto done;} - /* Extract "types"; may not exist if only hidden attributes are defined */ - if((stat = NCJdictget(jnczattr,"types",&jtypes))<0) {stat = NC_EINVAL; goto done;} - if(jtypes != NULL) { - if(NCJsort(jtypes) != NCJ_DICT) {stat = (THROW(NC_ENCZARR)); goto done;} - } - } - } - if(jattsp) {*jattsp = jatts; jatts = NULL;} - if(jtypes) {*jtypesp = jtypes; jtypes = NULL;} + /* Create and fill the subgroups for this group */ + if((stat = ncz_decode_subgrps(file,root,subgroupnames))) goto done; done: + NCZ_clear_zobj(&zobj); + nclistfreeall(varnames); + nclistfreeall(subgroupnames); return ZUNTRACE(THROW(stat)); } -/* Convert a JSON singleton or array of strings to a single string */ static int -zcharify(const NCjson* src, NCbytes* buf) +ncz_decode_subgrps(NC_FILE_INFO_T* file, NC_GRP_INFO_T* parent, NClist* subgrpnames) { int stat = NC_NOERR; size_t i; - struct NCJconst jstr = NCJconst_empty; - - if(NCJsort(src) != NCJ_ARRAY) { /* singleton */ - if((stat = NCJcvt(src, NCJ_STRING, &jstr))<0) {stat = NC_EINVAL; goto done;} - ncbytescat(buf,jstr.sval); - } else for(i=0;icontroller->path,parent->hdr.name,nclistlength(subgrpnames)); + + /* Create and load each subgrp in turn */ + for(i = 0; i < nclistlength(subgrpnames); i++) { + const char* subgrpname = (const char*)nclistget(subgrpnames,i); + NC_GRP_INFO_T* subgrp = NULL; + /* Create the group object */ + if((stat=ncz4_create_grp(file,parent,subgrpname,&subgrp))) goto done; + /* Download the group's metadata */ + if((stat = NCZF_download_grp(file,subgrp,&zobj))) goto done; + /* Fill in the group object */ + if((stat = ncz_decode_grp(file,subgrp,&zobj))) goto done; + NCZ_clear_zobj(&zobj); } - if(countp) *countp = count; done: + NCZ_clear_zobj(&zobj); return ZUNTRACE(THROW(stat)); } -/* -Extract type and data for an attribute -*/ +/** + * @internal Read group data from storage + * + * @param file Pointer to file struct + * @param grp Pointer to grp struct + * @param zobj the grp|atts for this grp; may be NULL + * + * @return ::NC_NOERR No error. + * @author Dennis Heimbigner + */ static int -computeattrinfo(const char* name, const NCjson* jtypes, nc_type typehint, int purezarr, NCjson* values, - nc_type* typeidp, size_t* typelenp, size_t* lenp, void** datap) +ncz_decode_grp(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, struct ZOBJ* zobj) { int stat = NC_NOERR; size_t i; - size_t len, typelen; - void* data = NULL; - nc_type typeid; - - ZTRACE(3,"name=%s typehint=%d purezarr=%d values=|%s|",name,typehint,purezarr,NCJtotext(values)); - - /* Get type info for the given att */ - typeid = NC_NAT; - for(i=0;i NC_MAX_ATOMIC_TYPE) - {stat = NC_EINTERNAL; goto done;} - /* Use the hint if given one */ - if(typeid == NC_NAT) - typeid = typehint; - - if((stat = computeattrdata(typehint, &typeid, values, &typelen, &len, &data))) goto done; + NCZ_FILE_INFO_T* zinfo = file->format_file_info; + NClist* varnames = nclistnew(); + NClist* subgrps = nclistnew(); + NClist* dimdefs = nclistnew(); + int purezarr = 0; + const NCjson* jnczgrp = NULL; - if(typeidp) *typeidp = typeid; - if(lenp) *lenp = len; - if(typelenp) *typelenp = typelen; - if(datap) {*datap = data; data = NULL;} + ZTRACE(3,"grp=%s",grp->hdr.name); -done: - nullfree(data); - return ZUNTRACEX(THROW(stat),"typeid=%d typelen=%d len=%u",*typeidp,*typelenp,*lenp); -} + TESTPUREZARR; -/* -Extract data for an attribute -*/ -static int -computeattrdata(nc_type typehint, nc_type* typeidp, const NCjson* values, size_t* typelenp, size_t* countp, void** datap) -{ - int stat = NC_NOERR; - NCbytes* buf = ncbytesnew(); - size_t typelen; - nc_type typeid = NC_NAT; - NCjson* jtext = NULL; - int reclaimvalues = 0; - int isjson = 0; /* 1 => attribute value is neither scalar nor array of scalars */ - int count = 0; /* no. of attribute values */ - - ZTRACE(3,"typehint=%d typeid=%d values=|%s|",typehint,*typeidp,NCJtotext(values)); - - /* Get assumed type */ - if(typeidp) typeid = *typeidp; - if(typeid == NC_NAT && !isjson) { - if((stat = NCZ_inferattrtype(values,typehint, &typeid))) goto done; + /* Decode the group metadata */ + if((stat = NCZF_decode_group(file,grp,zobj,(NCjson**)&jnczgrp,NULL))) goto done; + if(!purezarr && zobj->jobj == NULL) {stat = NC_ENOTZARR; goto done;} + if(purezarr) { + if((stat = get_group_content_pure(file,grp,varnames,subgrps))) goto done; + } else { /*!purezarr*/ + /* Decode the _nczarr_group */ + if((stat = NCZF_decode_nczarr_group(file,grp,jnczgrp,varnames,subgrps,dimdefs))) goto done; } - /* See if this is a simple vector (or scalar) of atomic types */ - isjson = NCZ_iscomplexjson(values,typeid); - - if(isjson) { - /* Apply the JSON attribute convention and convert to JSON string */ - typeid = NC_CHAR; - if((stat = json_convention_read(values,&jtext))) goto done; - values = jtext; jtext = NULL; - reclaimvalues = 1; - } - - if((stat = NC4_inq_atomic_type(typeid, NULL, &typelen))) - goto done; + /* Declare the dimensions in this group */ + for(i=0;ijatts))) goto done; - if(typelenp) *typelenp = typelen; - if(typeidp) *typeidp = typeid; /* return possibly inferred type */ - if(countp) *countp = (size_t)count; - if(datap) *datap = ncbytesextract(buf); + if(nclistlength(subgrps) > 0) { + /* Define sub-groups */ + if((stat = ncz_decode_subgrps(file,grp,subgrps))) goto done; + } + if(nclistlength(varnames) > 0) { + /* Define vars taking xarray into account */ + if((stat = ncz_decode_vars(file,grp,varnames))) goto done; + } + done: - ncbytesfree(buf); - if(reclaimvalues) NCJreclaim((NCjson*)values); /* we created it */ - return ZUNTRACEX(THROW(stat),"typelen=%d count=%u",(typelenp?*typelenp:0),(countp?*countp:-1)); + NCZ_reclaim_diminfo_list(dimdefs); + nclistfreeall(varnames); + nclistfreeall(subgrps); + return ZUNTRACE(THROW(stat)); } /** - * @internal Read file data from map to memory. + * @internal Materialize single var into memory; + * Take xarray and purezarr into account. * * @param file Pointer to file info struct. + * @param parent Pointer to parent grp info struct. + * @param varname name of variable in this group * * @return ::NC_NOERR No error. * @author Dennis Heimbigner */ -int -ncz_read_file(NC_FILE_INFO_T* file) +static int +ncz_decode_var1(NC_FILE_INFO_T* file, NC_GRP_INFO_T* parent, const char* varname) { int stat = NC_NOERR; - NCjson* json = NULL; + NC_VAR_INFO_T* var = NULL; + struct ZOBJ zobj = NCZ_emptyzobj(); + NClist* filters = nclistnew(); + NClist* dimrefs = nclistnew(); /* NClist */ + NClist* dimdecls = nclistnew(); /* NClist */ + size64_t* shapes = NULL; + size64_t* chunks = NULL; + size_t i, rank; + NCbytes* fqn = ncbytesnew(); + NCZ_VAR_INFO_T* zvar = NULL; - LOG((3, "%s: file: %s", __func__, file->controller->path)); - ZTRACE(3,"file=%s",file->controller->path); + ZTRACE(3,"file=%s parent=%s varname=%s",file->controller->path,parent->hdr.name,varname); + + /* Create and Download */ + if((stat = ncz4_create_var(file,parent,varname,&var))) goto done; + zvar = (NCZ_VAR_INFO_T*)var->format_var_info; + if((stat = NCZF_download_var(file,var,&zobj))) goto done; + if((stat=NCZF_decode_var(file,var,&zobj,filters,&shapes,&chunks,dimrefs))) goto done; + rank = var->ndims; + assert(zvar->scalar || nclistlength(dimrefs)==rank); + assert(rank == 0 || var->dim != NULL); + assert(rank == 0 || var->dimids != NULL); + + if(rank > 0) { + /* Convert dimrefs to corresponding dimdecls */ + if((stat = reifydimrefs(file,parent,var,shapes,dimrefs,dimdecls))) goto done; + } - /* _nczarr should already have been read in ncz_open_dataset */ + /* Process chunks and shapes */ + assert(var->chunksizes == NULL); + if(rank == 0) { /* Scalar */ + /* Scalars still need a chunk and cache */ + var->dimids = NULL; + var->dim = NULL; + if((var->chunksizes = (size_t*)malloc(sizeof(size_t)))==NULL) {stat = NC_ENOMEM; goto done;} + var->chunksizes[0] = 1; + zvar->chunkproduct = 1; + } else { + if((var->chunksizes = (size_t*)malloc(rank * sizeof(size_t)))==NULL) {stat = NC_ENOMEM; goto done;} + zvar->chunkproduct = 1; + assert(nclistlength(dimdecls) == rank); + for(i=0;idim[i] = dim; + var->dimids[i] = dim->hdr.id; + var->chunksizes[i] = (size_t)chunks[i]; + zvar->chunkproduct *= var->chunksizes[i]; + } + } + zvar->chunksize = zvar->chunkproduct * var->type_info->size; - /* Now load the groups starting with root */ - if((stat = define_grp(file,file->root_grp))) + /* Create the cache */ + if((stat = NCZ_create_chunk_cache(var,var->type_info->size*zvar->chunkproduct,zvar->dimension_separator,&zvar->cache))) goto done; + /* Process attributes */ + if((stat=ncz_decode_atts(file,(NC_OBJ*)var,zobj.jatts))) goto done; + + /* Process filters */ + if((stat = ncz_decode_filters(file,var,filters))) goto done; + done: - NCJreclaim(json); + nullfree(shapes); + nullfree(chunks); + nclistfreeall(dimrefs); + nclistfree(dimdecls); + nclistfree(filters); + ncbytesfree(fqn); + NCZ_clear_zobj(&zobj); return ZUNTRACE(THROW(stat)); } /** - * @internal Read group data from map to memory + * @internal Materialize vars into memory; + * Take xarray and purezarr into account. * - * @param file Pointer to file struct - * @param grp Pointer to grp struct + * @param file Pointer to file info struct. + * @param grp Pointer to grp info struct. + * @param varnames List of names of variables in this group * * @return ::NC_NOERR No error. * @author Dennis Heimbigner */ static int -define_grp(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp) +ncz_decode_vars(NC_FILE_INFO_T* file, NC_GRP_INFO_T* parent, NClist* varnames) { int stat = NC_NOERR; - NCZ_FILE_INFO_T* zinfo = NULL; - NCZ_GRP_INFO_T* zgrp = NULL; - char* fullpath = NULL; - char* key = NULL; - NCjson* json = NULL; - const NCjson* jgroup = NULL; - const NCjson* jattrs = NULL; - const NCjson* jnczgrp = NULL; - NClist* dimdefs = nclistnew(); - NClist* varnames = nclistnew(); - NClist* subgrps = nclistnew(); - int purezarr = 0; + size_t i; - LOG((3, "%s: dims: %s", __func__, key)); - ZTRACE(3,"file=%s grp=%s",file->controller->path,grp->hdr.name); - - zinfo = file->format_file_info; - zgrp = grp->format_grp_info; - - purezarr = (zinfo->controls.flags & FLAG_PUREZARR)?1:0; - - /* Construct grp path */ - if((stat = NCZ_grpkey(grp,&fullpath))) goto done; - - /* Download .zgroup and .zattrs */ - if((stat = downloadzarrobj(file,&zgrp->zgroup,fullpath,ZGROUP))) goto done; - jgroup = zgrp->zgroup.obj; - jattrs = zgrp->zgroup.atts; + ZTRACE(3,"parent=%s |varnames|=%u",parent->hdr.name,nclistlength(varnames)); - if(purezarr) { - if((stat = parse_group_content_pure(zinfo,grp,varnames,subgrps))) - goto done; - purezarr = 1; - } else { /*!purezarr*/ - if(jgroup == NULL) { /* does not exist, use search */ - if((stat = parse_group_content_pure(zinfo,grp,varnames,subgrps))) goto done; - purezarr = 1; - } - if(jattrs == NULL) { /* does not exist, use search */ - if((stat = parse_group_content_pure(zinfo,grp,varnames,subgrps))) goto done; - purezarr = 1; - } else { /* Extract the NCZ_V2_GROUP attribute*/ - if((stat = getnczarrkey((NC_OBJ*)grp,NCZ_V2_GROUP,&jnczgrp))) goto done; - } - nullfree(key); key = NULL; - if(jnczgrp) { - /* Pull out lists about group content */ - if((stat = parse_group_content(jnczgrp,dimdefs,varnames,subgrps))) - goto done; - } - } - - if(!purezarr) { - /* Define dimensions */ - if((stat = define_dims(file,grp,dimdefs))) goto done; + /* Load each var in turn */ + for(i = 0; i < nclistlength(varnames); i++) { + const char* varname = (const char*)nclistget(varnames,i); + if((stat = ncz_decode_var1(file,parent,varname))) goto done; } - /* Define vars taking xarray into account */ - if((stat = define_vars(file,grp,varnames))) goto done; - - /* Define sub-groups */ - if((stat = define_subgrps(file,grp,subgrps))) goto done; - done: - NCJreclaim(json); - nclistfreeall(dimdefs); - nclistfreeall(varnames); - nclistfreeall(subgrps); - nullfree(fullpath); - nullfree(key); return ZUNTRACE(THROW(stat)); } - +/**************************************************/ /** @internal Read attributes from a group or var and create a list of annotated NC_ATT_INFO_T* objects. This will process _NCProperties attribute specially. -@param zfile - [in] the containing file (annotation) -@param container - [in] the containing object -@return NC_NOERR +@param file - [in] the containing file +@param container - [in] the containing object (group|var) +@param jatts - [in] the set of attributes from the container +@return ::NC_NOERR @author Dennis Heimbigner */ -int -ncz_read_atts(NC_FILE_INFO_T* file, NC_OBJ* container) +static int +ncz_decode_atts(NC_FILE_INFO_T* file, NC_OBJ* container, const NCjson* jatts) { int stat = NC_NOERR; - size_t i; - char* fullpath = NULL; - char* key = NULL; - NCZ_FILE_INFO_T* zinfo = NULL; NC_VAR_INFO_T* var = NULL; - NCZ_VAR_INFO_T* zvar = NULL; - NC_GRP_INFO_T* grp = NULL; - NCZ_GRP_INFO_T* zgrp = NULL; - NC_ATT_INFO_T* att = NULL; - NCindex* attlist = NULL; - nc_type typeid; - size_t len, typelen; - void* data = NULL; - NC_ATT_INFO_T* fillvalueatt = NULL; - nc_type typehint = NC_NAT; - int purezarr,zarrkeys; - const NCjson* jattrs = NULL; - const NCjson* jtypes = NULL; - struct ZARROBJ* zobj = NULL; + NC_ATT_INFO_T* special = NULL; + struct NCZ_AttrInfo ainfo = NCZ_emptyAttrInfo(); ZTRACE(3,"file=%s container=%s",file->controller->path,container->name); - zinfo = file->format_file_info; - purezarr = (zinfo->controls.flags & FLAG_PUREZARR)?1:0; - zarrkeys = (zinfo->controls.flags & FLAG_NCZARR_KEY)?1:0; - - if(container->sort == NCGRP) { - grp = ((NC_GRP_INFO_T*)container); - attlist = grp->att; - zgrp = (NCZ_GRP_INFO_T*)(grp->format_grp_info); - zobj = &zgrp->zgroup; - } else { - var = ((NC_VAR_INFO_T*)container); - attlist = var->att; - zvar = (NCZ_VAR_INFO_T*)(var->format_var_info); - zobj = &zvar->zarray; + if(jatts != NULL) { + if((stat = NCZF_decode_attributes(file,container,jatts))) goto done; } - assert(purezarr || zarrkeys || zobj->obj != NULL); - - if((stat = download_jatts(file, container, &jattrs, &jtypes))) goto done; - - if(jattrs != NULL) { - /* Iterate over the attributes to create the in-memory attributes */ - /* Watch for special cases: _FillValue and _ARRAY_DIMENSIONS (xarray), etc. */ - for(i=0;iparent == NULL && strcmp(aname,NC_NCZARR_DEFAULT_MAXSTRLEN_ATTR)==0) - isdfaltmaxstrlen = 1; - if(var != NULL && strcmp(aname,NC_NCZARR_MAXSTRLEN_ATTR)==0) - ismaxstrlen = 1; - - /* See if this is reserved attribute */ - ra = NC_findreserved(aname); - if(ra != NULL) { - /* case 1: name = _NCProperties, grp=root, varid==NC_GLOBAL */ - if(strcmp(aname,NCPROPS)==0 && grp != NULL && file->root_grp == grp) { - /* Setup provenance */ - if(NCJsort(value) != NCJ_STRING) - {stat = (THROW(NC_ENCZARR)); goto done;} /*malformed*/ - if((stat = NCZ_read_provenance(file,aname,NCJstring(value)))) - goto done; - } - /* case 2: name = _ARRAY_DIMENSIONS, sort==NCVAR, flags & HIDDENATTRFLAG */ - if(strcmp(aname,NC_XARRAY_DIMS)==0 && var != NULL && (ra->flags & HIDDENATTRFLAG)) { - /* store for later */ - size_t i; - assert(NCJsort(value) == NCJ_ARRAY); - if((zvar->xarray = nclistnew())==NULL) - {stat = NC_ENOMEM; goto done;} - for(i=0;ixarray,strdup(NCJstring(k))); - } - } - /* case other: if attribute is hidden */ - if(ra->flags & HIDDENATTRFLAG) continue; /* ignore it */ - } - typehint = NC_NAT; - if(isfillvalue) - typehint = var->type_info->hdr.id ; /* if unknown use the var's type for _FillValue */ - /* Create the attribute */ - /* Collect the attribute's type and value */ - if((stat = computeattrinfo(aname,jtypes,typehint,purezarr,value, - &typeid,&typelen,&len,&data))) - goto done; - if((stat = ncz_makeattr(container,attlist,aname,typeid,len,data,&att))) - goto done; - /* No longer need this copy of the data */ - if((stat = NC_reclaim_data_all(file->controller,att->nc_typeid,data,len))) goto done; - data = NULL; - if(isfillvalue) - fillvalueatt = att; - if(ismaxstrlen && att->nc_typeid == NC_INT) - zvar->maxstrlen = ((int*)att->data)[0]; - if(isdfaltmaxstrlen && att->nc_typeid == NC_INT) - zinfo->default_maxstrlen = ((int*)att->data)[0]; + + /* Look for special per-var attributes */ + if(container->sort == NCVAR) + var = (NC_VAR_INFO_T*)container; + + /* _FillValue */ + if(var != NULL && !var->no_fill) { + special = NULL; + stat = nc4_find_grp_att(var->container,var->hdr.id,NC_FillValue,0,&special); + /* If we have not read a _FillValue attribute, then go ahead and create it */ + if(stat == NC_ENOTATT) { + stat = NC_NOERR; /*reset*/ + if((stat = NCZ_sync_dual_att(file,(NC_OBJ*)var,NC_FillValue, DA_FILLVALUE, FIXATT))) goto done; + } else if(stat != NC_NOERR) goto done; + } + + /* _Quantize_XXX */ + if(var != NULL) { + NC_ATT_INFO_T* qatt; + int mode; + /* Look for quantization attributes */ + for(qatt=NULL,mode=1;mode<=NC_QUANTIZE_MAX;mode++,qatt=NULL) { + const char* attmodename = NC_findquantizeattname(mode); /* get matching att name */ + /* See if this att is defined */ + stat = nc4_find_grp_att(var->container,var->hdr.id,attmodename,0,&qatt); + if(stat == NC_NOERR) {assert(qatt != NULL); break;} + if(stat != NC_ENOTATT) goto done; /* true error */ + /* else keep looking */ + } + stat = NC_NOERR; /* reset */ + if(qatt != NULL) { + if(qatt->len != 1 || qatt->data == NULL) {stat = NC_ENCZARR; goto done;} + /* extract the mode and NSD/NSB */ + var->quantize_mode = mode; + var->nsd = ((int*)qatt->data)[0]; } } - /* If we have not read a _FillValue, then go ahead and create it */ - if(fillvalueatt == NULL && container->sort == NCVAR) { - if((stat = ncz_create_fillvalue((NC_VAR_INFO_T*)container))) - goto done; - } /* Remember that we have read the atts for this var or group. */ if(container->sort == NCVAR) @@ -1324,1285 +670,181 @@ ncz_read_atts(NC_FILE_INFO_T* file, NC_OBJ* container) ((NC_GRP_INFO_T*)container)->atts_read = 1; done: - if(data != NULL) - stat = NC_reclaim_data(file->controller,att->nc_typeid,data,len); - nullfree(fullpath); - nullfree(key); - return ZUNTRACE(THROW(stat)); + NCZ_clearAttrInfo(file,&ainfo); + return ZUNTRACE(THROW(stat)); } -/** - * @internal Materialize dimensions into memory - * - * @param file Pointer to file info struct. - * @param grp Pointer to grp info struct. - * @param diminfo List of (name,length,isunlimited) triples - * - * @return ::NC_NOERR No error. - * @author Dennis Heimbigner - */ -static int -define_dims(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NClist* diminfo) -{ - size_t i; - int stat = NC_NOERR; - - ZTRACE(3,"file=%s grp=%s |diminfo|=%u",file->controller->path,grp->hdr.name,nclistlength(diminfo)); - - /* Reify each dim in turn */ - for(i = 0; i < nclistlength(diminfo); i+=3) { - NC_DIM_INFO_T* dim = NULL; - size64_t len = 0; - long long isunlim = 0; - const char* name = nclistget(diminfo,i); - const char* slen = nclistget(diminfo,i+1); - const char* sisunlimited = nclistget(diminfo,i+2); - - /* Create the NC_DIM_INFO_T object */ - sscanf(slen,"%lld",&len); /* Get length */ - if(sisunlimited != NULL) - sscanf(sisunlimited,"%lld",&isunlim); /* Get unlimited flag */ - else - isunlim = 0; - if((stat = nc4_dim_list_add(grp, name, (size_t)len, -1, &dim))) - goto done; - dim->unlimited = (isunlim ? 1 : 0); - if((dim->format_dim_info = calloc(1,sizeof(NCZ_DIM_INFO_T))) == NULL) - {stat = NC_ENOMEM; goto done;} - ((NCZ_DIM_INFO_T*)dim->format_dim_info)->common.file = file; - } - -done: - return ZUNTRACE(THROW(stat)); -} - - -/** - * @internal Materialize single var into memory; - * Take xarray and purezarr into account. - * - * @param file Pointer to file info struct. - * @param grp Pointer to grp info struct. - * @param varname name of variable in this group - * - * @return ::NC_NOERR No error. - * @author Dennis Heimbigner - */ -static int -define_var1(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, const char* varname) -{ - int stat = NC_NOERR; - size_t j; - NCZ_FILE_INFO_T* zinfo = NULL; - int purezarr = 0; - int xarray = 0; - /* per-variable info */ - NC_VAR_INFO_T* var = NULL; - NCZ_VAR_INFO_T* zvar = NULL; - const NCjson* jvar = NULL; - const NCjson* jatts = NULL; /* corresponding to jvar */ - const NCjson* jncvar = NULL; - const NCjson* jdimrefs = NULL; - const NCjson* jvalue = NULL; - char* varpath = NULL; - char* key = NULL; - size64_t* shapes = NULL; - NClist* dimnames = NULL; - int varsized = 0; - int suppress = 0; /* Abort processing of this variable */ - nc_type vtype = NC_NAT; - int vtypelen = 0; - size_t rank = 0; - size_t zarr_rank = 0; /* Need to watch out for scalars */ -#ifdef NETCDF_ENABLE_NCZARR_FILTERS - const NCjson* jfilter = NULL; - int chainindex = 0; -#endif - - ZTRACE(3,"file=%s grp=%s varname=%s",file->controller->path,grp->hdr.name,varname); - - zinfo = file->format_file_info; - - if(zinfo->controls.flags & FLAG_PUREZARR) purezarr = 1; - if(zinfo->controls.flags & FLAG_XARRAYDIMS) {xarray = 1;} - - dimnames = nclistnew(); - - if((stat = nc4_var_list_add2(grp, varname, &var))) - goto done; - - /* And its annotation */ - if((zvar = calloc(1,sizeof(NCZ_VAR_INFO_T)))==NULL) - {stat = NC_ENOMEM; goto done;} - var->format_var_info = zvar; - zvar->common.file = file; - - /* pretend it was created */ - var->created = 1; - - /* Indicate we do not have quantizer yet */ - var->quantize_mode = -1; - - /* Construct var path */ - if((stat = NCZ_varkey(var,&varpath))) - goto done; - - /* Download */ - if((stat = downloadzarrobj(file,&zvar->zarray,varpath,ZARRAY))) goto done; - jvar = zvar->zarray.obj; - jatts = zvar->zarray.atts; - assert(jvar == NULL || NCJsort(jvar) == NCJ_DICT); - assert(jatts == NULL || NCJsort(jatts) == NCJ_DICT); - - /* Verify the format */ - { - int version; - if((stat = NCJdictget(jvar,"zarr_format",&jvalue))<0) {stat = NC_EINVAL; goto done;} - sscanf(NCJstring(jvalue),"%d",&version); - if(version != zinfo->zarr.zarr_version) - {stat = (THROW(NC_ENCZARR)); goto done;} - } - - /* Set the type and endianness of the variable */ - { - int endianness; - if((stat = NCJdictget(jvar,"dtype",&jvalue))<0) {stat = NC_EINVAL; goto done;} - /* Convert dtype to nc_type + endianness */ - if((stat = ncz_dtype2nctype(NCJstring(jvalue),NC_NAT,purezarr,&vtype,&endianness,&vtypelen))) - goto done; - if(vtype > NC_NAT && vtype <= NC_MAX_ATOMIC_TYPE) { - /* Locate the NC_TYPE_INFO_T object */ - if((stat = ncz_gettype(file,grp,vtype,&var->type_info))) - goto done; - } else {stat = NC_EBADTYPE; goto done;} -#if 0 /* leave native in place */ - if(endianness == NC_ENDIAN_NATIVE) - endianness = zinfo->native_endianness; - if(endianness == NC_ENDIAN_NATIVE) - endianness = (NCZ_isLittleEndian()?NC_ENDIAN_LITTLE:NC_ENDIAN_BIG); - if(endianness == NC_ENDIAN_LITTLE || endianness == NC_ENDIAN_BIG) { - var->endianness = endianness; - } else {stat = NC_EBADTYPE; goto done;} -#else - var->endianness = endianness; -#endif - var->type_info->endianness = var->endianness; /* Propagate */ - if(vtype == NC_STRING) { - zvar->maxstrlen = vtypelen; - vtypelen = sizeof(char*); /* in-memory len */ - if(zvar->maxstrlen <= 0) zvar->maxstrlen = NCZ_get_maxstrlen((NC_OBJ*)var); - } - } - - if(!purezarr) { - if(jatts == NULL) {stat = NC_ENCZARR; goto done;} - /* Extract the _NCZARR_ARRAY values */ - /* Do this first so we know about storage esp. scalar */ - /* Extract the NCZ_V2_ARRAY dict */ - if((stat = getnczarrkey((NC_OBJ*)var,NCZ_V2_ARRAY,&jncvar))) goto done; - if(jncvar == NULL) {stat = NC_ENCZARR; goto done;} - assert((NCJsort(jncvar) == NCJ_DICT)); - /* Extract scalar flag */ - if((stat = NCJdictget(jncvar,"scalar",&jvalue))<0) {stat = NC_EINVAL; goto done;} - if(jvalue != NULL) { - var->storage = NC_CHUNKED; - zvar->scalar = 1; - } - /* Extract storage flag */ - if((stat = NCJdictget(jncvar,"storage",&jvalue))<0) {stat = NC_EINVAL; goto done;} - if(jvalue != NULL) - var->storage = NC_CHUNKED; - /* Extract dimrefs list */ - if((stat = dictgetalt(jncvar,"dimension_references","dimrefs",&jdimrefs))) goto done; - if(jdimrefs != NULL) { /* Extract the dimref names */ - assert((NCJsort(jdimrefs) == NCJ_ARRAY)); - if(zvar->scalar) { - assert(NCJlength(jdimrefs) == 0); - } else { - rank = NCJlength(jdimrefs); - for(j=0;jdimension_separator = 0; - if((stat = NCJdictget(jvar,"dimension_separator",&jvalue))<0) {stat = NC_EINVAL; goto done;} - if(jvalue != NULL) { - /* Verify its value */ - if(NCJsort(jvalue) == NCJ_STRING && NCJstring(jvalue) != NULL && strlen(NCJstring(jvalue)) == 1) - zvar->dimension_separator = NCJstring(jvalue)[0]; - } - /* If value is invalid, then use global default */ - if(!islegaldimsep(zvar->dimension_separator)) - zvar->dimension_separator = ngs->zarr.dimension_separator; /* use global value */ - assert(islegaldimsep(zvar->dimension_separator)); /* we are hosed */ - } - - /* fill_value; must precede calls to adjust cache */ - { - if((stat = NCJdictget(jvar,"fill_value",&jvalue))<0) {stat = NC_EINVAL; goto done;} - if(jvalue == NULL || NCJsort(jvalue) == NCJ_NULL) - var->no_fill = 1; - else { - size_t fvlen; - nc_type atypeid = vtype; - var->no_fill = 0; - if((stat = computeattrdata(var->type_info->hdr.id, &atypeid, jvalue, NULL, &fvlen, &var->fill_value))) - goto done; - assert(atypeid == vtype); - /* Note that we do not create the _FillValue - attribute here to avoid having to read all - the attributes and thus foiling lazy read.*/ - } - } - - /* shape */ - { - if((stat = NCJdictget(jvar,"shape",&jvalue))<0) {stat = NC_EINVAL; goto done;} - if(NCJsort(jvalue) != NCJ_ARRAY) {stat = (THROW(NC_ENCZARR)); goto done;} - - /* Process the rank */ - zarr_rank = NCJlength(jvalue); - if(zarr_rank == 0) { - /* suppress variable */ - ZLOG(NCLOGWARN,"Empty shape for variable %s suppressed",var->hdr.name); - suppress = 1; - goto suppressvar; - } - - if(zvar->scalar) { - rank = 0; - zarr_rank = 1; /* Zarr does not support scalars */ - } else - rank = (zarr_rank = NCJlength(jvalue)); - - if(zarr_rank > 0) { - /* Save the rank of the variable */ - if((stat = nc4_var_set_ndims(var, rank))) goto done; - /* extract the shapes */ - if((shapes = (size64_t*)malloc(sizeof(size64_t)*(size_t)zarr_rank)) == NULL) - {stat = (THROW(NC_ENOMEM)); goto done;} - if((stat = decodeints(jvalue, shapes))) goto done; - } - } - - /* chunks */ - { - size64_t chunks[NC_MAX_VAR_DIMS]; - if((stat = NCJdictget(jvar,"chunks",&jvalue))<0) {stat = NC_EINVAL; goto done;} - if(jvalue != NULL && NCJsort(jvalue) != NCJ_ARRAY) - {stat = (THROW(NC_ENCZARR)); goto done;} - /* Verify the rank */ - if(zvar->scalar || zarr_rank == 0) { - if(var->ndims != 0) - {stat = (THROW(NC_ENCZARR)); goto done;} - zvar->chunkproduct = 1; - zvar->chunksize = zvar->chunkproduct * var->type_info->size; - /* Create the cache */ - if((stat = NCZ_create_chunk_cache(var,var->type_info->size*zvar->chunkproduct,zvar->dimension_separator,&zvar->cache))) - goto done; - } else {/* !zvar->scalar */ - if(zarr_rank == 0) {stat = NC_ENCZARR; goto done;} - var->storage = NC_CHUNKED; - if(var->ndims != rank) - {stat = (THROW(NC_ENCZARR)); goto done;} - if((var->chunksizes = malloc(sizeof(size_t)*(size_t)zarr_rank)) == NULL) - {stat = NC_ENOMEM; goto done;} - if((stat = decodeints(jvalue, chunks))) goto done; - /* validate the chunk sizes */ - zvar->chunkproduct = 1; - for(j=0;jchunksizes[j] = (size_t)chunks[j]; - zvar->chunkproduct *= chunks[j]; - } - zvar->chunksize = zvar->chunkproduct * var->type_info->size; - /* Create the cache */ - if((stat = NCZ_create_chunk_cache(var,var->type_info->size*zvar->chunkproduct,zvar->dimension_separator,&zvar->cache))) - goto done; - } - if((stat = NCZ_adjust_var_cache(var))) goto done; - } - /* Capture row vs column major; currently, column major not used*/ - { - if((stat = NCJdictget(jvar,"order",&jvalue))<0) {stat = NC_EINVAL; goto done;} - if(strcmp(NCJstring(jvalue),"C") > 0) - ((NCZ_VAR_INFO_T*)var->format_var_info)->order = 1; - else ((NCZ_VAR_INFO_T*)var->format_var_info)->order = 0; - } - /* filters key */ - /* From V2 Spec: A list of JSON objects providing codec configurations, - or null if no filters are to be applied. Each codec configuration - object MUST contain a "id" key identifying the codec to be used. */ - /* Do filters key before compressor key so final filter chain is in correct order */ - { -#ifdef NETCDF_ENABLE_NCZARR_FILTERS - if(var->filters == NULL) var->filters = (void*)nclistnew(); - if(zvar->incompletefilters == NULL) zvar->incompletefilters = (void*)nclistnew(); - chainindex = 0; /* track location of filter in the chain */ - if((stat = NCZ_filter_initialize())) goto done; - if((stat = NCJdictget(jvar,"filters",&jvalue))<0) {stat = NC_EINVAL; goto done;} - if(jvalue != NULL && NCJsort(jvalue) != NCJ_NULL) { - int k; - if(NCJsort(jvalue) != NCJ_ARRAY) {stat = NC_EFILTER; goto done;} - for(k=0;;k++) { - jfilter = NULL; - jfilter = NCJith(jvalue,k); - if(jfilter == NULL) break; /* done */ - if(NCJsort(jfilter) != NCJ_DICT) {stat = NC_EFILTER; goto done;} - if((stat = NCZ_filter_build(file,var,jfilter,chainindex++))) goto done; - } - } -#endif - } - - /* compressor key */ - /* From V2 Spec: A JSON object identifying the primary compression codec and providing - configuration parameters, or ``null`` if no compressor is to be used. */ -#ifdef NETCDF_ENABLE_NCZARR_FILTERS - { - if(var->filters == NULL) var->filters = (void*)nclistnew(); - if((stat = NCZ_filter_initialize())) goto done; - if((stat = NCJdictget(jvar,"compressor",&jfilter))<0) {stat = NC_EINVAL; goto done;} - if(jfilter != NULL && NCJsort(jfilter) != NCJ_NULL) { - if(NCJsort(jfilter) != NCJ_DICT) {stat = NC_EFILTER; goto done;} - if((stat = NCZ_filter_build(file,var,jfilter,chainindex++))) goto done; - } - } - /* Suppress variable if there are filters and var is not fixed-size */ - if(varsized && nclistlength((NClist*)var->filters) > 0) - suppress = 1; -#endif - if(zarr_rank > 0) { - if((stat = computedimrefs(file, var, purezarr, xarray, rank, dimnames, shapes, var->dim))) - goto done; - if(!zvar->scalar) { - /* Extract the dimids */ - for(j=0;jdimids[j] = var->dim[j]->hdr.id; - } - } - +/**************************************************/ #ifdef NETCDF_ENABLE_NCZARR_FILTERS - if(!suppress) { - /* At this point, we can finalize the filters */ - if((stat = NCZ_filter_setup(var))) goto done; - } -#endif -suppressvar: - if(suppress) { - /* Reclaim NCZarr variable specific info */ - (void)NCZ_zclose_var1(var); - /* Remove from list of variables and reclaim the top level var object */ - (void)nc4_var_list_del(grp, var); - var = NULL; - } - -done: - nclistfreeall(dimnames); dimnames = NULL; - nullfree(varpath); varpath = NULL; - nullfree(shapes); shapes = NULL; - nullfree(key); key = NULL; - return THROW(stat); -} - -/** - * @internal Materialize vars into memory; - * Take xarray and purezarr into account. - * - * @param file Pointer to file info struct. - * @param grp Pointer to grp info struct. - * @param varnames List of names of variables in this group - * - * @return ::NC_NOERR No error. - * @author Dennis Heimbigner - */ static int -define_vars(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NClist* varnames) +ncz_encode_filters(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, NClist* filtersj) { int stat = NC_NOERR; size_t i; + NClist* filters = (NClist*)var->filters; + NCjson* jfilter = NULL; - ZTRACE(3,"file=%s grp=%s |varnames|=%u",file->controller->path,grp->hdr.name,nclistlength(varnames)); - - /* Load each var in turn */ - for(i = 0; i < nclistlength(varnames); i++) { - const char* varname = (const char*)nclistget(varnames,i); - if((stat = define_var1(file,grp,varname))) goto done; - varname = nclistget(varnames,i); + for(i=0;icontroller->path,grp->hdr.name,nclistlength(subgrpnames)); - - /* Load each subgroup name in turn */ - for(i = 0; i < nclistlength(subgrpnames); i++) { - NC_GRP_INFO_T* g = NULL; - const char* gname = nclistget(subgrpnames,i); - char norm_name[NC_MAX_NAME]; - /* Check and normalize the name. */ - if((stat = nc4_check_name(gname, norm_name))) - goto done; - if((stat = nc4_grp_list_add(file, grp, norm_name, &g))) - goto done; - if(!(g->format_grp_info = calloc(1, sizeof(NCZ_GRP_INFO_T)))) - {stat = NC_ENOMEM; goto done;} - ((NCZ_GRP_INFO_T*)g->format_grp_info)->common.file = file; - } - - /* Recurse to fill in subgroups */ - for(i=0;ichildren);i++) { - NC_GRP_INFO_T* g = (NC_GRP_INFO_T*)ncindexith(grp->children,i); - if((stat = define_grp(file,g))) - goto done; - } - -done: - return ZUNTRACE(THROW(stat)); -} - -int -ncz_read_superblock(NC_FILE_INFO_T* file, char** nczarrvp, char** zarrfp) -{ - int stat = NC_NOERR; - const NCjson* jnczgroup = NULL; - const NCjson* jnczattr = NULL; - const NCjson* jzgroup = NULL; - const NCjson* jsuper = NULL; - const NCjson* jtmp = NULL; - char* nczarr_version = NULL; - char* zarr_format = NULL; - NCZ_FILE_INFO_T* zinfo = NULL; - NC_GRP_INFO_T* root = NULL; - NCZ_GRP_INFO_T* zroot = NULL; - char* fullpath = NULL; - - ZTRACE(3,"file=%s",file->controller->path); - - root = file->root_grp; - assert(root != NULL); - - zinfo = (NCZ_FILE_INFO_T*)file->format_file_info; - zroot = (NCZ_GRP_INFO_T*)root->format_grp_info; - - /* Construct grp key */ - if((stat = NCZ_grpkey(root,&fullpath))) goto done; - - /* Download the root group .zgroup and associated .zattrs */ - if((stat = downloadzarrobj(file, &zroot->zgroup, fullpath, ZGROUP))) goto done; - jzgroup = zroot->zgroup.obj; - - /* Look for superblock; first in .zattrs and then in .zgroup */ - if((stat = getnczarrkey((NC_OBJ*)root,NCZ_V2_SUPERBLOCK,&jsuper))) goto done; - - /* Set the format flags */ - - /* Set where _nczarr_xxx are stored */ - if(jsuper != NULL && zroot->zgroup.nczv1) { - zinfo->controls.flags |= FLAG_NCZARR_KEY; - /* Also means file is read only */ - file->no_write = 1; - } - - if(jsuper == NULL) { - /* See if this is looks like a NCZarr/Zarr dataset at all - by looking for anything here of the form ".z*" */ - if((stat = ncz_validate(file))) goto done; - /* ok, assume pure zarr with no groups */ - zinfo->controls.flags |= FLAG_PUREZARR; - if(zarr_format == NULL) zarr_format = strdup("2"); - } - - /* Look for _nczarr_group */ - if((stat = getnczarrkey((NC_OBJ*)root,NCZ_V2_GROUP,&jnczgroup))) goto done; - - /* Look for _nczarr_attr*/ - if((stat = getnczarrkey((NC_OBJ*)root,NCZ_V2_ATTR,&jnczattr))) goto done; - - if(jsuper != NULL) { - if(jsuper->sort != NCJ_DICT) {stat = NC_ENCZARR; goto done;} - if((stat = dictgetalt(jsuper,"nczarr_version","version",&jtmp))<0) {stat = NC_EINVAL; goto done;} - nczarr_version = nulldup(NCJstring(jtmp)); - } - - if(jzgroup != NULL) { - if(jzgroup->sort != NCJ_DICT) {stat = NC_ENCZARR; goto done;} - /* In any case, extract the zarr format */ - if((stat = NCJdictget(jzgroup,"zarr_format",&jtmp))<0) {stat = NC_EINVAL; goto done;} - if(zarr_format == NULL) - zarr_format = nulldup(NCJstring(jtmp)); - else if(strcmp(zarr_format,NCJstring(jtmp))!=0) - {stat = NC_ENCZARR; goto done;} - } - - if(nczarrvp) {*nczarrvp = nczarr_version; nczarr_version = NULL;} - if(zarrfp) {*zarrfp = zarr_format; zarr_format = NULL;} -done: - nullfree(fullpath); - nullfree(zarr_format); - nullfree(nczarr_version); - return ZUNTRACE(THROW(stat)); -} - -/**************************************************/ -/* Utilities */ - +@internal Read filter codecs from avar and apply them +to a variable. +@param file - [in] the containing file +@param var - [in] the containing var +@return ::NC_NOERR || NC_EXXX +@author Dennis Heimbigner +*/ static int -parse_group_content(const NCjson* jcontent, NClist* dimdefs, NClist* varnames, NClist* subgrps) +ncz_decode_filters(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, const NClist* filters) { int stat = NC_NOERR; - size_t i; - const NCjson* jvalue = NULL; - - ZTRACE(3,"jcontent=|%s| |dimdefs|=%u |varnames|=%u |subgrps|=%u",NCJtotext(jcontent),(unsigned)nclistlength(dimdefs),(unsigned)nclistlength(varnames),(unsigned)nclistlength(subgrps)); - - if((stat=dictgetalt(jcontent,"dimensions","dims",&jvalue))) goto done; - if(jvalue != NULL) { - if(NCJsort(jvalue) != NCJ_DICT) {stat = (THROW(NC_ENCZARR)); goto done;} - /* Extract the dimensions defined in this group */ - for(i=0;icontroller->path,var->hdr.name); - if((stat = NCJdictget(jcontent,"groups",&jvalue))<0) {stat = NC_EINVAL; goto done;} - if(jvalue != NULL) { - /* Extract the subgroup names in this group */ - for(i=0;i 0) { + size_t i; + for(i=0;icommon.file->controller->path,grp->hdr.name,(unsigned)nclistlength(varnames),(unsigned)nclistlength(subgrps)); - - nclistclear(varnames); - if((stat = searchvars(zinfo,grp,varnames))) goto done; - nclistclear(subgrps); - if((stat = searchsubgrps(zinfo,grp,subgrps))) goto done; - -done: - return ZUNTRACE(THROW(stat)); -} - - -static int -searchvars(NCZ_FILE_INFO_T* zfile, NC_GRP_INFO_T* grp, NClist* varnames) -{ - size_t i; - int stat = NC_NOERR; - char* grpkey = NULL; - char* varkey = NULL; - char* zarray = NULL; - NClist* matches = nclistnew(); - - /* Compute the key for the grp */ - if((stat = NCZ_grpkey(grp,&grpkey))) goto done; - /* Get the map and search group */ - if((stat = nczmap_search(zfile->map,grpkey,matches))) goto done; - for(i=0;imap,zarray)) == NC_NOERR) - nclistpush(varnames,strdup(name)); - stat = NC_NOERR; - nullfree(varkey); varkey = NULL; - nullfree(zarray); zarray = NULL; - } - -done: - nullfree(grpkey); - nullfree(varkey); - nullfree(zarray); - nclistfreeall(matches); - return stat; -} - -static int -searchsubgrps(NCZ_FILE_INFO_T* zfile, NC_GRP_INFO_T* grp, NClist* subgrpnames) -{ - size_t i; - int stat = NC_NOERR; - char* grpkey = NULL; - char* subkey = NULL; - char* zgroup = NULL; - NClist* matches = nclistnew(); - - /* Compute the key for the grp */ - if((stat = NCZ_grpkey(grp,&grpkey))) goto done; - /* Get the map and search group */ - if((stat = nczmap_search(zfile->map,grpkey,matches))) goto done; - for(i=0;imap,zgroup)) == NC_NOERR) - nclistpush(subgrpnames,strdup(name)); - stat = NC_NOERR; - nullfree(subkey); subkey = NULL; - nullfree(zgroup); zgroup = NULL; - } - -done: - nullfree(grpkey); - nullfree(subkey); - nullfree(zgroup); - nclistfreeall(matches); - return stat; -} - -/* Convert a list of integer strings to 64 bit dimension sizes (shapes) */ -static int -decodeints(const NCjson* jshape, size64_t* shapes) -{ - int stat = NC_NOERR; - size_t i; - - for(i=0;iroot_grp; - NC_DIM_INFO_T* thed = NULL; - if((stat = nc4_dim_list_add(root, name, (size_t)dimlen, -1, &thed))) - goto done; - assert(thed != NULL); - /* Create struct for NCZ-specific dim info. */ - if (!(thed->format_dim_info = calloc(1, sizeof(NCZ_DIM_INFO_T)))) - {stat = NC_ENOMEM; goto done;} - ((NCZ_DIM_INFO_T*)thed->format_dim_info)->common.file = file; - *dimp = thed; thed = NULL; -done: - return stat; -} - - -/* -Given a list of segments, find corresponding group. -*/ -static int -locategroup(NC_FILE_INFO_T* file, size_t nsegs, NClist* segments, NC_GRP_INFO_T** grpp) -{ - size_t i, j; - int found, stat = NC_NOERR; - NC_GRP_INFO_T* grp = NULL; - - grp = file->root_grp; - for(i=0;ichildren);j++) { - NC_GRP_INFO_T* subgrp = (NC_GRP_INFO_T*)ncindexith(grp->children,j); - if(strcmp(subgrp->hdr.name,norm_name)==0) { - grp = subgrp; - found = 1; - break; - } - } - if(!found) {stat = NC_ENOGRP; goto done;} - } - /* grp should be group of interest */ - if(grpp) *grpp = grp; - -done: - return THROW(stat); -} - +#else static int -parsedimrefs(NC_FILE_INFO_T* file, NClist* dimnames, size64_t* shape, NC_DIM_INFO_T** dims, int create) -{ - size_t i; - int stat = NC_NOERR; - NClist* segments = NULL; - - for(i=0;idim);j++) { - d = (NC_DIM_INFO_T*)ncindexith(g->dim,j); - if(strcmp(d->hdr.name,dimname)==0) { - dims[i] = d; - break; - } - } - if(dims[i] == NULL && create) { - /* If not found and create then create it */ - if((stat = createdim(file, dimname, shape[i], &dims[i]))) - goto done; - } else { - /* Verify consistency */ - if(dims[i]->len != shape[i]) - {stat = NC_EDIMSIZE; goto done;} - } - assert(dims[i] != NULL); - } -done: - nclistfreeall(segments); - return THROW(stat); -} - -/** - * @internal Get the metadata for a variable. - * - * @param var Pointer to var info struct. - * - * @return ::NC_NOERR No error. - * @return ::NC_EBADID Bad ncid. - * @return ::NC_ENOMEM Out of memory. - * @return ::NC_EHDFERR HDF5 returned error. - * @return ::NC_EVARMETA Error with var metadata. - * @author Ed Hartnett - */ -int -ncz_get_var_meta(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var) +ncz_encode_filters(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, NClist* filtersj) { - int retval = NC_NOERR; - - assert(file && var && var->format_var_info); - LOG((3, "%s: var %s", __func__, var->hdr.name)); - ZTRACE(3,"file=%s var=%s",file->controller->path,var->hdr.name); - - /* Have we already read the var metadata? */ - if (var->meta_read) - goto done; - -#ifdef LOOK - /* Get the current chunk cache settings. */ - if ((access_pid = H5Dget_access_plist(hdf5_var->hdf_datasetid)) < 0) - BAIL(NC_EVARMETA); - - /* Learn about current chunk cache settings. */ - if ((H5Pget_chunk_cache(access_pid, &(var->chunk_cache_nelems), - &(var->chunk_cache_size), &rdcc_w0)) < 0) - BAIL(NC_EHDFERR); - var->chunk_cache_preemption = rdcc_w0; - - /* Get the dataset creation properties. */ - if ((propid = H5Dget_create_plist(hdf5_var->hdf_datasetid)) < 0) - BAIL(NC_EHDFERR); - - /* Get var chunking info. */ - if ((retval = get_chunking_info(propid, var))) - BAIL(retval); - - /* Get filter info for a var. */ - if ((retval = get_filter_info(propid, var))) - BAIL(retval); - - /* Get fill value, if defined. */ - if ((retval = get_fill_info(propid, var))) - BAIL(retval); - - /* Is this a deflated variable with a chunksize greater than the - * current cache size? */ - if ((retval = nc4_adjust_var_cache(var))) - BAIL(retval); - - /* Is there an attribute which means quantization was used? */ - if ((retval = get_quantize_info(var))) - BAIL(retval); - - if (var->coords_read && !var->dimscale) - if ((retval = get_attached_info(var, hdf5_var, var->ndims, hdf5_var->hdf_datasetid))) - goto done;; -#endif - - /* Remember that we have read the metadata for this var. */ - var->meta_read = NC_TRUE; -done: - return ZUNTRACE(retval); + NC_UNUSED(file); + NC_UNUSED(var); + NC_UNUSED(filtersj); + return NC_NOERR; } -/* Compute the set of dim refs for this variable, taking purezarr and xarray into account */ static int -computedimrefs(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, int purezarr, int xarray, int ndims, NClist* dimnames, size64_t* shapes, NC_DIM_INFO_T** dims) +ncz_decode_filters(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, const NClist* filters) { - int stat = NC_NOERR; - size_t i; - int createdims = 0; /* 1 => we need to create the dims in root if they do not already exist */ - NCZ_FILE_INFO_T* zfile = (NCZ_FILE_INFO_T*)file->format_file_info; - NCZ_VAR_INFO_T* zvar = (NCZ_VAR_INFO_T*)(var->format_var_info); - NCjson* jatts = NULL; - - ZTRACE(3,"file=%s var=%s purezarr=%d xarray=%d ndims=%d shape=%s", - file->controller->path,var->hdr.name,purezarr,xarray,(int)ndims,nczprint_vector(ndims,shapes)); - assert(zfile && zvar); - - if(purezarr && xarray) {/* Read in the attributes to get xarray dimdef attribute; Note that it might not exist */ - /* Note that if xarray && !purezarr, then xarray will be superceded by the nczarr dimensions key */ - char zdimname[4096]; - if(zvar->xarray == NULL) { - assert(nclistlength(dimnames) == 0); - if((stat = ncz_read_atts(file,(NC_OBJ*)var))) goto done; - } - if(zvar->xarray != NULL) { - /* convert xarray to the dimnames */ - for(i=0;ixarray);i++) { - snprintf(zdimname,sizeof(zdimname),"/%s",(const char*)nclistget(zvar->xarray,i)); - nclistpush(dimnames,strdup(zdimname)); - } - } - createdims = 1; /* may need to create them */ - } - - /* If pure zarr and we have no dimref names, then fake it */ - if(purezarr && nclistlength(dimnames) == 0) { - int i; - createdims = 1; - for(i=0;ihdr.name,(unsigned)nclistlength(varnames),(unsigned)nclistlength(subgrps)); -/* See if there is reason to believe the specified path is a legitimate (NC)Zarr file - * Do a breadth first walk of the tree starting at file path. - * @param file to validate - * @return NC_NOERR if it looks ok - * @return NC_ENOTNC if it does not look ok - */ -static int -ncz_validate(NC_FILE_INFO_T* file) -{ - int stat = NC_NOERR; - NCZ_FILE_INFO_T* zinfo = (NCZ_FILE_INFO_T*)file->format_file_info; - int validate = 0; - NCbytes* prefix = ncbytesnew(); - NClist* queue = nclistnew(); - NClist* nextlevel = nclistnew(); - NCZMAP* map = zinfo->map; - char* path = NULL; - char* segment = NULL; - size_t seglen; - - ZTRACE(3,"file=%s",file->controller->path); + nclistclear(varnames); + if((stat = NCZF_searchobjects(file,grp,varnames,subgrps))) goto done; - path = strdup("/"); - nclistpush(queue,path); - path = NULL; - do { - nullfree(path); path = NULL; - /* This should be full path key */ - path = nclistremove(queue,0); /* remove from front of queue */ - /* get list of next level segments (partial keys) */ - assert(nclistlength(nextlevel)==0); - if((stat=nczmap_search(map,path,nextlevel))) {validate = 0; goto done;} - /* For each s in next level, test, convert to full path, and push onto queue */ - while(nclistlength(nextlevel) > 0) { - segment = nclistremove(nextlevel,0); - seglen = nulllen(segment); - if((seglen >= 2 && memcmp(segment,".z",2)==0) || (seglen >= 4 && memcmp(segment,".ncz",4)==0)) { - validate = 1; - goto done; - } - /* Convert to full path */ - ncbytesclear(prefix); - ncbytescat(prefix,path); - if(strlen(path) > 1) ncbytescat(prefix,"/"); - ncbytescat(prefix,segment); - /* push onto queue */ - nclistpush(queue,ncbytesextract(prefix)); - nullfree(segment); segment = NULL; - } - } while(nclistlength(queue) > 0); done: - if(!validate) stat = NC_ENOTNC; - nullfree(path); - nullfree(segment); - nclistfreeall(queue); - nclistfreeall(nextlevel); - ncbytesfree(prefix); return ZUNTRACE(THROW(stat)); } /** Insert an attribute into a list of attribute, including typing -Takes control of javalue. +Takes control of javalue but not atype @param jatts @param jtypes @param aname -@param javalue +@param javaluep +@parame atypep */ -static int -insert_attr(NCjson* jatts, NCjson* jtypes, const char* aname, NCjson* javalue, const char* atype) +int +ncz_insert_attr(NCjson* jatts, NCjson* jtypes, const char* aname, NCjson** javaluep, const char* atype) { int stat = NC_NOERR; if(jatts != NULL) { - if(jtypes != NULL) { - NCJinsertstring(jtypes,aname,atype); - } - NCJinsert(jatts,aname,javalue); + if(jtypes != NULL) { + NCJinsertstring(jtypes,aname,atype); + } + NCJinsert(jatts,aname,*javaluep); + *javaluep = NULL; } return THROW(stat); } -/** -Insert _nczarr_attr into .zattrs -Take control of jtypes -@param jatts -@param jtypes -*/ -static int -insert_nczarr_attr(NCjson* jatts, NCjson* jtypes) -{ - NCjson* jdict = NULL; - if(jatts != NULL && jtypes != NULL) { - NCJinsertstring(jtypes,NCZ_V2_ATTR,"|J0"); /* type for _nczarr_attr */ - NCJnew(NCJ_DICT,&jdict); - NCJinsert(jdict,"types",jtypes); - NCJinsert(jatts,NCZ_V2_ATTR,jdict); - jdict = NULL; - } - return NC_NOERR; -} - -/** -Upload a .zattrs object -Optionally take control of jatts and jtypes -@param file -@param container -@param jattsp -@param jtypesp -*/ -static int -upload_attrs(NC_FILE_INFO_T* file, NC_OBJ* container, NCjson* jatts) -{ - int stat = NC_NOERR; - NCZ_FILE_INFO_T* zinfo = NULL; - NC_VAR_INFO_T* var = NULL; - NC_GRP_INFO_T* grp = NULL; - NCZMAP* map = NULL; - char* fullpath = NULL; - char* key = NULL; - - ZTRACE(3,"file=%s grp=%s",file->controller->path,grp->hdr.name); - - if(jatts == NULL) goto done; - - zinfo = file->format_file_info; - map = zinfo->map; - - if(container->sort == NCVAR) { - var = (NC_VAR_INFO_T*)container; - } else if(container->sort == NCGRP) { - grp = (NC_GRP_INFO_T*)container; - } - - /* Construct container path */ - if(container->sort == NCGRP) - stat = NCZ_grpkey(grp,&fullpath); - else - stat = NCZ_varkey(var,&fullpath); - if(stat) goto done; - - /* write .zattrs*/ - if((stat = nczm_concat(fullpath,ZATTRS,&key))) goto done; - if((stat=NCZ_uploadjson(map,key,jatts))) goto done; - nullfree(key); key = NULL; - -done: - nullfree(fullpath); - return ZUNTRACE(THROW(stat)); -} - -#if 0 -/** -@internal Get contents of a meta object; fail it it does not exist -@param zmap - [in] map -@param key - [in] key of the object -@param jsonp - [out] return parsed json || NULL if not exists -@return NC_NOERR -@return NC_EXXX -@author Dennis Heimbigner -*/ -static int -readarray(NCZMAP* zmap, const char* key, NCjson** jsonp) -{ - int stat = NC_NOERR; - NCjson* json = NULL; - - if((stat = NCZ_downloadjson(zmap,key,&json))) goto done; - if(json != NULL && NCJsort(json) != NCJ_ARRAY) {stat = NC_ENCZARR; goto done;} - if(jsonp) {*jsonp = json; json = NULL;} -done: - NCJreclaim(json); - return stat; -} -#endif - -/* Get one of two key values from a dict */ -static int -dictgetalt(const NCjson* jdict, const char* name, const char* alt, const NCjson** jvaluep) -{ - int stat = NC_NOERR; - const NCjson* jvalue = NULL; - if((stat = NCJdictget(jdict,name,&jvalue))<0) {stat = NC_EINVAL; goto done;} /* try this first */ - if(jvalue == NULL) { - if((stat = NCJdictget(jdict,alt,&jvalue))<0) {stat = NC_EINVAL; goto done;} /* try this alternative*/ - } - if(jvaluep) *jvaluep = jvalue; -done: - return THROW(stat); -} +/**************************************************/ -/* Get _nczarr_xxx from either .zXXX or .zattrs */ +/* Convert dimrefs to dimension declarations (possibly creating them) */ static int -getnczarrkey(NC_OBJ* container, const char* name, const NCjson** jncxxxp) +reifydimrefs(NC_FILE_INFO_T* file, NC_GRP_INFO_T* parent, NC_VAR_INFO_T* var, size64_t* shapes, NClist* dimrefs, NClist* dimdecls) { int stat = NC_NOERR; - const NCjson* jxxx = NULL; - NC_GRP_INFO_T* grp = NULL; - NC_VAR_INFO_T* var = NULL; - struct ZARROBJ* zobj = NULL; - - /* Decode container */ - if(container->sort == NCGRP) { - grp = (NC_GRP_INFO_T*)container; - zobj = &((NCZ_GRP_INFO_T*)grp->format_grp_info)->zgroup; - } else { - var = (NC_VAR_INFO_T*)container; - zobj = &((NCZ_VAR_INFO_T*)var->format_var_info)->zarray; - } - - /* Try .zattrs first */ - if(zobj->atts != NULL) { - jxxx = NULL; - if((stat = NCJdictget(zobj->atts,name,&jxxx))<0) {stat = NC_EINVAL; goto done;} - } - if(jxxx == NULL) { - /* Try .zxxx second */ - if(zobj->obj != NULL) { - if((stat = NCJdictget(zobj->obj,name,&jxxx))<0) {stat = NC_EINVAL; goto done;} + size_t i; + size_t rank = var->ndims; + NCbytes* fqn = NULL; + char* basename = NULL; + + fqn = ncbytesnew(); + for(i=0;i FQN */ + switch (stat = NCZ_locateFQN(file->root_grp, dimref, NCDIM, &obj, &basename)) { + case NC_NOERR: break; /* Dimension exists */ + case NC_ENOOBJECT: /* Need to create dimension */ + if((stat = definedim(file,(NC_GRP_INFO_T*)obj,basename,shapes[i],(shapes[i]==0?UNLIM:FIXED),(NC_DIM_INFO_T**)&obj))) goto done; + break; + default: goto done; /* some kind of real error */ + } + nclistpush(dimdecls,obj); + } else { /* search upwards for the dimension decl */ + assert(strchr(dimref,'/')==NULL); + if((stat = NCZ_search_name(parent,dimref,NCDIM,&obj))) goto done; + if(obj == NULL) { + /* Need to create dimension in parent group */ + if((stat = definedim(file,parent,dimref,shapes[i],(shapes[i]==0?UNLIM:FIXED),(NC_DIM_INFO_T**)&obj))) goto done; + } + nclistpush(dimdecls,(NC_DIM_INFO_T*)obj); } - if(jxxx != NULL) - zobj->nczv1 = 1; /* Mark as old style with _nczarr_xxx in obj not attributes */ } - if(jncxxxp) *jncxxxp = jxxx; done: + nullfree(basename); + ncbytesfree(fqn); return THROW(stat); } static int -downloadzarrobj(NC_FILE_INFO_T* file, struct ZARROBJ* zobj, const char* fullpath, const char* objname) +definedim(NC_FILE_INFO_T* file, NC_GRP_INFO_T* parent, const char* basename, size64_t shape, int unlim, NC_DIM_INFO_T** dimp) { int stat = NC_NOERR; - char* key = NULL; - NCZMAP* map = ((NCZ_FILE_INFO_T*)file->format_file_info)->map; - - /* Download .zXXX and .zattrs */ - nullfree(zobj->prefix); - zobj->prefix = strdup(fullpath); - NCJreclaim(zobj->obj); zobj->obj = NULL; - NCJreclaim(zobj->atts); zobj->obj = NULL; - if((stat = nczm_concat(fullpath,objname,&key))) goto done; - if((stat=NCZ_downloadjson(map,key,&zobj->obj))) goto done; - nullfree(key); key = NULL; - if((stat = nczm_concat(fullpath,ZATTRS,&key))) goto done; - if((stat=NCZ_downloadjson(map,key,&zobj->atts))) goto done; + struct NCZ_DimInfo dimdef; + assert(parent->hdr.sort == NCGRP); + strncpy(dimdef.norm_name,basename,sizeof(dimdef.norm_name)); + /* Use shape as the size */ + dimdef.shape = (size_t)shape; + dimdef.unlimited = unlim; + if((stat = ncz4_create_dim(file,parent,&dimdef,dimp))) goto done; done: - nullfree(key); return THROW(stat); } diff --git a/libnczarr/ztype.c b/libnczarr/ztype.c index dd1bb57fa9..d15e897ec2 100644 --- a/libnczarr/ztype.c +++ b/libnczarr/ztype.c @@ -134,6 +134,9 @@ NCZ_inq_typeid(int ncid, const char *name, nc_type *typeidp) int NCZ_inq_typeids(int ncid, int *ntypes, int *typeids) { + NC_UNUSED(ncid); + NC_UNUSED(typeids); + ZTRACE(0,"ncid=%d",ncid); if(ntypes) *ntypes = 0; return ZUNTRACEX(NC_NOERR,"ntypes=%d typeids=%p",(ntypes?-1:*ntypes),typeids); diff --git a/libnczarr/zutil.c b/libnczarr/zutil.c index 8ca4602b24..9881ca3d94 100644 --- a/libnczarr/zutil.c +++ b/libnczarr/zutil.c @@ -11,91 +11,108 @@ */ #include "zincludes.h" -#include +#include "znc4.h" +#include "isnan.h" #undef DEBUG +/*mnemonic*/ +#define TESTUNLIM 1 + /**************************************************/ -/* Static zarr type name table */ - -/* Table of nc_type X {Zarr,NCZarr} X endianness -Issue: Need to distinquish NC_STRING && MAXSTRLEN==1 from NC_CHAR -in a way that allows other Zarr implementations to read the data. - -Available info: -Write: we have the netcdf type, so there is no ambiguity. -Read: we have the variable type and also any attribute dtype, -but those types are ambiguous. -We also have the attribute vs variable type problem. -For pure zarr, we have to infer the type of an attribute, -so if we have "var:strattr = \"abcdef\"", then we need -to decide how to infer the type: NC_STRING vs NC_CHAR. - -Solution: -For variables and for NCZarr type attributes, distinquish by using: +/** +Type Issues: + +There are (currently) two type issues that need special hacks. +1. Need dtypes to distinquish NC_STRING && MAXSTRLEN==1 + (assuming fixed size strings) from NC_CHAR in a way + that allows other Zarr implementations to read the data. +2. Need a fake dtype to support the JSON convention allowing + an attribute's value to be a JSON value. + +Zarr Version 2: +------------------- +For issue 1, use these dtypes to distinquish NC_STRING && MAXSTRLEN==1 from NC_CHAR * ">S1" for NC_CHAR. * "|S1" for NC_STRING && MAXSTRLEN==1 * "|Sn" for NC_STRING && MAXSTRLEN==n -This is admittedly a bit of a hack, and the first case in particular -will probably cause errors in some other Zarr implementations; the Zarr -spec is unclear about what combinations are legal. Note that we could use "|U1", but since this is utf-16 or utf-32 in python, it may cause problems when reading what amounts to utf-8. -For attributes, we infer: -* NC_CHAR if the hint is 0 - - e.g. var:strattr = 'abcdef'" => NC_CHAR -* NC_STRING if hint is NC_STRING. - - e.g. string var:strattr = \"abc\", \"def\"" => NC_STRING +For issue 2, use this type to identify a JSON valued attribute. +* "|J0" + +These choices are admittedly a bit of a hack, and the first case in particular +will probably cause errors in some other Zarr implementations; the Zarr spec +is unclear about what combinations are legal. Issue 2 will only be interpreted by +NCZarr code, so that choice is arbitrary. + +Zarr Version 3: +------------------- +For issues 1 and 2, we have the following table: +| dtype | type_alias | +| ----- | ---------- | +| uint8 | char | +| rn | string | +| uint8 | json | + + +In the event that we are reading a pure Zarr file, we need to make +inferences about the above issues but lacking any NCZarr hints. + +First, we need to define a rule to define what attribute values can be +considered a "complex" json expression. So when we read the JSON +value of an attribute, that value is classified as complex or simple. +Simple valued attributes will be mapped to atomic-valued +netcdf attributes. Complex valued attributes are "unparsed" to a +string and the attribute is stored as an NC_CHAR typed attribute. + +The current rule for defining a complex JSON valued attribute is defined +by the function NCZ_iscomplexjson(). Basically the rule is as follows: +1. If the attribute name is _nczarr_XXX, then it is inherently COMPLEX. +2. If the attribute value is a single atomic value or NULL or a JSON array + of atomic values, then the attribute value is SIMPLE. +3. Otherwise, the attribute value is COMPLEX. + +In the event that we want to write a complex JSON valued attribute, +we use the following rules in order (see NCZ_iscomplexjsontext()): +1. Attribute type is not of type NC_CHAR => not complex +2. Attribute value contains no unescaped '[' and no unescaped '{' => not complex +3. The value, treated as a string, is not JSON parseable => notcomplex +4. else the value can be treated as a complex json value. + +This is admittedly a hack that uses rule 2 to delay parsing the +attribute value as long as possible. Note the rules will change +when/if structured types (e.g. compound, complex) are added. + +Assuming the attribute value is not a complex JSON expression, we assume +the value is a single atomic value or an array of atomic values. + +We infer the type -- see NCZ_inferattrtype() -- by looking at the +first (possibly only) value of the attribute. The only tricky part of this +occurs when we have a JSON string value. We need to decide if the type +should be treated as NC_CHAR or as NC_STRING. +The current rules are as follows: +1. choose NC_CHAR if: + a. The value is a single value (not a JSON array) and NCJsort == NCJ_STRING + b. The value is an array and each element of the array + is a single character. +2. else choose NC_STRING. + +So, for example: +* "a" => NC_CHAR +* "abcdef" => NC_CHAR +* ["abcdef"] => NC_STRING +* ["a","b","c","d","e","f"] => NC_CHAR +* ["abc", "def"] => NC_STRING -Note also that if we read a pure zarr file we will probably always -see "|S1", so we will never see a variable of type NC_CHAR. -We might however see an attribute of type string. */ -static const struct ZTYPES { - char* zarr[3]; - char* nczarr[3]; -} znames[NUM_ATOMIC_TYPES] = { -/* nc_type Pure Zarr NCZarr - NE LE BE NE LE BE*/ -/*NC_NAT*/ {{NULL,NULL,NULL}, {NULL,NULL,NULL}}, -/*NC_BYTE*/ {{"|i1","i1"},{"|i1","i1"}}, -/*NC_CHAR*/ {{">S1",">S1",">S1"},{">S1",">S1",">S1"}}, -/*NC_SHORT*/ {{"|i2","i2"},{"|i2","i2"}}, -/*NC_INT*/ {{"|i4","i4"},{"|i4","i4"}}, -/*NC_FLOAT*/ {{"|f4","f4"},{"|f4","f4"}}, -/*NC_DOUBLE*/ {{"|f8","f8"},{"|f8","f8"}}, -/*NC_UBYTE*/ {{"|u1","u1"},{"|u1","u1"}}, -/*NC_USHORT*/ {{"|u2","u2"},{"|u2","u2"}}, -/*NC_UINT*/ {{"|u4","u4"},{"|u4","u4"}}, -/*NC_INT64*/ {{"|i8","i8"},{"|i8","i8"}}, -/*NC_UINT64*/ {{"|u8","u8"},{"|u8","u8"}}, -/*NC_STRING*/ {{"|S%d","|S%d","|S%d"},{"|S%d","|S%d","|S%d"}}, -}; - -#if 0 -static const char* zfillvalue[NUM_ATOMIC_TYPES] = { -NULL, /*NC_NAT*/ -"-127", /*NC_BYTE*/ -"0", /*NC_CHAR*/ -"-32767", /*NC_SHORT*/ -"-2147483647", /*NC_INT*/ -"9.9692099683868690e+36f", /* near 15 * 2^119 */ /*NC_FLOAT*/ -"9.9692099683868690e+36", /*NC_DOUBLE*/ -"255", /*NC_UBYTE*/ -"65535", /*NC_USHORT*/ -"4294967295", /*NC_UINT*/ -"-9223372036854775806", /*NC_INT64*/ -"18446744073709551614", /*NC_UINT64*/ -"", /*NC_STRING*/ -}; -#endif /* map nc_type -> NCJ_SORT */ -static int zjsonsort[NUM_ATOMIC_TYPES] = { +static int zobjsort[N_NCZARR_TYPES] = { NCJ_UNDEF, /*NC_NAT*/ NCJ_INT, /*NC_BYTE*/ -NCJ_INT, /*NC_CHAR*/ +NCJ_STRING, /*NC_CHAR*/ NCJ_INT, /*NC_SHORT*/ NCJ_INT, /*NC_INT*/ NCJ_DOUBLE, /*NC_FLOAT*/ @@ -106,9 +123,17 @@ NCJ_INT, /*NC_UINT*/ NCJ_INT, /*NC_INT64*/ NCJ_INT, /*NC_UINT64*/ NCJ_STRING, /*NC_STRING*/ +NCJ_DICT, /*NC_JSON*/ }; /* Forward */ +static int splitfqn(const char* fqn0, NClist* segments); +static int locatedimbyname(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, const char* dimname, NC_DIM_INFO_T** dimp, NC_GRP_INFO_T** grpp); +static int isconsistentdim(NC_DIM_INFO_T* dim, NCZ_DimInfo* dimdata, int testunlim); +static int locateconsistentdim(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NCZ_DimInfo* dimdata, int testunlim, NC_DIM_INFO_T** dimp, NC_GRP_INFO_T** grpp); +static int cmp_strings(const void* a1, const void* a2); +static int NCZ_inferinttype(unsigned long long u64, int negative); +static nc_type NCZ_applytypehint(nc_type typeid, nc_type typehint); /**************************************************/ @@ -177,35 +202,6 @@ NCZ_varkey(const NC_VAR_INFO_T* var, char** pathp) return stat; } -/** -@internal Get key for a dimension -@param dim - [in] dim -@param pathp - [out] full path -@return NC_NOERR -@author Dennis Heimbigner -*/ -int -NCZ_dimkey(const NC_DIM_INFO_T* dim, char** pathp) -{ - int stat = NC_NOERR; - char* grppath = NULL; - char* dimpath = NULL; - - /* Start by creating the full path for the parent group */ - if((stat = NCZ_grpkey(dim->container,&grppath))) - goto done; - /* Create the suffix path using the dim name */ - if((stat = nczm_concat(grppath,dim->hdr.name,&dimpath))) - goto done; - /* return path */ - if(pathp) {*pathp = dimpath; dimpath = NULL;} - -done: - nullfree(grppath); - nullfree(dimpath); - return stat; -} - /** @internal Split a key into pieces along '/' character; elide any leading '/' @param key - [in] @@ -226,9 +222,8 @@ ncz_splitkey(const char* key, NClist* segments) @internal Down load a .z... structure into memory @param zmap - [in] controlling zarr map @param key - [in] .z... object to load -@param jsonp - [out] root of the loaded json (NULL if key does not exist) -@return NC_NOERR -@return NC_EXXX +@param jsonp - [out] root of the loaded json +@return NC_NOERR || NC_EXXX @author Dennis Heimbigner */ int @@ -239,26 +234,25 @@ NCZ_downloadjson(NCZMAP* zmap, const char* key, NCjson** jsonp) char* content = NULL; NCjson* json = NULL; - switch(stat = nczmap_len(zmap, key, &len)) { + switch (stat = nczmap_len(zmap, key, &len)) { case NC_NOERR: break; - case NC_ENOOBJECT: case NC_EEMPTY: - stat = NC_NOERR; - goto exit; + case NC_ENOOBJECT: stat = NC_NOERR; goto ret; default: goto done; } + if((content = malloc(len+1)) == NULL) {stat = NC_ENOMEM; goto done;} if((stat = nczmap_read(zmap, key, 0, len, (void*)content))) goto done; content[len] = '\0'; - if((stat = NCJparse(content,0,&json)) < 0) - {stat = NC_ENCZARR; goto done;} -exit: + NCJcheck(NCJparse(content,0,&json)); + +ret: if(jsonp) {*jsonp = json; json = NULL;} done: - NCJreclaim(json); + NCZ_reclaim_json(json); nullfree(content); return stat; } @@ -272,7 +266,7 @@ NCZ_downloadjson(NCZMAP* zmap, const char* key, NCjson** jsonp) @author Dennis Heimbigner */ int -NCZ_uploadjson(NCZMAP* zmap, const char* key, NCjson* json) +NCZ_uploadjson(NCZMAP* zmap, const char* key, const NCjson* json) { int stat = NC_NOERR; char* content = NULL; @@ -283,8 +277,7 @@ NCZ_uploadjson(NCZMAP* zmap, const char* key, NCjson* json) fprintf(stderr,"uploadjson: %s\n",key); fflush(stderr); #endif /* Unparse the modified json tree */ - if((stat = NCJunparse(json,0,&content))) - goto done; + NCJcheck(NCJunparse(json,0,&content)); ZTRACEMORE(4,"\tjson=%s",content); if(getenv("NCS3JSON") != NULL) @@ -299,91 +292,6 @@ fprintf(stderr,">>>> uploadjson: %s: %s\n",key,content); return ZUNTRACE(stat); } -#if 0 -/** -@internal create object, return empty dict; ok if already exists. -@param zmap - [in] map -@param key - [in] key of the object -@param jsonp - [out] return parsed json -@return NC_NOERR -@return NC_EINVAL if object exists -@author Dennis Heimbigner -*/ -int -NCZ_createdict(NCZMAP* zmap, const char* key, NCjson** jsonp) -{ - int stat = NC_NOERR; - NCjson* json = NULL; - - /* See if it already exists */ - if((stat = NCZ_downloadjson(zmap,key,&json))) goto done; - ifjson == NULL) { - if((stat = nczmap_def(zmap,key,NCZ_ISMETA))) goto done; - } else { - /* Already exists, fail */ - stat = NC_EINVAL; - goto done; - } - /* Create the empty dictionary */ - if((stat = NCJnew(NCJ_DICT,&json))) - goto done; - if(jsonp) {*jsonp = json; json = NULL;} -done: - NCJreclaim(json); - return stat; -} - -/** -@internal create object, return empty array; ok if already exists. -@param zmap - [in] map -@param key - [in] key of the object -@param jsonp - [out] return parsed json -@return NC_NOERR -@return NC_EINVAL if object exits -@author Dennis Heimbigner -*/ -int -NCZ_createarray(NCZMAP* zmap, const char* key, NCjson** jsonp) -{ - int stat = NC_NOERR; - NCjson* json = NULL; - - if((stat = NCZ_downloadjson(zmap,key,&json))) goto done; - if(json == NULL) { /* create it */ - if((stat = nczmap_def(zmap,key,NCZ_ISMETA))) goto done; - /* Create the initial array */ - if((stat = NCJnew(NCJ_ARRAY,&json))) goto done; - } else { - stat = NC_EINVAL; - goto done; - } - if(json->sort != NCJ_ARRAY) {stat = NC_ENCZARR; goto done;} - if(jsonp) {*jsonp = json; json = NULL;} -done: - NCJreclaim(json); - return stat; -} -#endif /*0*/ - -#if 0 -/** -@internal Given an nc_type, produce the corresponding -default fill value as a string. -@param nctype - [in] nc_type -@param defaltp - [out] pointer to hold pointer to the value -@return NC_NOERR -@author Dennis Heimbigner -*/ - -int -ncz_default_fill_value(nc_type nctype, const char** dfaltp) -{ - if(nctype <= 0 || nctype > NC_MAX_ATOMIC_TYPE) return NC_EINVAL; - if(dfaltp) *dfaltp = zfillvalue[nctype]; - return NC_NOERR; -} -#endif - /** @internal Given an nc_type, produce the corresponding fill value JSON type @@ -396,8 +304,8 @@ fill value JSON type int ncz_fill_value_sort(nc_type nctype, int* sortp) { - if(nctype <= 0 || nctype > NC_MAX_ATOMIC_TYPE) return NC_EINVAL; - if(sortp) *sortp = zjsonsort[nctype]; + if(nctype <= 0 || nctype > N_NCZARR_TYPES) return NC_EINVAL; + if(sortp) *sortp = zobjsort[nctype]; return NC_NOERR; } @@ -413,7 +321,7 @@ NCZ_isLittleEndian(void) return (u.bytes[0] == 1 ? 1 : 0); } - +#if 0 /* Given a path to a group, return the list of objects that contain another object with the name of the tag. @@ -432,7 +340,7 @@ NCZ_subobjects(NCZMAP* map, const char* prefix, const char* tag, char dimsep, NC NCbytes* path = ncbytesnew(); /* Get the list of names just below prefix */ - if((stat = nczmap_search(map,prefix,matches))) goto done; + if((stat = nczmap_list(map,prefix,matches))) goto done; for(i=0;ipure zarr, 0 => nczarr -@param strlen - [in] max string length -@param namep - [out] pointer to hold pointer to the dtype; user frees -@return NC_NOERR -@return NC_EINVAL -@author Dennis Heimbigner -*/ - -int -ncz_nctype2dtype(nc_type nctype, int endianness, int purezarr, int len, char** dnamep) -{ - char dname[64]; - char* format = NULL; - - if(nctype <= NC_NAT || nctype > NC_MAX_ATOMIC_TYPE) return NC_EINVAL; - if(purezarr) - format = znames[nctype].zarr[endianness]; - else - format = znames[nctype].nczarr[endianness]; - snprintf(dname,sizeof(dname),format,len); - if(dnamep) *dnamep = strdup(dname); - return NC_NOERR; -} - -/* -@internal Convert a numcodecs dtype spec to a corresponding nc_type. -@param nctype - [in] dtype the dtype to convert -@param nctype - [in] typehint help disambiguate char vs string -@param purezarr - [in] 1=>pure zarr, 0 => nczarr -@param nctypep - [out] hold corresponding type -@param endianp - [out] hold corresponding endianness -@param typelenp - [out] hold corresponding type size (for fixed length strings) -@return NC_NOERR -@return NC_EINVAL -@author Dennis Heimbigner -*/ - +/* Infer the attribute's type based on its value(s).*/ int -ncz_dtype2nctype(const char* dtype, nc_type typehint, int purezarr, nc_type* nctypep, int* endianp, int* typelenp) +NCZ_inferattrtype(const char* aname, nc_type typehint, const NCjson* values, nc_type* typeidp) { int stat = NC_NOERR; - int typelen = 0; - int count; - char tchar; - nc_type nctype = NC_NAT; - int endianness = -1; - const char* p; - int n; - - if(endianp) *endianp = NC_ENDIAN_NATIVE; - if(nctypep) *nctypep = NC_NAT; - - if(dtype == NULL) goto zerr; - p = dtype; - switch (*p++) { - case '<': endianness = NC_ENDIAN_LITTLE; break; - case '>': endianness = NC_ENDIAN_BIG; break; - case '|': endianness = NC_ENDIAN_NATIVE; break; - default: p--; endianness = NC_ENDIAN_NATIVE; break; - } - tchar = *p++; /* get the base type */ - /* Decode the type length */ - count = sscanf(p,"%d%n",&typelen,&n); - if(count == 0) goto zerr; - p += n; - - /* Short circuit fixed length strings */ - if(tchar == 'S') { - /* Fixed length string */ - switch (typelen) { - case 1: - nctype = (endianness == NC_ENDIAN_BIG ? NC_CHAR : NC_STRING); - if(purezarr) nctype = NC_STRING; /* Zarr has no NC_CHAR type */ - break; - default: - nctype = NC_STRING; - break; - } - /* String/char have no endianness */ - endianness = NC_ENDIAN_NATIVE; - } else { - switch(typelen) { - case 1: - switch (tchar) { - case 'i': nctype = NC_BYTE; break; - case 'u': nctype = NC_UBYTE; break; - default: goto zerr; - } - break; - case 2: - switch (tchar) { - case 'i': nctype = NC_SHORT; break; - case 'u': nctype = NC_USHORT; break; - default: goto zerr; - } - break; - case 4: - switch (tchar) { - case 'i': nctype = NC_INT; break; - case 'u': nctype = NC_UINT; break; - case 'f': nctype = NC_FLOAT; break; - default: goto zerr; - } - break; - case 8: - switch (tchar) { - case 'i': nctype = NC_INT64; break; - case 'u': nctype = NC_UINT64; break; - case 'f': nctype = NC_DOUBLE; break; - default: goto zerr; - } - break; - default: goto zerr; - } - } - -#if 0 - /* Convert NC_ENDIAN_NATIVE and NC_ENDIAN_NA */ - if(endianness == NC_ENDIAN_NATIVE) - endianness = (NC_isLittleEndian()?NC_ENDIAN_LITTLE:NC_ENDIAN_BIG); -#endif - - if(nctypep) *nctypep = nctype; - if(typelenp) *typelenp = typelen; - if(endianp) *endianp = endianness; - -done: - return stat; -zerr: - stat = NC_ENCZARR; - goto done; -} - -/* Infer the attribute's type based -primarily on the first atomic value encountered -recursively. -*/ -int -NCZ_inferattrtype(const NCjson* value, nc_type typehint, nc_type* typeidp) -{ - int i,stat = NC_NOERR; nc_type typeid; - NCjson* j = NULL; unsigned long long u64; long long i64; int negative = 0; + int singleton = 0; + const NCjson* value = NULL; - if(NCJsort(value) == NCJ_ARRAY && NCJlength(value) == 0) + NC_UNUSED(aname); + + if(NCJsort(values) == NCJ_ARRAY && NCJarraylength(values) == 0) {typeid = NC_NAT; goto done;} /* Empty array is illegal */ - if(NCJsort(value) == NCJ_NULL) + if(NCJsort(values) == NCJ_NULL) {typeid = NC_NAT; goto done;} /* NULL is also illegal */ - if(NCJsort(value) == NCJ_DICT) /* Complex JSON expr -- a dictionary */ - {typeid = NC_NAT; goto done;} - - /* If an array, make sure all the elements are simple */ - if(value->sort == NCJ_ARRAY) { - for(i=0;isort == NCJ_ARRAY) { - j=NCJith(value,0); - return NCZ_inferattrtype(j,typehint,typeidp); + /* Get the first element */ + if(NCJsort(values) == NCJ_ARRAY) { + value = NCJith(values,0); + } else if(NCJisatomic(values)) { + value = values; /*singleton*/ + singleton = 1; } - /* At this point, value is a primitive JSON Value */ - + /* Look at the first element */ switch (NCJsort(value)) { case NCJ_NULL: - typeid = NC_NAT; - return NC_NOERR; + case NCJ_UNDEF: + stat = NC_EINVAL; + goto done; + case NCJ_ARRAY: case NCJ_DICT: - typeid = NC_CHAR; + typeid = NC_JSON; goto done; - case NCJ_UNDEF: - return NC_EINVAL; - default: /* atomic */ + default: /* atomic type */ break; } - if(NCJstring(value) != NULL) - negative = (NCJstring(value)[0] == '-'); - switch (value->sort) { + switch (NCJsort(value)) { case NCJ_INT: + if(NCJstring(value) != NULL) negative = (NCJstring(value)[0] == '-'); if(negative) { sscanf(NCJstring(value),"%lld",&i64); u64 = (unsigned long long)i64; @@ -679,12 +432,38 @@ NCZ_inferattrtype(const NCjson* value, nc_type typehint, nc_type* typeidp) case NCJ_BOOLEAN: typeid = NC_UBYTE; break; - case NCJ_STRING: /* requires special handling as an array of characters */ - typeid = NC_CHAR; - break; + case NCJ_STRING: {/* Special cases */ + double* naninfp = NULL; + naninfp = NCZ_isnaninfstring(NCJstring(value)); + if(naninfp == NULL) + typeid = NC_STRING; + else /* Might be double or float */ + typeid = (typehint == NC_NAT ? NC_DOUBLE : typehint); + } break; default: stat = NC_ENCZARR; + goto done; + } + + /* Infer NC_CHAR vs NC_STRING */ + if(typeid == NC_STRING) { + if(singleton && NCJsort(value) == NCJ_STRING) + typeid = NC_CHAR; + else if(NCJsort(values) == NCJ_ARRAY) { + int ischar1; + size_t i; + for(ischar1=1,i=0;ifill_value) { - int tid = var->type_info->hdr.id; - stat = NC_reclaim_data_all(var->container->nc4_info->controller,tid,var->fill_value,1); - var->fill_value = NULL; - } - /* Reclaim any existing fill_chunk */ - if(!stat) stat = NCZ_reclaim_fill_chunk(((NCZ_VAR_INFO_T*)var->format_var_info)->cache); - return stat; -} - -int -NCZ_copy_fill_value(NC_VAR_INFO_T* var, void** dstp) -{ - int stat = NC_NOERR; - int tid = var->type_info->hdr.id; - void* dst = NULL; - - if(var->fill_value) { - if((stat = NC_copy_data_all(var->container->nc4_info->controller,tid,var->fill_value,1,&dst))) goto done; - } - if(dstp) {*dstp = dst; dst = NULL;} -done: - if(dst) (void)NC_reclaim_data_all(var->container->nc4_info->controller,tid,dst,1); - return stat; -} - /* Get max str len for a variable or grp */ /* Has side effect of setting values in the internal data structures */ -int +size_t NCZ_get_maxstrlen(NC_OBJ* obj) { - int maxstrlen = 0; + size_t maxstrlen = 0; assert(obj->sort == NCGRP || obj->sort == NCVAR); if(obj->sort == NCGRP) { NC_GRP_INFO_T* grp = (NC_GRP_INFO_T*)obj; NC_FILE_INFO_T* file = grp->nc4_info; NCZ_FILE_INFO_T* zfile = (NCZ_FILE_INFO_T*)file->format_file_info; if(zfile->default_maxstrlen == 0) - zfile->default_maxstrlen = NCZ_MAXSTR_DEFAULT; + zsetdfaltstrlen(NCZ_MAXSTR_DFALT,file); maxstrlen = zfile->default_maxstrlen; } else { /*(obj->sort == NCVAR)*/ NC_VAR_INFO_T* var = (NC_VAR_INFO_T*)obj; NCZ_VAR_INFO_T* zvar = (NCZ_VAR_INFO_T*)var->format_var_info; if(zvar->maxstrlen == 0) - zvar->maxstrlen = NCZ_get_maxstrlen((NC_OBJ*)var->container); + zsetmaxstrlen(NCZ_get_maxstrlen((NC_OBJ*)var->container),var); maxstrlen = zvar->maxstrlen; } return maxstrlen; } +/* Get dimension separator for a variable */ +/* Has side effect of setting values in the + internal data structures */ +char +NCZ_get_dimsep(NC_VAR_INFO_T* var) +{ + NCZ_VAR_INFO_T* zvar = (NCZ_VAR_INFO_T*)var->format_var_info; + + if(zvar->dimension_separator == '\0') { + NCglobalstate* gs = NC_getglobalstate(); + assert(gs != NULL); + assert(gs->zarr.dimension_separator != '\0'); + zvar->dimension_separator = gs->zarr.dimension_separator; + } + return zvar->dimension_separator; +} + int -NCZ_fixed2char(const void* fixed, char** charp, size_t count, int maxstrlen) +NCZ_fixed2char(const void* fixed, char** charp, size_t count, size_t maxstrlen) { size_t i; unsigned char* sp = NULL; @@ -977,19 +758,18 @@ NCZ_fixed2char(const void* fixed, char** charp, size_t count, int maxstrlen) } int -NCZ_char2fixed(const char** charp, void* fixed, size_t count, int maxstrlen) +NCZ_char2fixed(const char** charp, void* fixed, size_t count, size_t maxstrlen) { size_t i; unsigned char* p = fixed; memset(fixed,0,maxstrlen*count); /* clear target */ for(i=0;i maxstrlen) len = maxstrlen; memcpy(p,charp[i],len); - } else { - memset(p,'\0',maxstrlen); } } return NC_NOERR; @@ -1016,53 +796,792 @@ NCZ_copy_data(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, const void* memory, size return stat; } -#if 0 -/* Recursive helper */ -static int -checksimplejson(NCjson* json, int depth) +/* Return 1 if the attribute will be stored as a complex JSON valued attribute; return 0 otherwise */ +int +NCZ_iscomplexjson(const char* aname, const NCjson* json) { - int i; + int stat = 0; + size_t i; + const NC_reservedatt* rc = NULL; + + /* See if this attribute is reserved and marked as COMPLEXJSON */ + rc = NC_findreserved(aname); + if(rc != NULL && (rc->flags & COMPLEXJSON) != 0) + {stat = 1; goto done;} switch (NCJsort(json)) { case NCJ_ARRAY: - if(depth > 0) return 0; /* e.g. [...,[...],...] or [...,{...},...] */ - for(i=0;i < NCJlength(json);i++) { + /* see if it is a simple vector of atomic values */ + for(i=0;i < NCJarraylength(json);i++) { NCjson* j = NCJith(json,i); - if(!checksimplejson(j,depth+1)) return 0; + if(!NCJisatomic(j)) {stat = 1; goto done;} } break; case NCJ_DICT: case NCJ_NULL: case NCJ_UNDEF: - return 0; + stat = 1; goto done; default: break; } - return 1; +done: + return stat; } -#endif -/* Return 1 if the attribute will be stored as a complex JSON valued attribute; return 0 otherwise */ +/* Return 1 if the attribute value as a string should be stored as complex json +Assumes attribute type is NC_CHAR. The attribute name is involved because +_nczarr_XXX is inherently complex json. + +@param aname name of the attribute +@param text of the attribute as a string +@param jsonp return the parsed json here (if parseable) +@return 1 if is complex json +*/ int -NCZ_iscomplexjson(const NCjson* json, nc_type typehint) +NCZ_iscomplexjsonstring(const char* aname, size_t textlen, const char* text, NCjson** jsonp) { - int i, stat = 0; + int stat = NC_NOERR; + NCjson* json = NULL; + const char* p; + int iscomplex, instring; + size_t i; + const NC_reservedatt* rc = NULL; + + if(jsonp) *jsonp = NULL; + if(text == NULL || textlen < 2) return 0; + + instring = 0; + iscomplex = 0; + + /* See if this attribute is reserved and marked as COMPLEXJSON */ + rc = NC_findreserved(aname); + if(rc != NULL && (rc->flags & COMPLEXJSON) != 0) + {iscomplex = 1; goto loopexit;} + + /* Faster than a full parse */ + for(i=0,p=text;iname); + + /* Compute the parent group of the object */ + switch (obj->sort) { + case NCDIM: grp = ((NC_DIM_INFO_T*)obj)->container; break; + case NCVAR: grp = ((NC_VAR_INFO_T*)obj)->container; break; + case NCTYP: grp = ((NC_TYPE_INFO_T*)obj)->container; break; + case NCGRP: grp = ((NC_GRP_INFO_T*)obj)->parent; break; + default: stat = NC_EINVAL; goto done; break; + } + + /* Collect the group prefix segments (escaped) in forward order; leave out the root group */ + for(;grp->parent!=NULL;grp=grp->parent) { + /* Add in the group name */ + nclistinsert(segments,0,grp->hdr.name); + } + + /* Create the the fqn */ + for(i=0;i objectp contains where it should be) +@return NC_EXXX +*/ +int +NCZ_locateFQN(NC_GRP_INFO_T* parent, const char* fqn, NC_SORT sort, NC_OBJ** objectp, char** basenamep) +{ + int ret = NC_NOERR; + size_t i; + NC_GRP_INFO_T* grp = NULL; + NC_OBJ* object = NULL; + NClist* segments = nclistnew(); + size_t count = 0; + + assert(fqn != NULL && fqn[0] == '/'); + /* Step 1: Break fqn into segments at occurrences of '/' */ + if((ret = splitfqn(fqn,segments))) goto done; + count = nclistlength(segments); + + /* walk to convert to groups + 1 left over for the final object*/ + grp = parent; + for(i=0;ichildren,segment); + if(object == NULL || object->sort != NCGRP) {ret = NC_ENOOBJECT; goto done;} + grp = (NC_GRP_INFO_T*)object; object = NULL; + } + /* Find an object to match the sort and last segment */ + do { + const char* segment = (const char*)nclistget(segments,count-1); /* last segment */ + /* pass up to the caller */ + if(basenamep) *basenamep = strdup(segment); + object = ncindexlookup(grp->children,segment); + if(object != NULL && (sort == NCNAT || sort == NCGRP)) break; /* match */ + object = ncindexlookup(grp->dim,segment); + if(object != NULL && (sort == NCNAT || sort == NCDIM)) break; /* match */ + object = ncindexlookup(grp->vars,segment); + if(object != NULL && (sort == NCNAT || sort == NCVAR)) break; /* match */ + object = ncindexlookup(grp->type,segment); + if(object != NULL && (sort == NCNAT || sort == NCTYP)) break; /* match */ + object = ncindexlookup(grp->att,segment); + if(object != NULL && (sort == NCNAT || sort == NCATT)) break; /* match */ + object = NULL; /* not found */ + } while(0); + if(object == NULL) {object = (NC_OBJ*)grp; ret = NC_ENOOBJECT;} + if(objectp) *objectp = object; +done: + nclistfreeall(segments); + return THROW(ret); +} + +/* Search upward for an object matching the given name and of given sort. +@param startgrp start search here +@param name of the object +@param sort of desired object +@param objectp return pointer to matching object, or if not found, + then to the group where it should have been found. +@param +@return NC_NOERR +@return NC_ENOOBJECT if object not found (=> objectp contains where it should be) +@return NC_EXXX + +Note: if we were searching for type, then the netcdf rule requires searching the whole object tree. +*/ +int +NCZ_search_name(NC_GRP_INFO_T* startgrp, const char* name, NC_SORT sort, NC_OBJ** objectp) +{ + int ret = NC_NOERR; + NC_GRP_INFO_T* grp = NULL; + NC_OBJ* object = NULL; + + /* walk to convert to groups + 1 left over for the final object*/ + for(grp=startgrp;grp != NULL;grp=grp->parent) { + /* Find an object to match the sort and name */ + object = ncindexlookup(grp->children,name); + if(object != NULL && (sort == NCNAT || sort == NCGRP)) break; /* match */ + object = ncindexlookup(grp->dim,name); + if(object != NULL && (sort == NCNAT || sort == NCDIM)) break; /* match */ + object = ncindexlookup(grp->vars,name); + if(object != NULL && (sort == NCNAT || sort == NCVAR)) break; /* match */ + object = ncindexlookup(grp->type,name); + if(object != NULL && (sort == NCNAT || sort == NCTYP)) break; /* match */ + object = ncindexlookup(grp->att,name); + if(object != NULL && (sort == NCNAT || sort == NCATT)) break; /* match */ + object = NULL; /* not found */ + } + if(objectp) *objectp = object; + return THROW(ret); +} + +char* +NCZ_backslashescape(const char* s) +{ + const char* p; + char* q; + size_t len; + char* escaped = NULL; + + len = strlen(s); + escaped = (char*)malloc(1+(2*len)); /* max is everychar is escaped */ + if(escaped == NULL) return NULL; + for(p=s,q=escaped;*p;p++) { + char c = *p; + switch (c) { + case '\\': + case '.': + case '@': + *q++ = '\\'; *q++ = '\\'; + break; + default: *q++ = c; break; } - break; - case NCJ_DICT: - case NCJ_NULL: - case NCJ_UNDEF: - stat = 1; goto done; - default: break; } + *q = '\0'; + return escaped; +} + +char* +NCZ_deescape(const char* esc) +{ + size_t len; + char* s; + const char* p; + char* q; + + if(esc == NULL) return NULL; + len = strlen(esc); + s = (char*)malloc(len+1); + if(s == NULL) return NULL; + for(p=esc,q=s;*p;) { + switch (*p) { + case '\\': + p++; + /* fall thru */ + default: *q++ = *p++; break; + } + } + *q = '\0'; + return s; +} + +int +NCZ_sortstringlist(void* vec, size_t count) +{ + if(vec != NULL && count > 0) { + qsort(vec, count, sizeof(void*), cmp_strings); + } + return NC_NOERR; +} + +void +NCZ_setatts_read(NC_OBJ* container) +{ + if(container->sort == NCGRP) + ((NC_GRP_INFO_T*)container)->atts_read = 1; + else /* container->sort == NCVAR */ + ((NC_VAR_INFO_T*)container)->atts_read = 1; +} + +/* Convert a list of integer strings to size64_t integers */ +int +NCZ_decodesizet64vec(const NCjson* jshape, size_t* rankp, size64_t* shapes) +{ + int stat = NC_NOERR; + size_t i; + + if(rankp) *rankp = NCJarraylength(jshape); + for(i=0;ifqn); + nullfree(dd); + } +} + +void +NCZ_reclaim_dimdecl_list(NClist* dimdecls) +{ + if(dimdecls != NULL) { + size_t i; + for(i=0;inorm_name); /* Use this name as candidate */ + + /* See if there is an accessible consistent dimension with same name */ + if((stat = locateconsistentdim(file,parent,dimdata,!TESTUNLIM,&dim,&grp))) goto done; + + if(dim != NULL) goto ret; /* we found a consistent dim already exists */ + if(dim == NULL && grp == NULL) goto ret; /* Ok to create the dim in the parent group */ + + /* Dim exists, but is inconsistent */ + /* Otherwise, we have to find a unique name that can be created in parent group */ + for(loopcounter=1;;loopcounter++) { + /* cleanup from last loop */ + dim = NULL; /* reset loop exit */ + /* Make unique name using loopcounter */ + ncbytesclear(newname); + ncbytescat(newname,dimdata->norm_name); + snprintf(digits,sizeof(digits),"_%zu",loopcounter); + ncbytescat(newname,digits); + /* See if there is an accessible dimension with same name and in this parent group */ + dim = (NC_DIM_INFO_T*)ncindexlookup(parent->dim,dimdata->norm_name); + if(dim != NULL && isconsistentdim(dim,dimdata,!TESTUNLIM)) { + /* Return this name */ + ncbytesclear(dimname); + ncbytescat(dimname,ncbytescontents(newname)); + break; + } /* else try another name */ + } /* loopcounter */ + +ret: + if(dimp) *dimp = dim; + done: + ncbytesfree(newname); + return THROW(stat); +} + +/* Get one of multiple key alternatives from a dict */ +static int +dictgetaltn(const NCjson* jdict, const NCjson** jvaluep, size_t nkeys, const char** keys) +{ + int stat = NC_NOERR; + const NCjson* jvalue = NULL; + const char** pkey; + size_t i; + + for(i=0,pkey=keys;iformat_file_info; + const NCjson* jxxx = NULL; + + /* Try jatts first */ + if(zobj->jatts != NULL) { + jxxx = NULL; + NCJcheck(NCJdictget(zobj->jatts,name,(NCjson**)&jxxx)); + } + if(jxxx == NULL) { + /* Try .zxxx second */ + if(zobj->jobj != NULL) { + NCJcheck(NCJdictget(zobj->jobj,name,(NCjson**)&jxxx)); + } + /* Mark as old style with _nczarr_xxx in obj as keys not attributes */ + zfile->flags |= FLAG_NCZARR_KEY; + } + if(jncxxxp) *jncxxxp = jxxx; +done: + return THROW(stat); +} + +void +NCZ_clearAttrInfo(NC_FILE_INFO_T* file, struct NCZ_AttrInfo* ainfo) +{ + if(ainfo == NULL) return; + if(ainfo->data != NULL) { + assert(ainfo->datalen > 0); + (void)NC_reclaim_data_all(file->controller,ainfo->nctype,ainfo->data,ainfo->datalen); + } + *ainfo = NCZ_emptyAttrInfo(); +} + +struct NCZ_AttrInfo +NCZ_emptyAttrInfo(void) +{ + static struct NCZ_AttrInfo ai = {NULL,NC_NAT,0,NC_ENDIAN_NATIVE,0,NULL}; + return ai; +} + +/**************************************************/ +#if 0 +/* Convert a JSON singleton or array of strings to a single string */ +static int +zcharify(const NCjson* src, NCbytes* buf) +{ + int stat = NC_NOERR; + size_t i; + struct NCJconst jstr; + + memset(&jstr,0,sizeof(jstr)); + + if(NCJsort(src) != NCJ_ARRAY) { /* singleton */ + NCJcheck(NCJcvt(src, NCJ_STRING, &jstr)); + ncbytescat(buf,jstr.sval); + } else for(i=0;i 0) count=1; else count = 0; + /* Break fqn into pieces at occurrences of '/' */ + for(p=start;*p;) { + switch(*p) { + case '\\': + p+=2; + break; + case '/': /*capture the piece name */ + *p++ = '\0'; + start = p; /* mark start of the next part */ + count++; + break; + default: /* ordinary char */ + p++; + break; + } + } + /* collect segments */ + p = fqn+1; + for(i=0;itest matching unlimited flags; 0=>test for size only +*/ +static int +isconsistentdim(NC_DIM_INFO_T* dim, NCZ_DimInfo* dimdata, int testunlim) +{ + if(dim->len != dimdata->shape) return 0; + if(testunlim) { + if(dim->unlimited && !dimdata->unlimited) return 0; + if(!dim->unlimited && dimdata->unlimited) return 0; + } + return 1; +} + +/** Locate a dimension by name only moving to higher groups as needed. +@param file dataset +@param grp grp to start search +@param dimname to find +@param dimp store dim here; null if not found +@param grpp store grp containing the matched dimension +Note: *dimp != NULL => *grpp != NULL && *dimp==NULL => *grpp==NULL +Note: *dimp == NULL => no dim exists with matching name +*/ +static int +locatedimbyname(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, const char* dimname, NC_DIM_INFO_T** dimp, NC_GRP_INFO_T** grpp) +{ + int stat = NC_NOERR; + NC_DIM_INFO_T* dim = NULL; + NC_GRP_INFO_T* g = NULL; + NC_GRP_INFO_T* dimg = NULL; + + NC_UNUSED(file); + + if(dimp) *dimp = NULL; + if(grpp) *grpp = NULL; + + /* Search upwards in containing groups */ + for(g=grp;g != NULL;g=g->parent) { + dim = (NC_DIM_INFO_T*)ncindexlookup(g->dim,dimname); + if(dim != NULL) {dimg = g; break;} + dim = NULL; + } + if(dimp) *dimp = dim; + if(grpp) *grpp = dimg; + return THROW(stat); +} + +/** Locate a dimension by dimdata moving to higher groups as needed. +@param file dataset +@param grp grp to start search +@param dimdata for consistency test +@param testunlim 1 => include unlim in test +@param dimp store dim here; null if not found +@param grpp store grp containing dim here; +Note: *dimp != NULL => *grpp != NULL && *dimp==NULL => *grpp==NULL +Note that *dimp==NULL && *grpp==NULL => there was no dim with given name. +*/ +static int +locateconsistentdim(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NCZ_DimInfo* dimdata, int testunlim, NC_DIM_INFO_T** dimp, NC_GRP_INFO_T** grpp) +{ + int stat = NC_NOERR; + NC_DIM_INFO_T* dim = NULL; + NC_GRP_INFO_T* g = NULL; + + NC_UNUSED(testunlim); + + if(dimp) *dimp = NULL; + + for(g=grp;g != NULL;g=g->parent,dim=NULL) { + if((stat = locatedimbyname(file,g,dimdata->norm_name,&dim,&g))) goto done; + if(dim == NULL) break; /* no name match */ + /* See if consistent */ + if(isconsistentdim(dim,dimdata,!TESTUNLIM)) break; /* use this dim */ + } + + if(dimp) *dimp = dim; + if(grpp) *grpp = g; +done: + return THROW(stat); +} + +#if 0 +/** +Implement the JSON convention: +Parse it as JSON and use that as its value in .zattrs. +*/ +static int +json_convention_write(size_t len, const void* data, NCjson** jsonp, int* isjsonp) +{ + int stat = NC_NOERR; + NCjson* jexpr = NULL; + int isjson = 0; + + assert(jsonp != NULL); + if(NCJparsen(len,(char*)data,0,&jexpr)) { + /* Ok, just treat as sequence of chars */ + NCJnewstringn(NCJ_STRING, len, data, &jexpr); + } + isjson = 1; + *jsonp = jexpr; jexpr = NULL; + if(isjsonp) *isjsonp = isjson; +done: + NCZ_reclaim_json(jexpr); + return stat; +} +#endif + +/* Support for nan and inf as strings */ + +/* De-stringified nan and inf (all lower case)*/ +/* Warning: modified during sort so do not make const */ +static struct NANINF {const char* name; double dvalue;} naninfnames[] = { +{"nanf", NAN }, +{"nan", NAN }, +{"inff", INFINITY }, +{"inf", INFINITY }, +{"infinityf", INFINITY }, +{"infinity", INFINITY }, +{"-infinityf", -INFINITY}, +{"-infinity", -INFINITY}, +}; +#define NNANINF (sizeof(naninfnames)/sizeof(struct NANINF)) +static int naninfsorted = 0; + +static int +nisort(const void* a, const void* b) +{ + const struct NANINF* nia = (struct NANINF*)a; + const struct NANINF* nib = (struct NANINF*)b; + return strcasecmp(nia->name,nib->name); +} + +static int +nicmp(const void* key, const void* elem) +{ + const struct NANINF* nie = (struct NANINF*)elem; + return strcasecmp(key,nie->name); +} + +double* +NCZ_isnaninfstring(const char* val) +{ + struct NANINF* match = NULL; + if(!naninfsorted) { + qsort((void*)naninfnames, NNANINF, sizeof(struct NANINF), nisort); + naninfsorted = 1; + } + /* Binary search the set of nan/inf names */ + assert(naninfsorted); + match = (struct NANINF*)bsearch((void*)val,(void*)naninfnames,NNANINF,sizeof(struct NANINF),nicmp); + return (match == NULL ? NULL : &match->dvalue); +} diff --git a/libnczarr/zvar.c b/libnczarr/zvar.c index eb26eb7eda..2e5fbcc9fd 100644 --- a/libnczarr/zvar.c +++ b/libnczarr/zvar.c @@ -9,19 +9,24 @@ * @author Dennis Heimbigner, Ed Hartnett */ -#include "zincludes.h" #include /* For pow() used below. */ +#include "zincludes.h" +#include "zfill.h" /* Mnemonics */ #define CREATE 0 #define NOCREATE 1 +/* Forward */ +static int NCZ_fillin_var(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, NC_TYPE_INFO_T* type, + size_t ndims, const int* dimids, int endianness); + #ifdef LOGGING static void reportchunking(const char* title, NC_VAR_INFO_T* var) { - int i; + size_t i; char buf[8192]; buf[0] = '\0'; /* for strlcat */ @@ -69,187 +74,26 @@ reportchunking(const char* title, NC_VAR_INFO_T* var) static int check_chunksizes(NC_GRP_INFO_T *grp, NC_VAR_INFO_T *var, const size_t *chunksizes) { - double dprod; + size_t dprod; size_t type_len; - int d; + size_t d; int retval = NC_NOERR; if ((retval = nc4_get_typelen_mem(grp->nc4_info, var->type_info->hdr.id, &type_len))) goto done; if (var->type_info->nc_type_class == NC_VLEN) - dprod = (double)sizeof(nc_hvl_t); + dprod = sizeof(nc_hvl_t); else - dprod = (double)type_len; + dprod = type_len; for (d = 0; d < var->ndims; d++) - dprod *= (double)chunksizes[d]; + dprod *= chunksizes[d]; - if (dprod > (double) NC_MAX_UINT) + if (dprod > NC_MAX_UINT) {retval = NC_EBADCHUNK; goto done;} done: return retval; } -/** - * @internal Determine some default chunksizes for a variable. - * - * @param grp Pointer to the group info. - * @param var Pointer to the var info. - * - * @returns ::NC_NOERR for success - * @returns ::NC_EBADID Bad ncid. - * @returns ::NC_ENOTVAR Invalid variable ID. - * @author Dennis Heimbigner, Ed Hartnett - */ -int -ncz_find_default_chunksizes2(NC_GRP_INFO_T *grp, NC_VAR_INFO_T *var) -{ - int d; - size_t type_size; - float num_values = 1, num_unlim = 0; - int retval; - size_t suggested_size; -#ifdef LOGGING - double total_chunk_size; -#endif - - type_size = var->type_info->size; - -#ifdef LOGGING - /* Later this will become the total number of bytes in the default - * chunk. */ - total_chunk_size = (double) type_size; -#endif - - if(var->chunksizes == NULL) { - if((var->chunksizes = calloc(1,sizeof(size_t)*var->ndims)) == NULL) - return NC_ENOMEM; - } - - /* How many values in the variable (or one record, if there are - * unlimited dimensions). */ - for (d = 0; d < var->ndims; d++) - { - assert(var->dim[d]); - if (! var->dim[d]->unlimited) - num_values *= (float)var->dim[d]->len; - else { - num_unlim++; - var->chunksizes[d] = 1; /* overwritten below, if all dims are unlimited */ - } - } - /* Special case to avoid 1D vars with unlim dim taking huge amount - of space (DEFAULT_CHUNK_SIZE bytes). Instead we limit to about - 4KB */ - if (var->ndims == 1 && num_unlim == 1) { - if (DEFAULT_CHUNK_SIZE / type_size <= 0) - suggested_size = 1; - else if (DEFAULT_CHUNK_SIZE / type_size > DEFAULT_1D_UNLIM_SIZE) - suggested_size = DEFAULT_1D_UNLIM_SIZE; - else - suggested_size = DEFAULT_CHUNK_SIZE / type_size; - var->chunksizes[0] = suggested_size / type_size; - LOG((4, "%s: name %s dim %d DEFAULT_CHUNK_SIZE %d num_values %f type_size %d " - "chunksize %ld", __func__, var->hdr.name, d, DEFAULT_CHUNK_SIZE, num_values, type_size, var->chunksizes[0])); - } - if (var->ndims > 1 && var->ndims == num_unlim) { /* all dims unlimited */ - suggested_size = pow((double)DEFAULT_CHUNK_SIZE/type_size, 1.0/(double)(var->ndims)); - for (d = 0; d < var->ndims; d++) - { - var->chunksizes[d] = suggested_size ? suggested_size : 1; - LOG((4, "%s: name %s dim %d DEFAULT_CHUNK_SIZE %d num_values %f type_size %d " - "chunksize %ld", __func__, var->hdr.name, d, DEFAULT_CHUNK_SIZE, num_values, type_size, var->chunksizes[d])); - } - } - - /* Pick a chunk length for each dimension, if one has not already - * been picked above. */ - for (d = 0; d < var->ndims; d++) - if (!var->chunksizes[d]) - { - suggested_size = (pow((double)DEFAULT_CHUNK_SIZE/(num_values * type_size), - 1.0/(double)(var->ndims - num_unlim)) * var->dim[d]->len - .5); - if (suggested_size > var->dim[d]->len) - suggested_size = var->dim[d]->len; - var->chunksizes[d] = suggested_size ? suggested_size : 1; - LOG((4, "%s: name %s dim %d DEFAULT_CHUNK_SIZE %d num_values %f type_size %d " - "chunksize %ld", __func__, var->hdr.name, d, DEFAULT_CHUNK_SIZE, num_values, type_size, var->chunksizes[d])); - } - -#ifdef LOGGING - /* Find total chunk size. */ - for (d = 0; d < var->ndims; d++) - total_chunk_size *= (double) var->chunksizes[d]; - LOG((4, "total_chunk_size %f", total_chunk_size)); -#endif - - /* But did this result in a chunk that is too big? */ - retval = check_chunksizes(grp, var, var->chunksizes); - if (retval) - { - /* Other error? */ - if (retval != NC_EBADCHUNK) - return THROW(retval); - - /* Chunk is too big! Reduce each dimension by half and try again. */ - for ( ; retval == NC_EBADCHUNK; retval = check_chunksizes(grp, var, var->chunksizes)) - for (d = 0; d < var->ndims; d++) - var->chunksizes[d] = var->chunksizes[d]/2 ? var->chunksizes[d]/2 : 1; - } - - /* Do we have any big data overhangs? They can be dangerous to - * babies, the elderly, or confused campers who have had too much - * beer. */ - for (d = 0; d < var->ndims; d++) - { - size_t num_chunks; - size_t overhang; - assert(var->chunksizes[d] > 0); - num_chunks = (var->dim[d]->len + var->chunksizes[d] - 1) / var->chunksizes[d]; - if(num_chunks > 0) { - overhang = (num_chunks * var->chunksizes[d]) - var->dim[d]->len; - var->chunksizes[d] -= overhang / num_chunks; - } - } - -#ifdef LOGGING -reportchunking("find_default: ",var); -#endif - return NC_NOERR; -} - -#if 0 -/** - * @internal Give a var a secret ZARR name. This is needed when a var - * is defined with the same name as a dim, but it is not a coord var - * of that dim. In that case, the var uses a secret name inside the - * ZARR file. - * - * @param var Pointer to var info. - * @param name Name to use for base of secret name. - * - * @returns ::NC_NOERR No error. - * @returns ::NC_EMAXNAME Name too long to fit secret prefix. - * @returns ::NC_ENOMEM Out of memory. - * @author Dennis Heimbigner, Ed Hartnett - */ -static int -give_var_secret_name(NC_VAR_INFO_T *var, const char *name) -{ - /* Set a different ncz name for this variable to avoid name - * clash. */ - if (strlen(name) + strlen(NON_COORD_PREPEND) > NC_MAX_NAME) - return NC_EMAXNAME; - size_t ncz_name_size = (strlen(NON_COORD_PREPEND) + strlen(name) + 1) * - sizeof(char); - if (!(var->ncz_name = malloc(ncz_name_size))) - return NC_ENOMEM; - - snprintf(var->ncz_name, ncz_name_size, "%s%s", NON_COORD_PREPEND, name); - - return NC_NOERR; -} -#endif /*0*/ - /** * @internal This is called when a new netCDF-4 variable is defined * with nc_def_var(). @@ -283,20 +127,18 @@ give_var_secret_name(NC_VAR_INFO_T *var, const char *name) */ int NCZ_def_var(int ncid, const char *name, nc_type xtype, int ndims, - const int *dimidsp, int *varidp) + const int *dimids, int *varidp) { NC_GRP_INFO_T *grp; NC_VAR_INFO_T *var; NC_DIM_INFO_T *dim; NC_FILE_INFO_T *h5; - NC_TYPE_INFO_T *type = NULL; - NCZ_VAR_INFO_T* zvar = NULL; char norm_name[NC_MAX_NAME + 1]; int d; int retval; - NCglobalstate* gstate = NC_getglobalstate(); + NC_TYPE_INFO_T *type = NULL; - ZTRACE(1,"ncid=%d name=%s xtype=%d ndims=%d dimids=%s",ncid,name,xtype,ndims,nczprint_idvector(ndims,dimidsp)); + ZTRACE(1,"ncid=%d name=%s xtype=%d ndims=%d dimids=%s",ncid,name,xtype,ndims,nczprint_idvector(ndims,dimids)); /* Find info for this file and group, and set pointer to each. */ if ((retval = nc4_find_grp_h5(ncid, &grp, &h5))) @@ -346,12 +188,12 @@ NCZ_def_var(int ncid, const char *name, nc_type xtype, int ndims, BAIL(retval); /* For non-scalar vars, dim IDs must be provided. */ - if (ndims && !dimidsp) + if (ndims && !dimids) BAIL(NC_EINVAL); /* Check all the dimids to make sure they exist. */ for (d = 0; d < ndims; d++) - if ((retval = nc4_find_dim(grp, dimidsp[d], &dim, NULL))) + if ((retval = nc4_find_dim(grp, dimids[d], &dim, NULL))) BAIL(retval); /* These degrubbing messages sure are handy! */ @@ -360,7 +202,7 @@ NCZ_def_var(int ncid, const char *name, nc_type xtype, int ndims, { int dd; for (dd = 0; dd < ndims; dd++) - LOG((4, "dimid[%d] %d", dd, dimidsp[dd])); + LOG((4, "dimid[%d] %d", dd, dimids[dd])); } #endif @@ -374,20 +216,52 @@ NCZ_def_var(int ncid, const char *name, nc_type xtype, int ndims, if ((retval = nc4_var_list_add(grp, norm_name, ndims, &var))) BAIL(retval); + /* Set values for the remaining NC_VAR_INFO_T fields */ + retval = NCZ_fillin_var(h5, var, type, (size_t)ndims, dimids, NC_ENDIAN_NATIVE); + + if(retval == NC_NOERR) { + /* Return the varid. */ + if (varidp) + *varidp = var->hdr.id; + type = NULL; + } +exit: + if (type) + if ((retval = nc4_type_free(type))) + BAILLOG(retval); + return ZUNTRACE(retval); +} + +/** +Encapsulate the fillin of the variable data. Not all data is filled in, +but important &/or complex data is filled in. +*/ +static int +NCZ_fillin_var(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, NC_TYPE_INFO_T* type, + size_t ndims, const int* dimids, int endianness) +{ + int stat = NC_NOERR; + size_t d; + NC_GRP_INFO_T* grp = file->root_grp; /* only used to get back to file */ + NC_DIM_INFO_T* dim = NULL; + NCZ_VAR_INFO_T* zvar = NULL; + NCglobalstate* gs = NC_getglobalstate(); + nc_type vartypeid = NC_NAT; + size_t typesize = 0; + /* Add storage for NCZ-specific var info. */ - if (!(var->format_var_info = calloc(1, sizeof(NCZ_VAR_INFO_T)))) - BAIL(NC_ENOMEM); - zvar = var->format_var_info; - zvar->common.file = h5; + if (!(zvar = calloc(1, sizeof(NCZ_VAR_INFO_T)))) {stat = NC_ENOMEM; goto done;} + var->format_var_info = zvar; + zvar->common.file = file; zvar->scalar = (ndims == 0 ? 1 : 0); - zvar->dimension_separator = gstate->zarr.dimension_separator; + zvar->dimension_separator = gs->zarr.dimension_separator; assert(zvar->dimension_separator != 0); /* Set these state flags for the var. */ var->is_new_var = NC_TRUE; var->meta_read = NC_TRUE; - var->atts_read = NC_TRUE; + NCZ_setatts_read((NC_OBJ*)var); #ifdef NETCDF_ENABLE_NCZARR_FILTERS /* Set the filter list */ @@ -400,82 +274,92 @@ NCZ_def_var(int ncid, const char *name, nc_type xtype, int ndims, #ifdef LOOK var->type_info->rc++; #endif - type = NULL; + vartypeid = type->hdr.id; + typesize = type->size; /* Propagate the endianness */ - var->endianness = var->type_info->endianness; + if(endianness == NC_ENDIAN_NATIVE) + var->endianness = var->type_info->endianness; + else + var->endianness = endianness; + var->type_info->endianness = var->endianness; /* back prop */ + + /* Indicate we do not have quantizer yet */ + var->quantize_mode = 0; + + /* should we use contiguous or chunked storage. */ + var->storage = (zvar->scalar?NC_CONTIGUOUS:NC_CHUNKED); + + /* Assign dimensions to the variable. */ + + /* Save the rank of the variable */ + if((stat = nc4_var_set_ndims(var, (int)ndims))) goto done; + + - /* Set variables no_fill to match the database default unless the - * variable type is variable length (NC_STRING or NC_VLEN) or is - * user-defined type. */ - if (var->type_info->nc_type_class <= NC_STRING) - var->no_fill = (h5->fill_mode == NC_NOFILL); - - /* Assign dimensions to the variable. At the same time, check to - * see if this is a coordinate variable. If so, it will have the - * same name as one of its dimensions. If it is a coordinate var, - * is it a coordinate var in the same group as the dim? Also, check - * whether we should use contiguous or chunked storage. */ - var->storage = NC_CHUNKED; for (d = 0; d < ndims; d++) { NC_GRP_INFO_T *dim_grp; /* Look up each dimension */ - if ((retval = nc4_find_dim(grp, dimidsp[d], &dim, &dim_grp))) - BAIL(retval); + if ((stat = nc4_find_dim(grp, dimids[d], &dim, &dim_grp))) goto done; assert(dim && dim->format_dim_info); /* Check for unlimited dimension and turn off contiguous storage. */ if (dim->unlimited) var->storage = NC_CHUNKED; /* Track dimensions for variable */ - var->dimids[d] = dimidsp[d]; + var->dimids[d] = dimids[d]; var->dim[d] = dim; } - /* Determine default chunksizes for this variable. (Even for - * variables which may be contiguous.) */ + /* Determine chunksizes for this variable. (Even + * variables which may be scalar) */ LOG((4, "allocating array of %d size_t to hold chunksizes for var %s", var->ndims, var->hdr.name)); - if(!var->chunksizes) { - if(var->ndims) { - if (!(var->chunksizes = calloc(var->ndims, sizeof(size_t)))) - BAIL(NC_ENOMEM); - if ((retval = ncz_find_default_chunksizes2(grp, var))) - BAIL(retval); - } else { - /* Pretend that scalars are like var[1] */ - if (!(var->chunksizes = calloc(1, sizeof(size_t)))) - BAIL(NC_ENOMEM); - var->chunksizes[0] = 1; - } + if(var->ndims > 0) { + assert(var->chunksizes == NULL); + if (!(var->chunksizes = calloc(var->ndims, sizeof(size_t)))) {stat = NC_ENOMEM; goto done;} + if ((stat = ncz_find_default_chunksizes2(grp, var))) goto done; + } else { + /* Pretend that scalars are like var[1] */ + if (!(var->chunksizes = calloc(1, sizeof(size_t)))) {stat = NC_ENOMEM; goto done;} + var->chunksizes[0] = 1; } /* Compute the chunksize cross product */ zvar->chunkproduct = 1; if(!zvar->scalar) - {for(d=0;dndims;d++) {zvar->chunkproduct *= var->chunksizes[d];}} - zvar->chunksize = zvar->chunkproduct * var->type_info->size; + {size_t k; for(k=0;kndims;k++) {zvar->chunkproduct *= var->chunksizes[k];}} + zvar->chunksize = zvar->chunkproduct * typesize; /* Set cache defaults */ - var->chunkcache = gstate->chunkcache; + var->chunkcache = gs->chunkcache; /* Create the cache */ - if((retval=NCZ_create_chunk_cache(var,zvar->chunkproduct*var->type_info->size,zvar->dimension_separator,&zvar->cache))) - BAIL(retval); + if((stat=NCZ_create_chunk_cache(var,zvar->chunkproduct*typesize,zvar->dimension_separator,&zvar->cache))) + goto done; /* Set the per-variable chunkcache defaults */ zvar->cache->params = var->chunkcache; - /* Return the varid. */ - if (varidp) - *varidp = var->hdr.id; - LOG((4, "new varid %d", var->hdr.id)); + /* Set variables no_fill to match the database default unless the + * variable type is variable length (NC_STRING or NC_VLEN) or is + * user-defined type. */ + if (var->type_info->nc_type_class <= NC_STRING) { /* Make fill flags consistent */ + var->no_fill = file->fill_mode; + if(var->no_fill == NC_NOFILL) { + if((stat = NCZ_disable_fill(file,var))) goto done; + var->fill_val_changed = 0; /* But pretend it has not been changed */ + } else { + const void* dfaltfillvalue = NCZ_getdfaltfillvalue(vartypeid); + /* Since do not have the variable attributes (yet), set var->fill_value to default */ + if((stat = NC_copy_data_all(file->controller,vartypeid,dfaltfillvalue,1,&var->fill_value))) goto done; + var->fill_val_changed = 1; + } + } + if((stat = NCZ_adjust_var_cache(var))) goto done; -exit: - if (type) - if ((retval = nc4_type_free(type))) - BAILLOG(retval); - return ZUNTRACE(retval); +done: + return THROW(stat); } /** @@ -513,43 +397,46 @@ static int ncz_def_var_extra(int ncid, int varid, int *shuffle, int *unused1, int *unused2, int *fletcher32, int *storagep, const size_t *chunksizes, int *no_fill, - const void *fill_value, int *endianness, + const void *fillvalue, int *endianness, int *quantize_mode, int *nsd) { NC_GRP_INFO_T *grp; NC_FILE_INFO_T *h5; NC_VAR_INFO_T *var; NCZ_VAR_INFO_T *zvar; - int d; - int retval = NC_NOERR; + size_t d; + int stat = NC_NOERR; int storage = NC_CHUNKED; size_t contigchunksizes[NC_MAX_VAR_DIMS]; /* Fake chunksizes if storage is contiguous or compact */ + NC_UNUSED(unused1); + NC_UNUSED(unused2); + LOG((2, "%s: ncid 0x%x varid %d", __func__, ncid, varid)); - ZTRACE(2,"ncid=%d varid=%d shuffle=%d fletcher32=%d no_fill=%d, fill_value=%p endianness=%d quantize_mode=%d nsd=%d", + ZTRACE(2,"ncid=%d varid=%d shuffle=%d fletcher32=%d no_fill=%d, fillvalue=%p endianness=%d quantize_mode=%d nsd=%d", ncid,varid, (shuffle?*shuffle:-1), (fletcher32?*fletcher32:-1), (no_fill?*no_fill:-1), - fill_value, + fillvalue, (endianness?*endianness:-1), (quantize_mode?*quantize_mode:-1), (nsd?*nsd:-1) ); /* Find info for this file and group, and set pointer to each. */ - if ((retval = nc4_find_nc_grp_h5(ncid, NULL, &grp, &h5))) + if ((stat = nc4_find_nc_grp_h5(ncid, NULL, &grp, &h5))) goto done; assert(grp && h5); /* Trying to write to a read-only file? No way, Jose! */ if (h5->no_write) - {retval = NC_EPERM; goto done;} + {stat = NC_EPERM; goto done;} /* Find the var. */ - if (!(var = (NC_VAR_INFO_T *)ncindexith(grp->vars, varid))) - {retval = NC_ENOTVAR; goto done;} + if (!(var = (NC_VAR_INFO_T *)ncindexith(grp->vars, (size_t)varid))) + {stat = NC_ENOTVAR; goto done;} assert(var && var->hdr.id == varid); zvar = var->format_var_info; @@ -562,61 +449,29 @@ ncz_def_var_extra(int ncid, int varid, int *shuffle, int *unused1, #ifndef HDF5_SUPPORTS_PAR_FILTERS if (h5->parallel == NC_TRUE) if (nclistlength(((NClist*)var->filters)) > 0 || fletcher32 || shuffle) - {retval = NC_EINVAL; goto done;} + {stat = NC_EINVAL; goto done;} #endif #endif /* If the HDF5 dataset has already been created, then it is too * late to set all the extra stuff. */ if (var->created) - {retval = NC_ELATEDEF; goto done;} - -#if 0 - /* Check compression options. */ - if (deflate && !deflate_level) - {retval = NC_EINVAL; goto done;} - - /* Valid deflate level? */ - if (deflate) - { - if (*deflate) - if (*deflate_level < NC_MIN_DEFLATE_LEVEL || - *deflate_level > NC_MAX_DEFLATE_LEVEL) - {retval = NC_EINVAL; goto done;} - - /* For scalars, just ignore attempt to deflate. */ - if (!var->ndims) - goto done; - - /* If szip is in use, return an error. */ - if ((retval = nc_inq_var_szip(ncid, varid, &option_mask, NULL))) - goto done; - if (option_mask) - {retval = NC_EINVAL; goto done;} - - /* Set the deflate settings. */ - var->storage = NC_CONTIGUOUS; - var->deflate = *deflate; - if (*deflate) - var->deflate_level = *deflate_level; - LOG((3, "%s: *deflate_level %d", __func__, *deflate_level)); - } -#endif + {stat = NC_ELATEDEF; goto done;} /* Shuffle filter? */ if (shuffle && *shuffle) { - retval = nc_inq_var_filter_info(ncid,varid,H5Z_FILTER_SHUFFLE,NULL,NULL); - if(!retval || retval == NC_ENOFILTER) { - if((retval = NCZ_def_var_filter(ncid,varid,H5Z_FILTER_SHUFFLE,0,NULL))) return retval; + stat = nc_inq_var_filter_info(ncid,varid,H5Z_FILTER_SHUFFLE,NULL,NULL); + if(!stat || stat == NC_ENOFILTER) { + if((stat = NCZ_def_var_filter(ncid,varid,H5Z_FILTER_SHUFFLE,0,NULL))) return stat; var->storage = NC_CHUNKED; } } /* Fletcher32 checksum error protection? */ if (fletcher32 && fletcher32) { - retval = nc_inq_var_filter_info(ncid,varid,H5Z_FILTER_FLETCHER32,NULL,NULL); - if(!retval || retval == NC_ENOFILTER) { - if((retval = NCZ_def_var_filter(ncid,varid,H5Z_FILTER_FLETCHER32,0,NULL))) return retval; + stat = nc_inq_var_filter_info(ncid,varid,H5Z_FILTER_FLETCHER32,NULL,NULL); + if(!stat || stat == NC_ENOFILTER) { + if((stat = NCZ_def_var_filter(ncid,varid,H5Z_FILTER_FLETCHER32,0,NULL))) return stat; var->storage = NC_CHUNKED; } } @@ -632,11 +487,11 @@ ncz_def_var_extra(int ncid, int varid, int *shuffle, int *unused1, { #ifdef NCZARR_FILTERS if (nclistlength(((NClist*)var->filters)) > 0) - {retval = NC_EINVAL; goto done;} + {stat = NC_EINVAL; goto done;} #endif for (d = 0; d < var->ndims; d++) { if (var->dim[d]->unlimited) - {retval = NC_EINVAL; goto done;} + {stat = NC_EINVAL; goto done;} contigchunksizes[d] = var->dim[d]->len; /* Fake a single big chunk */ } chunksizes = (const size_t*)contigchunksizes; @@ -644,7 +499,7 @@ ncz_def_var_extra(int ncid, int varid, int *shuffle, int *unused1, } if (storage == NC_CHUNKED && var->ndims == 0) { - {retval = NC_EINVAL; goto done;} + {stat = NC_EINVAL; goto done;} } else if (storage == NC_CHUNKED && var->ndims > 0) { var->storage = NC_CHUNKED; @@ -653,14 +508,14 @@ ncz_def_var_extra(int ncid, int varid, int *shuffle, int *unused1, if (chunksizes) { /* Check the chunksizes for validity. */ - if ((retval = check_chunksizes(grp, var, chunksizes))) + if ((stat = check_chunksizes(grp, var, chunksizes))) goto done; /* Ensure chunksize is smaller than dimension size */ for (d = 0; d < var->ndims; d++) if (!var->dim[d]->unlimited && var->dim[d]->len > 0 && chunksizes[d] > var->dim[d]->len) - {retval = NC_EBADCHUNK; goto done;} + {stat = NC_EBADCHUNK; goto done;} } } @@ -680,7 +535,7 @@ ncz_def_var_extra(int ncid, int varid, int *shuffle, int *unused1, } /* If chunksizes == NULL or anyzero then use defaults */ if(chunksizes == NULL || anyzero) { /* Use default chunking */ - if ((retval = ncz_find_default_chunksizes2(grp, var))) + if ((stat = ncz_find_default_chunksizes2(grp, var))) goto done; } assert(var->chunksizes != NULL); @@ -691,7 +546,7 @@ ncz_def_var_extra(int ncid, int varid, int *shuffle, int *unused1, zvar->chunksize = zvar->chunkproduct * var->type_info->size; } /* Adjust cache */ - if((retval = NCZ_adjust_var_cache(var))) goto done; + if((stat = NCZ_adjust_var_cache(var))) goto done; #ifdef LOGGING { @@ -710,7 +565,7 @@ ncz_def_var_extra(int ncid, int varid, int *shuffle, int *unused1, * by HDF5 and will cause a HDF5 error later. */ if (*no_fill) if (var->type_info->hdr.id == NC_STRING) - {retval = NC_EINVAL; goto done;} + {stat = NC_EINVAL; goto done;} /* Set the no-fill mode. */ var->no_fill = NC_TRUE; @@ -719,55 +574,43 @@ ncz_def_var_extra(int ncid, int varid, int *shuffle, int *unused1, var->no_fill = NC_FALSE; } - /* Are we setting a fill value? */ - if (fill_value && no_fill && !(*no_fill)) - { - /* Copy the fill_value. */ - LOG((4, "Copying fill value into metadata for variable %s", - var->hdr.name)); - - /* If there's a _FillValue attribute, delete it. */ - retval = NCZ_del_att(ncid, varid, NC_FillValue); - if (retval && retval != NC_ENOTATT) - goto done; - - /* Create a _FillValue attribute; will also fill in var->fill_value */ - if ((retval = nc_put_att(ncid, varid, NC_FillValue, var->type_info->hdr.id, - 1, fill_value))) - goto done; - /* Reclaim any existing fill_chunk */ - if((retval = NCZ_reclaim_fill_chunk(zvar->cache))) goto done; - } else if (var->fill_value && no_fill && (*no_fill)) { /* Turning off fill value? */ - /* If there's a _FillValue attribute, delete it. */ - retval = NCZ_del_att(ncid, varid, NC_FillValue); - if (retval && retval != NC_ENOTATT) return retval; - if((retval = NCZ_reclaim_fill_value(var))) return retval; + /* Are we setting or disabling a fill value? */ + if(no_fill) { + if(fillvalue && *no_fill == NC_NOFILL) { + var->no_fill = NC_NOFILL; + if((stat = NCZ_disable_fill(h5,var))) goto done; + var->fill_val_changed = 0; /* But pretend it has not been changed */ + } else { + /* synchronize to Attribute */ + if((stat = NCZ_sync_dual_att(h5,(NC_OBJ*)var,NC_FillValue,DA_FILLVALUE,FIXATT))) goto done; + var->fill_val_changed = 1; + } } /* Is the user setting the endianness? */ if (endianness) { - /* Setting endianness is only premitted on atomic integer and - * atomic float types. */ - switch (var->type_info->hdr.id) - { - case NC_BYTE: - case NC_SHORT: - case NC_INT: - case NC_FLOAT: - case NC_DOUBLE: - case NC_UBYTE: - case NC_USHORT: - case NC_UINT: - case NC_INT64: - case NC_UINT64: - break; - default: - {retval = NC_EINVAL; goto done;} - } - var->type_info->endianness = *endianness; - /* Propagate */ - var->endianness = *endianness; + /* Setting endianness is only premitted on atomic integer and + * atomic float types. */ + switch (var->type_info->hdr.id) + { + case NC_BYTE: + case NC_SHORT: + case NC_INT: + case NC_FLOAT: + case NC_DOUBLE: + case NC_UBYTE: + case NC_USHORT: + case NC_UINT: + case NC_INT64: + case NC_UINT64: + break; + default: + {stat = NC_EINVAL; goto done;} + } + var->type_info->endianness = *endianness; + /* Propagate */ + var->endianness = *endianness; } /* Remember quantization settings. They will be used when data are @@ -775,63 +618,63 @@ ncz_def_var_extra(int ncid, int varid, int *shuffle, int *unused1, * Code block is identical to one in hdf5var.c---consider functionalizing */ if (quantize_mode) { - /* Only four valid mode settings. */ - if (*quantize_mode != NC_NOQUANTIZE && - *quantize_mode != NC_QUANTIZE_BITGROOM && - *quantize_mode != NC_QUANTIZE_GRANULARBR && - *quantize_mode != NC_QUANTIZE_BITROUND) - return NC_EINVAL; - - if (*quantize_mode == NC_QUANTIZE_BITGROOM || - *quantize_mode == NC_QUANTIZE_GRANULARBR || - *quantize_mode == NC_QUANTIZE_BITROUND) - { - - /* Only float and double types can have quantization. */ - if (var->type_info->hdr.id != NC_FLOAT && - var->type_info->hdr.id != NC_DOUBLE) - return NC_EINVAL; - - /* All quantization codecs require number of significant digits */ - if (!nsd) - return NC_EINVAL; - - /* NSD must be in range. */ - if (*nsd <= 0) - return NC_EINVAL; - - if (*quantize_mode == NC_QUANTIZE_BITGROOM || - *quantize_mode == NC_QUANTIZE_GRANULARBR) - { - if (var->type_info->hdr.id == NC_FLOAT && - *nsd > NC_QUANTIZE_MAX_FLOAT_NSD) - return NC_EINVAL; - if (var->type_info->hdr.id == NC_DOUBLE && - *nsd > NC_QUANTIZE_MAX_DOUBLE_NSD) - return NC_EINVAL; - } - else if (*quantize_mode == NC_QUANTIZE_BITROUND) - { - if (var->type_info->hdr.id == NC_FLOAT && - *nsd > NC_QUANTIZE_MAX_FLOAT_NSB) - return NC_EINVAL; - if (var->type_info->hdr.id == NC_DOUBLE && - *nsd > NC_QUANTIZE_MAX_DOUBLE_NSB) - return NC_EINVAL; - } - - var->nsd = *nsd; - } - - var->quantize_mode = *quantize_mode; + /* Only four valid mode settings. */ + if (*quantize_mode != NC_NOQUANTIZE && + *quantize_mode != NC_QUANTIZE_BITGROOM && + *quantize_mode != NC_QUANTIZE_GRANULARBR && + *quantize_mode != NC_QUANTIZE_BITROUND) + return NC_EINVAL; + + if (*quantize_mode == NC_QUANTIZE_BITGROOM || + *quantize_mode == NC_QUANTIZE_GRANULARBR || + *quantize_mode == NC_QUANTIZE_BITROUND) + { + + /* Only float and double types can have quantization. */ + if (var->type_info->hdr.id != NC_FLOAT && + var->type_info->hdr.id != NC_DOUBLE) + return NC_EINVAL; + + /* All quantization codecs require number of significant digits */ + if (!nsd) + return NC_EINVAL; + + /* NSD must be in range. */ + if (*nsd <= 0) + return NC_EINVAL; + + if (*quantize_mode == NC_QUANTIZE_BITGROOM || + *quantize_mode == NC_QUANTIZE_GRANULARBR) + { + if (var->type_info->hdr.id == NC_FLOAT && + *nsd > NC_QUANTIZE_MAX_FLOAT_NSD) + return NC_EINVAL; + if (var->type_info->hdr.id == NC_DOUBLE && + *nsd > NC_QUANTIZE_MAX_DOUBLE_NSD) + return NC_EINVAL; + } + else if (*quantize_mode == NC_QUANTIZE_BITROUND) + { + if (var->type_info->hdr.id == NC_FLOAT && + *nsd > NC_QUANTIZE_MAX_FLOAT_NSB) + return NC_EINVAL; + if (var->type_info->hdr.id == NC_DOUBLE && + *nsd > NC_QUANTIZE_MAX_DOUBLE_NSB) + return NC_EINVAL; + } + + var->nsd = *nsd; + } + + var->quantize_mode = *quantize_mode; - /* If quantization is turned off, then set nsd to 0. */ - if (*quantize_mode == NC_NOQUANTIZE) - var->nsd = 0; + /* If quantization is turned off, then set nsd to 0. */ + if (*quantize_mode == NC_NOQUANTIZE) + var->nsd = 0; } done: - return ZUNTRACE(retval); + return ZUNTRACE(stat); } /** @@ -857,14 +700,14 @@ ncz_def_var_extra(int ncid, int varid, int *shuffle, int *unused1, */ int NCZ_def_var_deflate(int ncid, int varid, int shuffle, int deflate, - int deflate_level) + int deflate_level) { int stat = NC_NOERR; unsigned int level = (unsigned int)deflate_level; /* Set shuffle first */ if((stat = ncz_def_var_extra(ncid, varid, &shuffle, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL))) goto done; if(deflate) - stat = nc_def_var_filter(ncid, varid, H5Z_FILTER_DEFLATE,1,&level); + stat = nc_def_var_filter(ncid, varid, H5Z_FILTER_DEFLATE,1,&level); if(stat) goto done; done: return stat; @@ -892,7 +735,7 @@ int NCZ_def_var_fletcher32(int ncid, int varid, int fletcher32) { return ncz_def_var_extra(ncid, varid, NULL, NULL, NULL, &fletcher32, - NULL, NULL, NULL, NULL, NULL, NULL, NULL); + NULL, NULL, NULL, NULL, NULL, NULL, NULL); } /** @@ -921,7 +764,7 @@ int NCZ_def_var_chunking(int ncid, int varid, int contiguous, const size_t *chunksizesp) { return ncz_def_var_extra(ncid, varid, NULL, NULL, NULL, NULL, - &contiguous, chunksizesp, NULL, NULL, NULL, NULL, NULL); + &contiguous, chunksizesp, NULL, NULL, NULL, NULL, NULL); } /** @@ -949,27 +792,28 @@ ncz_def_var_chunking_ints(int ncid, int varid, int contiguous, int *chunksizesp) { NC_VAR_INFO_T *var; size_t *cs; - int i, retval; + int retval; + size_t i; /* Get pointer to the var. */ if ((retval = nc4_find_grp_h5_var(ncid, varid, NULL, NULL, &var))) - return THROW(retval); + return THROW(retval); assert(var); /* Allocate space for the size_t copy of the chunksizes array. */ if (var->ndims) - if (!(cs = malloc(var->ndims * sizeof(size_t)))) - return NC_ENOMEM; + if (!(cs = malloc(var->ndims * sizeof(size_t)))) + return NC_ENOMEM; /* Copy to size_t array. */ for (i = 0; i < var->ndims; i++) - cs[i] = chunksizesp[i]; + cs[i] = (size_t)chunksizesp[i]; retval = ncz_def_var_extra(ncid, varid, NULL, NULL, NULL, NULL, - &contiguous, cs, NULL, NULL, NULL, NULL, NULL); + &contiguous, cs, NULL, NULL, NULL, NULL, NULL); if (var->ndims) - free(cs); + free(cs); return THROW(retval); } @@ -1000,7 +844,7 @@ int NCZ_def_var_fill(int ncid, int varid, int no_fill, const void *fill_value) { return ncz_def_var_extra(ncid, varid, NULL, NULL, NULL, NULL, NULL, - NULL, &no_fill, fill_value, NULL, NULL, NULL); + NULL, &no_fill, fill_value, NULL, NULL, NULL); } /** @@ -1029,7 +873,7 @@ int NCZ_def_var_endian(int ncid, int varid, int endianness) { return ncz_def_var_extra(ncid, varid, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, &endianness, NULL, NULL); + NULL, NULL, NULL, &endianness, NULL, NULL); } /** @@ -1114,8 +958,8 @@ int NCZ_def_var_quantize(int ncid, int varid, int quantize_mode, int nsd) { return ncz_def_var_extra(ncid, varid, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, - &quantize_mode, &nsd); + NULL, NULL, NULL, NULL, NULL, + &quantize_mode, &nsd); } /** @@ -1130,16 +974,16 @@ NCZ_ensure_quantizer(int ncid, NC_VAR_INFO_T* var) /* Read the attribute */ if(NCZ_get_att(ncid,var->hdr.id,NC_QUANTIZE_BITGROOM_ATT_NAME,&nsd,NC_INT)==NC_NOERR) { - var->quantize_mode = NC_QUANTIZE_BITGROOM; + var->quantize_mode = NC_QUANTIZE_BITGROOM; var->nsd = nsd; } else if(NCZ_get_att(ncid,var->hdr.id,NC_QUANTIZE_GRANULARBR_ATT_NAME,&nsd,NC_INT)==NC_NOERR) { - var->quantize_mode = NC_QUANTIZE_GRANULARBR; + var->quantize_mode = NC_QUANTIZE_GRANULARBR; var->nsd = nsd; } else if(NCZ_get_att(ncid,var->hdr.id,NC_QUANTIZE_BITROUND_ATT_NAME,&nsd,NC_INT)==NC_NOERR) { - var->quantize_mode = NC_QUANTIZE_BITROUND; + var->quantize_mode = NC_QUANTIZE_BITROUND; var->nsd = nsd; } else { - var->quantize_mode = NC_NOQUANTIZE; + var->quantize_mode = NC_NOQUANTIZE; var->nsd = 0; } if(var->quantize_mode < 0) var->quantize_mode = 0; @@ -1164,7 +1008,7 @@ NCZ_ensure_quantizer(int ncid, NC_VAR_INFO_T* var) */ int NCZ_inq_var_quantize(int ncid, int varid, int *quantize_modep, - int *nsdp) + int *nsdp) { NC_VAR_INFO_T *var; int retval; @@ -1176,9 +1020,9 @@ NCZ_inq_var_quantize(int ncid, int varid, int *quantize_modep, if ((retval = nc4_find_grp_h5_var(ncid, varid, NULL, NULL, &var))) return retval; if (!var) - return NC_ENOTVAR; + return NC_ENOTVAR; assert(var->hdr.id == varid); - if(var->quantize_mode == -1) + if(var->quantize_mode == 0) {if((retval = NCZ_ensure_quantizer(ncid, var))) return retval;} /* Copy the data to the user's data buffers. */ if (quantize_modep) @@ -1229,7 +1073,7 @@ NCZ_rename_var(int ncid, int varid, const char *name) int retval = NC_NOERR; if (!name) - return NC_EINVAL; + return NC_EINVAL; LOG((2, "%s: ncid 0x%x varid %d name %s", __func__, ncid, varid, name)); @@ -1237,160 +1081,155 @@ NCZ_rename_var(int ncid, int varid, const char *name) /* Find info for this file and group, and set pointer to each. */ if ((retval = nc4_find_grp_h5(ncid, &grp, &h5))) - return THROW(retval); + return THROW(retval); assert(h5 && grp && grp->format_grp_info); /* Is the new name too long? */ if (strlen(name) > NC_MAX_NAME) - return NC_EMAXNAME; + return NC_EMAXNAME; /* Trying to write to a read-only file? No way, Jose! */ if (h5->no_write) - return NC_EPERM; + return NC_EPERM; /* Check name validity, if strict nc3 rules are in effect for this * file. */ if ((retval = NC_check_name(name))) - return THROW(retval); + return THROW(retval); /* Get the variable wrt varid */ - if (!(var = (NC_VAR_INFO_T *)ncindexith(grp->vars, varid))) - return NC_ENOTVAR; + if (!(var = (NC_VAR_INFO_T *)ncindexith(grp->vars, (size_t)varid))) + return NC_ENOTVAR; /* Check if new name is in use; note that renaming to same name is still an error according to the nc_test/test_write.c code. Why?*/ if (ncindexlookup(grp->vars, name)) - return NC_ENAMEINUSE; + return THROW(NC_ENAMEINUSE); /* If we're not in define mode, new name must be of equal or less size, if strict nc3 rules are in effect for this . */ if (!(h5->flags & NC_INDEF) && strlen(name) > strlen(var->hdr.name) && - (h5->cmode & NC_CLASSIC_MODEL)) - return NC_ENOTINDEFINE; + (h5->cmode & NC_CLASSIC_MODEL)) + return NC_ENOTINDEFINE; #ifdef LOOK /* Is there another dim with this name, for which this var will not * be a coord var? If so, we have to create a dim without a * variable for the old name. */ if ((other_dim = (NC_DIM_INFO_T *)ncindexlookup(grp->dim, name)) && - strcmp(name, var->dim[0]->hdr.name)) + strcmp(name, var->dim[0]->hdr.name)) { - /* Create a dim without var dataset for old dim. */ - if ((retval = ncz_create_dim_wo_var(other_dim))) - return THROW(retval); - - /* Give this var a secret ZARR name so it can co-exist in file - * with dim wp var dataset. Base the secret name on the new var - * name. */ - if ((retval = give_var_secret_name(var, name))) - return THROW(retval); - use_secret_name++; + /* Create a dim without var dataset for old dim. */ + if ((retval = ncz_create_dim_wo_var(other_dim))) + return THROW(retval); + + /* Give this var a secret ZARR name so it can co-exist in file + * with dim wp var dataset. Base the secret name on the new var + * name. */ + if ((retval = give_var_secret_name(var, name))) + return THROW(retval); + use_secret_name++; } /* Change the ZARR file, if this var has already been created there. */ if (var->created) { - int v; - char *ncz_name; /* Dataset will be renamed to this. */ - ncz_name = use_secret_name ? var->ncz_name: (char *)name; - - /* Do we need to read var metadata? */ - if (!var->meta_read) - if ((retval = ncz_get_var_meta(var))) - return THROW(retval); - - if (var->ndims) - { - NCZ_DIM_INFO_T *ncz_d0; - ncz_d0 = (NCZ_DIM_INFO_T *)var->dim[0]->format_dim_info; + int v; + char *ncz_name; /* Dataset will be renamed to this. */ + ncz_name = use_secret_name ? var->ncz_name: (char *)name; - /* Is there an existing dimscale-only dataset of this name? If - * so, it must be deleted. */ - if (ncz_d0->hdf_dimscaleid) - { - if ((retval = delete_dimscale_dataset(grp, var->dim[0]->hdr.id, - var->dim[0]))) - return THROW(retval); - } - } + if (var->ndims) + { + NCZ_DIM_INFO_T *ncz_d0; + ncz_d0 = (NCZ_DIM_INFO_T *)var->dim[0]->format_dim_info; + + /* Is there an existing dimscale-only dataset of this name? If + * so, it must be deleted. */ + if (ncz_d0->hdf_dimscaleid) + { + if ((retval = delete_dimscale_dataset(grp, var->dim[0]->hdr.id, + var->dim[0]))) + return THROW(retval); + } + } - LOG((3, "Moving dataset %s to %s", var->hdr.name, name)); - if (H5Lmove(ncz_grp->hdf_grpid, var->hdr.name, ncz_grp->hdf_grpid, - ncz_name, H5P_DEFAULT, H5P_DEFAULT) < 0) - return NC_EHDFERR; - - /* Rename all the vars in this file with a varid greater than - * this var. Varids are assigned based on dataset creation time, - * and we have just changed that for this var. We must do the - * same for all vars with a > varid, so that the creation order - * will continue to be correct. */ - for (v = var->hdr.id + 1; v < ncindexsize(grp->vars); v++) - { - NC_VAR_INFO_T *my_var; - my_var = (NC_VAR_INFO_T *)ncindexith(grp->vars, v); - assert(my_var); + LOG((3, "Moving dataset %s to %s", var->hdr.name, name)); + if (H5Lmove(ncz_grp->hdf_grpid, var->hdr.name, ncz_grp->hdf_grpid, + ncz_name, H5P_DEFAULT, H5P_DEFAULT) < 0) + return NC_EHDFERR; + + /* Rename all the vars in this file with a varid greater than + * this var. Varids are assigned based on dataset creation time, + * and we have just changed that for this var. We must do the + * same for all vars with a > varid, so that the creation order + * will continue to be correct. */ + for (v = var->hdr.id + 1; v < ncindexsize(grp->vars); v++) + { + NC_VAR_INFO_T *my_var; + my_var = (NC_VAR_INFO_T *)ncindexith(grp->vars, v); + assert(my_var); - LOG((3, "mandatory rename of %s to same name", my_var->hdr.name)); + LOG((3, "mandatory rename of %s to same name", my_var->hdr.name)); - /* Rename to temp name. */ - if (H5Lmove(ncz_grp->hdf_grpid, my_var->hdr.name, ncz_grp->hdf_grpid, - NC_TEMP_NAME, H5P_DEFAULT, H5P_DEFAULT) < 0) - return NC_EHDFERR; + /* Rename to temp name. */ + if (H5Lmove(ncz_grp->hdf_grpid, my_var->hdr.name, ncz_grp->hdf_grpid, + NC_TEMP_NAME, H5P_DEFAULT, H5P_DEFAULT) < 0) + return NC_EHDFERR; - /* Rename to real name. */ - if (H5Lmove(ncz_grp->hdf_grpid, NC_TEMP_NAME, ncz_grp->hdf_grpid, - my_var->hdr.name, H5P_DEFAULT, H5P_DEFAULT) < 0) - return NC_EHDFERR; - } + /* Rename to real name. */ + if (H5Lmove(ncz_grp->hdf_grpid, NC_TEMP_NAME, ncz_grp->hdf_grpid, + my_var->hdr.name, H5P_DEFAULT, H5P_DEFAULT) < 0) + return NC_EHDFERR; + } } #endif /* Now change the name in our metadata. */ free(var->hdr.name); if (!(var->hdr.name = strdup(name))) - return NC_ENOMEM; + return NC_ENOMEM; LOG((3, "var is now %s", var->hdr.name)); /* rebuild index. */ if (!ncindexrebuild(grp->vars)) - return NC_EINTERNAL; + return NC_EINTERNAL; #ifdef LOOK /* Check if this was a coordinate variable previously, but names * are different now */ if (var->dimscale && strcmp(var->hdr.name, var->dim[0]->hdr.name)) { - /* Break up the coordinate variable */ - if ((retval = ncz_break_coord_var(grp, var, var->dim[0]))) - return THROW(retval); + /* Break up the coordinate variable */ + if ((retval = ncz_break_coord_var(grp, var, var->dim[0]))) + return THROW(retval); } /* Check if this should become a coordinate variable. */ if (!var->dimscale) { - /* Only variables with >0 dimensions can become coordinate - * variables. */ - if (var->ndims) - { - NC_GRP_INFO_T *dim_grp; - NC_DIM_INFO_T *dim; - - /* Check to see if this is became a coordinate variable. If - * so, it will have the same name as dimension index 0. If it - * is a coordinate var, is it a coordinate var in the same - * group as the dim? */ - if ((retval = ncz_find_dim(grp, var->dimids[0], &dim, &dim_grp))) - return THROW(retval); - if (!strcmp(dim->hdr.name, name) && dim_grp == grp) - { - /* Reform the coordinate variable. */ - if ((retval = ncz_reform_coord_var(grp, var, dim))) - return THROW(retval); - var->became_coord_var = NC_TRUE; - } - } + /* Only variables with >0 dimensions can become coordinate + * variables. */ + if (var->ndims) + { + NC_GRP_INFO_T *dim_grp; + NC_DIM_INFO_T *dim; + + /* Check to see if this is became a coordinate variable. If + * so, it will have the same name as dimension index 0. If it + * is a coordinate var, is it a coordinate var in the same + * group as the dim? */ + if ((retval = ncz_find_dim(grp, var->dimids[0], &dim, &dim_grp))) + return THROW(retval); + if (!strcmp(dim->hdr.name, name) && dim_grp == grp) + { + /* Reform the coordinate variable. */ + if ((retval = ncz_reform_coord_var(grp, var, dim))) + return THROW(retval); + var->became_coord_var = NC_TRUE; + } + } } #endif @@ -1414,7 +1253,7 @@ NCZ_rename_var(int ncid, int varid, const char *name) */ int NCZ_put_vara(int ncid, int varid, const size_t *startp, - const size_t *countp, const void *op, int memtype) + const size_t *countp, const void *op, int memtype) { return NCZ_put_vars(ncid, varid, startp, countp, NULL, op, memtype); } @@ -1436,7 +1275,7 @@ NCZ_put_vara(int ncid, int varid, const size_t *startp, */ int NCZ_get_vara(int ncid, int varid, const size_t *startp, - const size_t *countp, void *ip, int memtype) + const size_t *countp, void *ip, int memtype) { return NCZ_get_vars(ncid, varid, startp, countp, NULL, ip, memtype); } @@ -1462,21 +1301,21 @@ check_for_vara(nc_type *mem_nc_type, NC_VAR_INFO_T *var, NC_FILE_INFO_T *h5) * as the mem type as well. */ assert(mem_nc_type); if (*mem_nc_type == NC_NAT) - *mem_nc_type = var->type_info->hdr.id; + *mem_nc_type = var->type_info->hdr.id; assert(*mem_nc_type); /* No NC_CHAR conversions, you pervert! */ if (var->type_info->hdr.id != *mem_nc_type && - (var->type_info->hdr.id == NC_CHAR || *mem_nc_type == NC_CHAR)) - return NC_ECHAR; + (var->type_info->hdr.id == NC_CHAR || *mem_nc_type == NC_CHAR)) + return NC_ECHAR; /* If we're in define mode, we can't read or write data. */ if (h5->flags & NC_INDEF) { - if (h5->cmode & NC_CLASSIC_MODEL) - return NC_EINDEFINE; - if ((retval = ncz_enddef_netcdf4_file(h5))) - return THROW(retval); + if (h5->cmode & NC_CLASSIC_MODEL) + return NC_EINDEFINE; + if ((retval = ncz_enddef_netcdf4_file(h5))) + return THROW(retval); } return NC_NOERR; @@ -1488,18 +1327,18 @@ check_for_vara(nc_type *mem_nc_type, NC_VAR_INFO_T *var, NC_FILE_INFO_T *h5) */ static void log_dim_info(NC_VAR_INFO_T *var, size64_t *fdims, size64_t *fmaxdims, - size64_t *start, size64_t *count) + size64_t *start, size64_t *count) { - int d2; + size_t d2; /* Print some debugging info... */ LOG((4, "%s: var name %s ndims %d", __func__, var->hdr.name, var->ndims)); LOG((4, "File space, and requested:")); for (d2 = 0; d2 < var->ndims; d2++) { - LOG((4, "fdims[%d]=%Ld fmaxdims[%d]=%Ld", d2, fdims[d2], d2, - fmaxdims[d2])); - LOG((4, "start[%d]=%Ld count[%d]=%Ld", d2, start[d2], d2, count[d2])); + LOG((4, "fdims[%d]=%Ld fmaxdims[%d]=%Ld", d2, fdims[d2], d2, + fmaxdims[d2])); + LOG((4, "start[%d]=%Ld count[%d]=%Ld", d2, start[d2], d2, count[d2])); } } #endif /* LOGGING */ @@ -1535,7 +1374,7 @@ log_dim_info(NC_VAR_INFO_T *var, size64_t *fdims, size64_t *fmaxdims, */ int NCZ_put_vars(int ncid, int varid, const size_t *startp, const size_t *countp, - const ptrdiff_t *stridep, const void *data, nc_type mem_nc_type) + const ptrdiff_t *stridep, const void *data, nc_type mem_nc_type) { NC_GRP_INFO_T *grp; NC_FILE_INFO_T *h5; @@ -1546,8 +1385,9 @@ NCZ_put_vars(int ncid, int varid, const size_t *startp, const size_t *countp, #endif size64_t fdims[NC_MAX_VAR_DIMS]; size64_t start[NC_MAX_VAR_DIMS], count[NC_MAX_VAR_DIMS]; - size64_t stride[NC_MAX_VAR_DIMS], ones[NC_MAX_VAR_DIMS]; - int retval, range_error = 0, i, d2; + size64_t stride[NC_MAX_VAR_DIMS]; + int retval, range_error = 0; + size_t i, d2; void *bufr = NULL; int bufrd = 0; /* 1 => we allocated bufr */ int need_to_convert = 0; @@ -1564,11 +1404,11 @@ NCZ_put_vars(int ncid, int varid, const size_t *startp, const size_t *countp, /* Find info for this file, group, and var. */ if ((retval = nc4_find_grp_h5_var(ncid, varid, &h5, &grp, &var))) - return THROW(retval); + return THROW(retval); assert(h5 && grp && var && var->hdr.id == varid && var->format_var_info); LOG((3, "%s: var->hdr.name %s mem_nc_type %d", __func__, - var->hdr.name, mem_nc_type)); + var->hdr.name, mem_nc_type)); if(h5->no_write) return NC_EPERM; @@ -1577,38 +1417,36 @@ NCZ_put_vars(int ncid, int varid, const size_t *startp, const size_t *countp, /* Cannot convert to user-defined types. */ if (mem_nc_type >= NC_FIRSTUSERTYPEID) - return THROW(NC_EINVAL); + return THROW(NC_EINVAL); /* Check some stuff about the type and the file. If the file must * be switched from define mode, it happens here. */ if ((retval = check_for_vara(&mem_nc_type, var, h5))) - return THROW(retval); + return THROW(retval); assert(!var->ndims || (startp && countp)); /* Convert from size_t and ptrdiff_t to size64_t */ /* Also do sanity checks */ if(var->ndims == 0) { /* scalar */ - start[0] = 0; - count[0] = 1; - stride[0] = 1; - ones[0] = 1; + start[0] = 0; + count[0] = 1; + stride[0] = 1; } else { for (i = 0; i < var->ndims; i++) { - /* Check for non-positive stride. */ - if (stridep && stridep[i] <= 0) - return NC_ESTRIDE; - - fdims[i] = var->dim[i]->len; - start[i] = startp[i]; - count[i] = countp ? countp[i] : fdims[i]; - stride[i] = stridep ? stridep[i] : 1; - ones[i] = 1; - - /* Check to see if any counts are zero. */ - if (!count[i]) - zero_count++; - } + /* Check for non-positive stride. */ + if (stridep && stridep[i] <= 0) + return NC_ESTRIDE; + + fdims[i] = var->dim[i]->len; + start[i] = startp[i]; + count[i] = countp ? countp[i] : fdims[i]; + stride[i] = stridep ? (size64_t)stridep[i] : 1; + + /* Check to see if any counts are zero. */ + if (!count[i]) + zero_count++; + } } @@ -1620,19 +1458,19 @@ NCZ_put_vars(int ncid, int varid, const size_t *startp, const size_t *countp, * put data beyond their current length. */ for (d2 = 0; d2 < var->ndims; d2++) { - size64_t endindex = start[d2] + stride[d2] * (count[d2] - 1); /* last index written */ - dim = var->dim[d2]; - assert(dim && dim->hdr.id == var->dimids[d2]); - if (count[d2] == 0) - endindex = start[d2]; /* fixup for zero read count */ - if (!dim->unlimited) - { - /* Allow start to equal dim size if count is zero. */ - if (start[d2] > fdims[d2] || (start[d2] == fdims[d2] && count[d2] > 0)) - BAIL_QUIET(NC_EINVALCOORDS); - if (!zero_count && endindex >= fdims[d2]) - BAIL_QUIET(NC_EEDGE); - } + size64_t endindex = start[d2] + stride[d2] * (count[d2] - 1); /* last index written */ + dim = var->dim[d2]; + assert(dim && dim->hdr.id == var->dimids[d2]); + if (count[d2] == 0) + endindex = start[d2]; /* fixup for zero read count */ + if (!dim->unlimited) + { + /* Allow start to equal dim size if count is zero. */ + if (start[d2] > fdims[d2] || (start[d2] == fdims[d2] && count[d2] > 0)) + BAIL_QUIET(NC_EINVALCOORDS); + if (!zero_count && endindex >= fdims[d2]) + BAIL_QUIET(NC_EEDGE); + } } @@ -1643,19 +1481,19 @@ NCZ_put_vars(int ncid, int varid, const size_t *startp, const size_t *countp, scalar. If it is, we won't try to set up a hyperslab. */ if (H5Sget_simple_extent_type(file_spaceid) == H5S_SCALAR) { - if ((mem_spaceid = H5Screate(H5S_SCALAR)) < 0) - BAIL(NC_EHDFERR); + if ((mem_spaceid = H5Screate(H5S_SCALAR)) < 0) + BAIL(NC_EHDFERR); } else { - if (H5Sselect_hyperslab(file_spaceid, H5S_SELECT_SET, start, stride, - ones, count) < 0) - BAIL(NC_EHDFERR); - - /* Create a space for the memory, just big enough to hold the slab - we want. */ - if ((mem_spaceid = H5Screate_simple(var->ndims, count, NULL)) < 0) - BAIL(NC_EHDFERR); + if (H5Sselect_hyperslab(file_spaceid, H5S_SELECT_SET, start, stride, + ones, count) < 0) + BAIL(NC_EHDFERR); + + /* Create a space for the memory, just big enough to hold the slab + we want. */ + if ((mem_spaceid = H5Screate_simple(var->ndims, count, NULL)) < 0) + BAIL(NC_EHDFERR); } #endif @@ -1664,39 +1502,39 @@ NCZ_put_vars(int ncid, int varid, const size_t *startp, const size_t *countp, * opaque or vlen types.) We also need to call this code if we are doing * quantization. */ if ((mem_nc_type != var->type_info->hdr.id && - mem_nc_type != NC_COMPOUND && mem_nc_type != NC_OPAQUE - && mem_nc_type != NC_VLEN) - || var->quantize_mode > 0) + mem_nc_type != NC_COMPOUND && mem_nc_type != NC_OPAQUE + && mem_nc_type != NC_VLEN) + || var->quantize_mode > 0) { - size_t file_type_size; + size_t file_type_size; - /* We must convert - allocate a buffer. */ - need_to_convert++; - if(zvar->scalar) - len = 1; + /* We must convert - allocate a buffer. */ + need_to_convert++; + if(zvar->scalar) + len = 1; else for (d2=0; d2ndims; d2++) - len *= countp[d2]; - - LOG((4, "converting data for var %s type=%d len=%d", var->hdr.name, - var->type_info->hdr.id, len)); - - /* Later on, we will need to know the size of this type in the - * file. */ - assert(var->type_info->size); - file_type_size = var->type_info->size; - - /* If we're reading, we need bufr to have enough memory to store - * the data in the file. If we're writing, we need bufr to be - * big enough to hold all the data in the file's type. */ - if (len > 0) { - assert(bufr == NULL); - if (!(bufr = malloc(len * file_type_size))) - BAIL(NC_ENOMEM); - bufrd = 1; - } + len *= countp[d2]; + + LOG((4, "converting data for var %s type=%d len=%d", var->hdr.name, + var->type_info->hdr.id, len)); + + /* Later on, we will need to know the size of this type in the + * file. */ + assert(var->type_info->size); + file_type_size = var->type_info->size; + + /* If we're reading, we need bufr to have enough memory to store + * the data in the file. If we're writing, we need bufr to be + * big enough to hold all the data in the file's type. */ + if (len > 0) { + assert(bufr == NULL); + if (!(bufr = malloc(len * file_type_size))) + BAIL(NC_ENOMEM); + bufrd = 1; + } } else - bufr = (void *)data; + bufr = (void *)data; /* Write this hyperslab from memory to file. Does the dataset have to be extended? If it's already extended to the required size, it will @@ -1704,110 +1542,110 @@ NCZ_put_vars(int ncid, int varid, const size_t *startp, const size_t *countp, if (var->ndims) { - for (d2 = 0; d2 < var->ndims; d2++) - { - size64_t endindex = start[d2] + stride[d2] * (count[d2] - 1); /* last index written */ - if (count[d2] == 0) - endindex = start[d2]; - dim = var->dim[d2]; - assert(dim && dim->hdr.id == var->dimids[d2]); - if (dim->unlimited) - { - if (!zero_count && endindex >= fdims[d2]) - { - dim->len = (endindex+1); - } - else - dim->len = fdims[d2]; - - if (!zero_count && endindex >= dim->len) - { - dim->len = endindex+1; - dim->extended = NC_TRUE; - } - } - } + for (d2 = 0; d2 < var->ndims; d2++) + { + size64_t endindex = start[d2] + stride[d2] * (count[d2] - 1); /* last index written */ + if (count[d2] == 0) + endindex = start[d2]; + dim = var->dim[d2]; + assert(dim && dim->hdr.id == var->dimids[d2]); + if (dim->unlimited) + { + if (!zero_count && endindex >= fdims[d2]) + { + dim->len = (endindex+1); + } + else + dim->len = fdims[d2]; + + if (!zero_count && endindex >= dim->len) + { + dim->len = endindex+1; + dim->extended = NC_TRUE; + } + } + } #ifdef LOOK - /* If we need to extend it, we also need a new file_spaceid - to reflect the new size of the space. */ - if (need_to_extend) - { - LOG((4, "extending dataset")); - /* Convert xtend_size back to hsize_t for use with - * H5Dset_extent. */ - for (d2 = 0; d2 < var->ndims; d2++) - fdims[d2] = (size64_t)xtend_size[d2]; - if (H5Dset_extent(ncz_var->hdf_datasetid, fdims) < 0) - BAIL(NC_EHDFERR); - if (file_spaceid > 0 && H5Sclose(file_spaceid) < 0) - BAIL2(NC_EHDFERR); - if ((file_spaceid = H5Dget_space(ncz_var->hdf_datasetid)) < 0) - BAIL(NC_EHDFERR); - if (H5Sselect_hyperslab(file_spaceid, H5S_SELECT_SET, - start, stride, ones, count) < 0) - BAIL(NC_EHDFERR); - } + /* If we need to extend it, we also need a new file_spaceid + to reflect the new size of the space. */ + if (need_to_extend) + { + LOG((4, "extending dataset")); + /* Convert xtend_size back to hsize_t for use with + * H5Dset_extent. */ + for (d2 = 0; d2 < var->ndims; d2++) + fdims[d2] = (size64_t)xtend_size[d2]; + if (H5Dset_extent(ncz_var->hdf_datasetid, fdims) < 0) + BAIL(NC_EHDFERR); + if (file_spaceid > 0 && H5Sclose(file_spaceid) < 0) + BAIL2(NC_EHDFERR); + if ((file_spaceid = H5Dget_space(ncz_var->hdf_datasetid)) < 0) + BAIL(NC_EHDFERR); + if (H5Sselect_hyperslab(file_spaceid, H5S_SELECT_SET, + start, stride, ones, count) < 0) + BAIL(NC_EHDFERR); + } #endif } /* Do we need to convert the data? */ if (need_to_convert) { - if(var->quantize_mode < 0) {if((retval = NCZ_ensure_quantizer(ncid,var))) BAIL(retval);} - assert(bufr != NULL); - if ((retval = nc4_convert_type(data, bufr, mem_nc_type, var->type_info->hdr.id, - len, &range_error, var->fill_value, - (h5->cmode & NC_CLASSIC_MODEL), - var->quantize_mode, var->nsd))) - BAIL(retval); + if(var->quantize_mode == 0) {if((retval = NCZ_ensure_quantizer(ncid,var))) BAIL(retval);} + assert(bufr != NULL); + if ((retval = nc4_convert_type(data, bufr, mem_nc_type, var->type_info->hdr.id, + len, &range_error, var->fill_value, + (h5->cmode & NC_CLASSIC_MODEL), + var->quantize_mode, var->nsd))) + BAIL(retval); } #ifdef LOOK /* Write the data. At last! */ LOG((4, "about to write datasetid 0x%x mem_spaceid 0x%x " - "file_spaceid 0x%x", ncz_var->hdf_datasetid, mem_spaceid, file_spaceid)); + "file_spaceid 0x%x", ncz_var->hdf_datasetid, mem_spaceid, file_spaceid)); if (H5Dwrite(ncz_var->hdf_datasetid, - ((NCZ_TYPE_INFO_T *)var->type_info->format_type_info)->hdf_typeid, - mem_spaceid, file_spaceid, xfer_plistid, bufr) < 0) - BAIL(NC_EHDFERR); + ((NCZ_TYPE_INFO_T *)var->type_info->format_type_info)->hdf_typeid, + mem_spaceid, file_spaceid, xfer_plistid, bufr) < 0) + BAIL(NC_EHDFERR); #endif /*LOOK*/ if((retval = NCZ_transferslice(var, WRITING, start, count, stride, bufr, var->type_info->hdr.id))) - BAIL(retval); + BAIL(retval); /* Remember that we have written to this var so that Fill Value * can't be set for it. */ if (!var->written_to) - var->written_to = NC_TRUE; + var->written_to = NC_TRUE; /* For strict netcdf-3 rules, ignore erange errors between UBYTE * and BYTE types. */ if ((h5->cmode & NC_CLASSIC_MODEL) && - (var->type_info->hdr.id == NC_UBYTE || var->type_info->hdr.id == NC_BYTE) && - (mem_nc_type == NC_UBYTE || mem_nc_type == NC_BYTE) && - range_error) - range_error = 0; + (var->type_info->hdr.id == NC_UBYTE || var->type_info->hdr.id == NC_BYTE) && + (mem_nc_type == NC_UBYTE || mem_nc_type == NC_BYTE) && + range_error) + range_error = 0; exit: #ifdef LOOK if (file_spaceid > 0 && H5Sclose(file_spaceid) < 0) - BAIL2(NC_EHDFERR); + BAIL2(NC_EHDFERR); if (mem_spaceid > 0 && H5Sclose(mem_spaceid) < 0) - BAIL2(NC_EHDFERR); + BAIL2(NC_EHDFERR); if (xfer_plistid && (H5Pclose(xfer_plistid) < 0)) - BAIL2(NC_EPARINIT); + BAIL2(NC_EPARINIT); #endif if (bufrd && bufr) free(bufr); /* If there was an error return it, otherwise return any potential range error value. If none, return NC_NOERR as usual.*/ if (retval) - return THROW(retval); + return THROW(retval); if (range_error) - return NC_ERANGE; + return NC_ERANGE; return NC_NOERR; } @@ -1842,7 +1680,7 @@ NCZ_put_vars(int ncid, int varid, const size_t *startp, const size_t *countp, */ int NCZ_get_vars(int ncid, int varid, const size_t *startp, const size_t *countp, - const ptrdiff_t *stridep, void *data, nc_type mem_nc_type) + const ptrdiff_t *stridep, void *data, nc_type mem_nc_type) { NC_GRP_INFO_T *grp; NC_FILE_INFO_T *h5; @@ -1853,10 +1691,10 @@ NCZ_get_vars(int ncid, int varid, const size_t *startp, const size_t *countp, size64_t fdims[NC_MAX_VAR_DIMS]; /* size of the dimensions */ size64_t start[NC_MAX_VAR_DIMS]; size64_t stride[NC_MAX_VAR_DIMS]; - size64_t ones[NC_MAX_VAR_DIMS]; int no_read = 0, provide_fill = 0; int fill_value_size[NC_MAX_VAR_DIMS]; - int retval, range_error = 0, i, d2; + int retval, range_error = 0; + size_t i, d2; void *bufr = NULL; int need_to_convert = 0; size_t len = 1; @@ -1864,50 +1702,48 @@ NCZ_get_vars(int ncid, int varid, const size_t *startp, const size_t *countp, /* Find info for this file, group, and var. */ if ((retval = nc4_find_grp_h5_var(ncid, varid, &h5, &grp, &var))) - return THROW(retval); + return THROW(retval); assert(h5 && grp && var && var->hdr.id == varid && var->format_var_info && - var->type_info && var->type_info->size && - var->type_info->format_type_info); + var->type_info && var->type_info->size && + var->type_info->format_type_info); LOG((3, "%s: var->hdr.name %s mem_nc_type %d", __func__, - var->hdr.name, mem_nc_type)); + var->hdr.name, mem_nc_type)); zvar = (NCZ_VAR_INFO_T*)var->format_var_info; /* Check some stuff about the type and the file. Also end define * mode, if needed. */ if ((retval = check_for_vara(&mem_nc_type, var, h5))) - return THROW(retval); + return THROW(retval); assert((!var->ndims || (startp && countp))); /* Convert from size_t and ptrdiff_t to size64_t. Also do sanity * checks. */ if(var->ndims == 0) { /* scalar */ - start[0] = 0; - count[0] = 1; - stride[0] = 1; - ones[0] = 1; + start[0] = 0; + count[0] = 1; + stride[0] = 1; } else { for (i = 0; i < var->ndims; i++) { - /* If any of the stride values are non-positive, fail. */ - if (stridep && stridep[i] <= 0) - return NC_ESTRIDE; - start[i] = startp[i]; - count[i] = countp[i]; - stride[i] = stridep ? stridep[i] : 1; - - ones[i] = 1; - /* if any of the count values are zero don't actually read. */ - if (count[i] == 0) - no_read++; - - /* Get dimension sizes also */ - fdims[i] = var->dim[i]->len; - /* if any of the counts are zero don't actually read. */ - if (count[i] == 0) - no_read++; - } + /* If any of the stride values are non-positive, fail. */ + if (stridep && stridep[i] <= 0) + return NC_ESTRIDE; + start[i] = startp[i]; + count[i] = countp[i]; + stride[i] = stridep ? (size64_t)stridep[i] : 1; + + /* if any of the count values are zero don't actually read. */ + if (count[i] == 0) + no_read++; + + /* Get dimension sizes also */ + fdims[i] = var->dim[i]->len; + /* if any of the counts are zero don't actually read. */ + if (count[i] == 0) + no_read++; + } } #ifdef LOGGING @@ -1929,14 +1765,14 @@ NCZ_get_vars(int ncid, int varid, const size_t *startp, const size_t *countp, { /* We must convert - allocate a buffer. */ need_to_convert++; - if(zvar->scalar) { - len *= countp[0]; + if(zvar->scalar) { + len *= countp[0]; } else { - for (d2 = 0; d2 < (var->ndims); d2++) + for (d2 = 0; d2 < (var->ndims); d2++) len *= countp[d2]; } LOG((4, "converting data for var %s type=%d len=%d", var->hdr.name, - var->type_info->hdr.id, len)); + var->type_info->hdr.id, len)); /* If we're reading, we need bufr to have enough memory to store * the data in the file. If we're writing, we need bufr to be @@ -1953,117 +1789,117 @@ NCZ_get_vars(int ncid, int varid, const size_t *startp, const size_t *countp, * read/write data beyond their largest current length. */ for (d2 = 0; d2 < var->ndims; d2++) { - size64_t endindex = start[d2] + stride[d2] * (count[d2] - 1); /* last index read */ - dim = var->dim[d2]; - assert(dim && dim->hdr.id == var->dimids[d2]); - if (count[d2] == 0) - endindex = start[d2]; /* fixup for zero read count */ - if (dim->unlimited) - { - size64_t ulen = (size64_t)dim->len; - /* Check for out of bound requests. */ - /* Allow start to equal dim size if count is zero. */ - if (start[d2] > ulen || (start[d2] == ulen && count[d2] > 0)) - BAIL_QUIET(NC_EINVALCOORDS); - if (count[d2] && endindex >= ulen) - BAIL_QUIET(NC_EEDGE); - - /* Things get a little tricky here. If we're getting a GET - request beyond the end of this var's current length in - an unlimited dimension, we'll later need to return the - fill value for the variable. */ - if (!no_read) - { - if (start[d2] >= (size64_t)fdims[d2]) - fill_value_size[d2] = count[d2]; - else if (endindex >= fdims[d2]) - fill_value_size[d2] = count[d2] - ((fdims[d2] - start[d2])/stride[d2]); - else - fill_value_size[d2] = 0; - count[d2] -= fill_value_size[d2]; - if (count[d2] == 0) - no_read++; - if (fill_value_size[d2]) - provide_fill++; - } - else - fill_value_size[d2] = count[d2]; - } - else /* Dim is not unlimited. */ - { - /* Check for out of bound requests. */ - /* Allow start to equal dim size if count is zero. */ - if (start[d2] > (size64_t)fdims[d2] || - (start[d2] == (size64_t)fdims[d2] && count[d2] > 0)) - BAIL_QUIET(NC_EINVALCOORDS); - if (count[d2] && endindex >= fdims[d2]) - BAIL_QUIET(NC_EEDGE); - /* Set the fill value boundary */ - fill_value_size[d2] = count[d2]; - } + size64_t endindex = start[d2] + stride[d2] * (count[d2] - 1); /* last index read */ + dim = var->dim[d2]; + assert(dim && dim->hdr.id == var->dimids[d2]); + if (count[d2] == 0) + endindex = start[d2]; /* fixup for zero read count */ + if (dim->unlimited) + { + size64_t ulen = (size64_t)dim->len; + /* Check for out of bound requests. */ + /* Allow start to equal dim size if count is zero. */ + if (start[d2] > ulen || (start[d2] == ulen && count[d2] > 0)) + BAIL_QUIET(NC_EINVALCOORDS); + if (count[d2] && endindex >= ulen) + BAIL_QUIET(NC_EEDGE); + + /* Things get a little tricky here. If we're getting a GET + request beyond the end of this var's current length in + an unlimited dimension, we'll later need to return the + fill value for the variable. */ + if (!no_read) + { + if (start[d2] >= (size64_t)fdims[d2]) + fill_value_size[d2] = (int)count[d2]; + else if (endindex >= fdims[d2]) + fill_value_size[d2] = (int)(count[d2] - ((fdims[d2] - start[d2])/stride[d2])); + else + fill_value_size[d2] = 0; + count[d2] -= (size64_t)fill_value_size[d2]; + if (count[d2] == 0) + no_read++; + if (fill_value_size[d2]) + provide_fill++; + } + else + fill_value_size[d2] = (int)count[d2]; + } + else /* Dim is not unlimited. */ + { + /* Check for out of bound requests. */ + /* Allow start to equal dim size if count is zero. */ + if (start[d2] > (size64_t)fdims[d2] || + (start[d2] == (size64_t)fdims[d2] && count[d2] > 0)) + BAIL_QUIET(NC_EINVALCOORDS); + if (count[d2] && endindex >= fdims[d2]) + BAIL_QUIET(NC_EEDGE); + /* Set the fill value boundary */ + fill_value_size[d2] = (int)count[d2]; + } } if (!no_read) { #ifdef LOOK - /* Now you would think that no one would be crazy enough to write - a scalar dataspace with one of the array function calls, but you - would be wrong. So let's check to see if the dataset is - scalar. If it is, we won't try to set up a hyperslab. */ - if (H5Sget_simple_extent_type(file_spaceid) == H5S_SCALAR) - { - if ((mem_spaceid = H5Screate(H5S_SCALAR)) < 0) - BAIL(NC_EHDFERR); - scalar++; - } - else - { - if (H5Sselect_hyperslab(file_spaceid, H5S_SELECT_SET, - start, stride, ones, count) < 0) - BAIL(NC_EHDFERR); - /* Create a space for the memory, just big enough to hold the slab - we want. */ - if ((mem_spaceid = H5Screate_simple(var->ndims, count, NULL)) < 0) - BAIL(NC_EHDFERR); - } + /* Now you would think that no one would be crazy enough to write + a scalar dataspace with one of the array function calls, but you + would be wrong. So let's check to see if the dataset is + scalar. If it is, we won't try to set up a hyperslab. */ + if (H5Sget_simple_extent_type(file_spaceid) == H5S_SCALAR) + { + if ((mem_spaceid = H5Screate(H5S_SCALAR)) < 0) + BAIL(NC_EHDFERR); + scalar++; + } + else + { + if (H5Sselect_hyperslab(file_spaceid, H5S_SELECT_SET, + start, stride, ones, count) < 0) + BAIL(NC_EHDFERR); + /* Create a space for the memory, just big enough to hold the slab + we want. */ + if ((mem_spaceid = H5Screate_simple(var->ndims, count, NULL)) < 0) + BAIL(NC_EHDFERR); + } #endif #ifdef LOOK - /* Fix bug when reading ZARR files with variable of type - * fixed-length string. We need to make it look like a - * variable-length string, because that's all netCDF-4 data - * model supports, lacking anonymous dimensions. So - * variable-length strings are in allocated memory that user has - * to free, which we allocate here. */ - if (var->type_info->nc_type_class == NC_STRING && - H5Tget_size(ncz_type->hdf_typeid) > 1 && - !H5Tis_variable_str(ncz_type->hdf_typeid)) - { - size64_t fstring_len; + /* Fix bug when reading ZARR files with variable of type + * fixed-length string. We need to make it look like a + * variable-length string, because that's all netCDF-4 data + * model supports, lacking anonymous dimensions. So + * variable-length strings are in allocated memory that user has + * to free, which we allocate here. */ + if (var->type_info->nc_type_class == NC_STRING && + H5Tget_size(ncz_type->hdf_typeid) > 1 && + !H5Tis_variable_str(ncz_type->hdf_typeid)) + { + size64_t fstring_len; - if ((fstring_len = H5Tget_size(ncz_type->hdf_typeid)) == 0) - BAIL(NC_EHDFERR); - if (!(*(char **)data = malloc(1 + fstring_len))) - BAIL(NC_ENOMEM); - bufr = *(char **)data; - } + if ((fstring_len = H5Tget_size(ncz_type->hdf_typeid)) == 0) + BAIL(NC_EHDFERR); + if (!(*(char **)data = malloc(1 + fstring_len))) + BAIL(NC_ENOMEM); + bufr = *(char **)data; + } #endif #ifdef LOOK - /* Create the data transfer property list. */ - if ((xfer_plistid = H5Pcreate(H5P_DATASET_XFER)) < 0) - BAIL(NC_EHDFERR); - - /* Read this hyperslab into memory. */ - LOG((5, "About to H5Dread some data...")); - if (H5Dread(ncz_var->hdf_datasetid, - ((NCZ_TYPE_INFO_T *)var->type_info->format_type_info)->native_hdf_typeid, - mem_spaceid, file_spaceid, xfer_plistid, bufr) < 0) - BAIL(NC_EHDFERR); + /* Create the data transfer property list. */ + if ((xfer_plistid = H5Pcreate(H5P_DATASET_XFER)) < 0) + BAIL(NC_EHDFERR); + + /* Read this hyperslab into memory. */ + LOG((5, "About to H5Dread some data...")); + if (H5Dread(ncz_var->hdf_datasetid, + ((NCZ_TYPE_INFO_T *)var->type_info->format_type_info)->native_hdf_typeid, + mem_spaceid, file_spaceid, xfer_plistid, bufr) < 0) + BAIL(NC_EHDFERR); #endif /*LOOK*/ - if((retval = NCZ_transferslice(var, READING, start, count, stride, bufr, var->type_info->hdr.id))) - BAIL(retval); + if((retval = NCZ_transferslice(var, READING, start, count, stride, bufr, var->type_info->hdr.id))) + BAIL(retval); } /* endif ! no_read */ /* Now we need to fake up any further data that was asked for, @@ -2071,74 +1907,74 @@ NCZ_get_vars(int ncid, int varid, const size_t *startp, const size_t *countp, just read, if any. */ if (!zvar->scalar && provide_fill) { - void *filldata; - size_t real_data_size = 0; - size_t fill_len; - - /* Skip past the real data we've already read. */ - if (!no_read) - for (real_data_size = file_type_size, d2 = 0; d2 < var->ndims; d2++) - real_data_size *= count[d2]; - - /* Get the fill value from the ZARR variable. Memory will be - * allocated. */ - if (NCZ_ensure_fill_value(var)) - BAIL(NC_EINVAL); - - /* How many fill values do we need? */ - for (fill_len = 1, d2 = 0; d2 < var->ndims; d2++) - fill_len *= (fill_value_size[d2] ? fill_value_size[d2] : 1); - - /* Copy the fill value into the rest of the data buffer. */ - filldata = (char *)data + real_data_size; - for (i = 0; i < fill_len; i++) - { - /* Copy one instance of the fill_value */ - if((retval = NC_copy_data(h5->controller,var->type_info->hdr.id,var->fill_value,1,filldata))) - BAIL(retval); - filldata = (char *)filldata + file_type_size; - } + void *filldata; + size_t real_data_size = 0; + size_t fill_len; + + /* Skip past the real data we've already read. */ + if (!no_read) + for (real_data_size = file_type_size, d2 = 0; d2 < var->ndims; d2++) + real_data_size *= count[d2]; + + /* Get the fill value from the ZARR variable. Memory will be + * allocated. */ + if (NCZ_ensure_fill_value(var)) + BAIL(NC_EINVAL); + + /* How many fill values do we need? */ + for (fill_len = 1, d2 = 0; d2 < var->ndims; d2++) + fill_len *= (size_t)(fill_value_size[d2] ? fill_value_size[d2] : 1); + + /* Copy the fill value into the rest of the data buffer. */ + filldata = (char *)data + real_data_size; + for (i = 0; i < fill_len; i++) + { + /* Copy one instance of the fill_value */ + if((retval = NC_copy_data(h5->controller,var->type_info->hdr.id,var->fill_value,1,filldata))) + BAIL(retval); + filldata = (char *)filldata + file_type_size; + } } /* Convert data type if needed. */ if (need_to_convert) { - if(var->quantize_mode < 0) {if((retval = NCZ_ensure_quantizer(ncid,var))) BAIL(retval);} - if ((retval = nc4_convert_type(bufr, data, var->type_info->hdr.id, mem_nc_type, - len, &range_error, var->fill_value, - (h5->cmode & NC_CLASSIC_MODEL), var->quantize_mode, - var->nsd))) - BAIL(retval); + if(var->quantize_mode == 0) {if((retval = NCZ_ensure_quantizer(ncid,var))) BAIL(retval);} + if ((retval = nc4_convert_type(bufr, data, var->type_info->hdr.id, mem_nc_type, + len, &range_error, var->fill_value, + (h5->cmode & NC_CLASSIC_MODEL), var->quantize_mode, + var->nsd))) + BAIL(retval); /* For strict netcdf-3 rules, ignore erange errors between UBYTE - * and BYTE types. */ - if ((h5->cmode & NC_CLASSIC_MODEL) && - (var->type_info->hdr.id == NC_UBYTE || var->type_info->hdr.id == NC_BYTE) && - (mem_nc_type == NC_UBYTE || mem_nc_type == NC_BYTE) && - range_error) - range_error = 0; + * and BYTE types. */ + if ((h5->cmode & NC_CLASSIC_MODEL) && + (var->type_info->hdr.id == NC_UBYTE || var->type_info->hdr.id == NC_BYTE) && + (mem_nc_type == NC_UBYTE || mem_nc_type == NC_BYTE) && + range_error) + range_error = 0; } exit: #ifdef LOOK if (file_spaceid > 0) - if (H5Sclose(file_spaceid) < 0) - BAIL2(NC_EHDFERR); + if (H5Sclose(file_spaceid) < 0) + BAIL2(NC_EHDFERR); if (mem_spaceid > 0) - if (H5Sclose(mem_spaceid) < 0) - BAIL2(NC_EHDFERR); + if (H5Sclose(mem_spaceid) < 0) + BAIL2(NC_EHDFERR); if (xfer_plistid > 0) - if (H5Pclose(xfer_plistid) < 0) + if (H5Pclose(xfer_plistid) < 0) - BAIL2(NC_EHDFERR); + BAIL2(NC_EHDFERR); #endif if (need_to_convert && bufr) - free(bufr); + free(bufr); /* If there was an error return it, otherwise return any potential range error value. If none, return NC_NOERR as usual.*/ if (retval) - return THROW(retval); + return THROW(retval); if (range_error) - return THROW(NC_ERANGE); + return THROW(NC_ERANGE); return NC_NOERR; } @@ -2176,11 +2012,11 @@ NCZ_get_vars(int ncid, int varid, const size_t *startp, const size_t *countp, */ int NCZ_inq_var_all(int ncid, int varid, char *name, nc_type *xtypep, - int *ndimsp, int *dimidsp, int *nattsp, - int *shufflep, int *unused4, int *unused5, - int *fletcher32p, int *storagep, size_t *chunksizesp, - int *no_fill, void *fill_valuep, int *endiannessp, - unsigned int *unused1, size_t *unused2, unsigned int *unused3) + int *ndimsp, int *dimidsp, int *nattsp, + int *shufflep, int *unused4, int *unused5, + int *fletcher32p, int *storagep, size_t *chunksizesp, + int *no_fill, void *fill_valuep, int *endiannessp, + unsigned int *unused1, size_t *unused2, unsigned int *unused3) { NC_FILE_INFO_T *h5; NC_GRP_INFO_T *grp; @@ -2194,43 +2030,43 @@ NCZ_inq_var_all(int ncid, int varid, char *name, nc_type *xtypep, /* Find the file, group, and var info, and do lazy att read if * needed. */ if ((retval = ncz_find_grp_var_att(ncid, varid, NULL, 0, 0, NULL, - &h5, &grp, &var, NULL))) - goto done; + &h5, &grp, &var, NULL))) + goto done; assert(grp && h5); /* Short-circuit the filter-related inquiries */ if(shufflep) { - *shufflep = 0; - if((retval = NCZ_inq_var_filter_info(ncid,varid,H5Z_FILTER_SHUFFLE,NULL,NULL))==NC_NOERR) - *shufflep = 1; + *shufflep = 0; + if((retval = NCZ_inq_var_filter_info(ncid,varid,H5Z_FILTER_SHUFFLE,NULL,NULL))==NC_NOERR) + *shufflep = 1; } retval = NC_NOERR; /* reset */ if(fletcher32p) { - *fletcher32p = 0; - if((retval = NCZ_inq_var_filter_info(ncid,varid,H5Z_FILTER_FLETCHER32,NULL,NULL))==NC_NOERR) - *fletcher32p = 1; + *fletcher32p = 0; + if((retval = NCZ_inq_var_filter_info(ncid,varid,H5Z_FILTER_FLETCHER32,NULL,NULL))==NC_NOERR) + *fletcher32p = 1; } retval = NC_NOERR; /* reset */ /* Now that lazy atts have been read, use the libsrc4 function to * get the answers. */ retval = NC4_inq_var_all(ncid, varid, name, xtypep, ndimsp, dimidsp, nattsp, - NULL, unused4, unused5, NULL, - storagep, chunksizesp, no_fill, fill_valuep, - endiannessp, unused1, unused2, unused3); + NULL, unused4, unused5, NULL, + storagep, chunksizesp, no_fill, fill_valuep, + endiannessp, unused1, unused2, unused3); done: return ZUNTRACEX(retval,"xtype=%d natts=%d shuffle=%d fletcher32=%d no_fill=%d endianness=%d ndims=%d dimids=%s storage=%d chunksizes=%s", - (xtypep?*xtypep:-1), - (nattsp?*nattsp:-1), - (shufflep?*shufflep:-1), - (fletcher32p?*fletcher32p:-1), - (no_fill?*no_fill:-1), - (endiannessp?*endiannessp:-1), - (ndimsp?*ndimsp:-1), - (dimidsp?nczprint_idvector(var->ndims,dimidsp):"null"), - (storagep?*storagep:-1), - (chunksizesp?nczprint_sizevector(var->ndims,chunksizesp):"null")); + (xtypep?*xtypep:-1), + (nattsp?*nattsp:-1), + (shufflep?*shufflep:-1), + (fletcher32p?*fletcher32p:-1), + (no_fill?*no_fill:-1), + (endiannessp?*endiannessp:-1), + (ndimsp?*ndimsp:-1), + (dimidsp?nczprint_idvector(var->ndims,dimidsp):"null"), + (storagep?*storagep:-1), + (chunksizesp?nczprint_sizevector(var->ndims,chunksizesp):"null")); } #ifdef LOOK @@ -2249,23 +2085,23 @@ NCZ_inq_var_all(int ncid, int varid, char *name, nc_type *xtypep, */ int ncz_set_var_chunk_cache_ints(int ncid, int varid, int size, int nelems, - int preemption) + int preemption) { size_t real_size = H5D_CHUNK_CACHE_NBYTES_DEFAULT; size_t real_nelems = H5D_CHUNK_CACHE_NSLOTS_DEFAULT; float real_preemption = CHUNK_CACHE_PREEMPTION; if (size >= 0) - real_size = ((size_t) size) * MEGABYTE; + real_size = ((size_t) size) * MEGABYTE; if (nelems >= 0) - real_nelems = nelems; + real_nelems = nelems; if (preemption >= 0) - real_preemption = preemption / 100.; + real_preemption = preemption / 100.; return NCZ_set_var_chunk_cache(ncid, varid, real_size, real_nelems, - real_preemption); + real_preemption); } #endif @@ -2281,48 +2117,48 @@ ncz_gettype(NC_FILE_INFO_T* h5, NC_GRP_INFO_T* container, int xtype, NC_TYPE_INF * struct. */ if (xtype <= NC_STRING) { - size_t len; - char name[NC_MAX_NAME]; - - /* Get type name and length. */ - if((retval = NC4_inq_atomic_type(xtype,name,&len))) - BAIL(retval); - - /* Create new NC_TYPE_INFO_T struct for this atomic type. */ - if ((retval = nc4_type_new(len, name, xtype, &type))) - BAIL(retval); - assert(type->rc == 0); - type->container = container; - type->endianness = (NC_isLittleEndian()?NC_ENDIAN_LITTLE:NC_ENDIAN_BIG); - type->size = len; - - /* Allocate storage for NCZ-specific type info. */ - if (!(ztype = calloc(1, sizeof(NCZ_TYPE_INFO_T)))) - return NC_ENOMEM; - type->format_type_info = ztype; - ztype->common.file = h5; - ztype = NULL; - - /* Set the "class" of the type */ - if (xtype == NC_CHAR) - type->nc_type_class = NC_CHAR; - else - { - if(xtype == NC_FLOAT || xtype == NC_DOUBLE) - type->nc_type_class = NC_FLOAT; - else if(xtype < NC_STRING) - type->nc_type_class = NC_INT; - else - type->nc_type_class = NC_STRING; - } + size_t len; + char name[NC_MAX_NAME]; + + /* Get type name and length. */ + if((retval = NC4_inq_atomic_type(xtype,name,&len))) + BAIL(retval); + + /* Create new NC_TYPE_INFO_T struct for this atomic type. */ + if ((retval = nc4_type_new(len, name, xtype, &type))) + BAIL(retval); + assert(type->rc == 0); + type->container = container; + type->endianness = (NC_isLittleEndian()?NC_ENDIAN_LITTLE:NC_ENDIAN_BIG); + type->size = len; + + /* Allocate storage for NCZ-specific type info. */ + if (!(ztype = calloc(1, sizeof(NCZ_TYPE_INFO_T)))) + return NC_ENOMEM; + type->format_type_info = ztype; + ztype->common.file = h5; + ztype = NULL; + + /* Set the "class" of the type */ + if (xtype == NC_CHAR) + type->nc_type_class = NC_CHAR; + else + { + if(xtype == NC_FLOAT || xtype == NC_DOUBLE) + type->nc_type_class = NC_FLOAT; + else if(xtype < NC_STRING) + type->nc_type_class = NC_INT; + else + type->nc_type_class = NC_STRING; + } } else { #ifdef LOOK - /* If this is a user defined type, find it. */ - if (nc4_find_type(grp->nc4_info, xtype, &type)) + /* If this is a user defined type, find it. */ + if (nc4_find_type(grp->nc4_info, xtype, &type)) #endif - BAIL(NC_EBADTYPE); + BAIL(NC_EBADTYPE); } /* increment its ref. count */ @@ -2333,44 +2169,83 @@ ncz_gettype(NC_FILE_INFO_T* h5, NC_GRP_INFO_T* container, int xtype, NC_TYPE_INF exit: if (type) - retval = nc4_type_free(type); + retval = nc4_type_free(type); nullfree(ztype); return THROW(retval); } -#if 0 -/** -Given start+count+stride+dim vectors, determine the largest -index touched per dimension. If that index is greater-than -the dimension size, then do one of two things: -1. If the dimension is fixed size, then return NC_EDIMSIZE. -2. If the dimension is unlimited, then extend the size of that - dimension to cover that maximum point. - -@param var -@param start vector -@param count vector -@param stride vector -@param reading vs writing -@return NC_EXXX error code +/*Flush all chunks to disk. Create any that are missing +and fill as needed. */ int -NCZ_update_dim_extents(NC_VAR_INFO_T* var, size64_t* start, size64_t* count, size64_t* stride, int reading) +NCZ_write_var_data(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var) { - int r; - int rank = var->ndims; - - NC_UNUSED(reading); - - for(r=0;rdim[r]; - size64_t endpoint; /* compute last point touched */ - endpoint = start[r] + stride[r]*count[r] - stride[r]; - if(dim->len < endpoint) { - if(!dim->unlimited) return NC_EDIMSIZE; - /*else*/ dim->len = endpoint+1; - } + int stat = NC_NOERR; + NCZ_VAR_INFO_T* zvar = (NCZ_VAR_INFO_T*)var->format_var_info; + + NC_UNUSED(file); + ZTRACE(3,"var=%s",var->hdr.name); + + /* Flush the cache */ + if(zvar->cache) { + if((stat = NCZ_flush_chunk_cache(zvar->cache))) goto done; } - return NC_NOERR; + +#ifdef FILLONCLOSE + /* If fill is enabled, then create missing chunks */ + if(!var->no_fill) { + size_t i; + NCZOdometer* chunkodom = NULL; + NCZ_FILE_INFO_T* zfile = (NCZ_FILE_INFO_T*)file->format_file_info; + NCZMAP* map = zfile->map; + size64_t start[NC_MAX_VAR_DIMS]; + size64_t stop[NC_MAX_VAR_DIMS]; + size64_t stride[NC_MAX_VAR_DIMS]; + char* key = NULL; + + if(var->ndims == 0) { /* scalar */ + start[i] = 0; + stop[i] = 1; + stride[i] = 1; + } else { + for(i=0;indims;i++) { + size64_t nchunks = ceildiv(var->dim[i]->len,var->chunksizes[i]); + start[i] = 0; + stop[i] = nchunks; + stride[i] = 1; + } + } + + { + if(zvar->scalar) { + if((chunkodom = nczodom_new(1,start,stop,stride,stop))==NULL) + } else { + /* Iterate over all the chunks to create missing ones */ + if((chunkodom = nczodom_new(var->ndims,start,stop,stride,stop))==NULL) + {stat = NC_ENOMEM; goto done;} + } + for(;nczodom_more(chunkodom);nczodom_next(chunkodom)) { + size64_t* indices = nczodom_indices(chunkodom); + /* Convert to key */ + if((stat = NCZ_buildchunkpath(zvar->cache,indices,&key))) goto done; + switch (stat = NCZMD_exists(file,key)) { + case NC_NOERR: goto next; /* already exists */ + case NC_ENOOBJECT: break; /* does not exist, create it with fill */ + default: goto done; /* some other error */ + } + /* If we reach here, then chunk does not exist, create it with fill */ + assert(zvar->cache->fillchunk != NULL); + if((stat=nczmap_write(map,key,0,zvar->cache->chunksize,zvar->cache->fillchunk))) goto done; +next: + nullfree(key); + key = NULL; + } + } + nczodom_free(chunkodom); + nullfree(key); + } +#endif /*FILLONCLOSE*/ + +done: + return ZUNTRACE(THROW(stat)); } -#endif diff --git a/libnczarr/zwalk.c b/libnczarr/zwalk.c index 21f9ed9436..1d90a3c191 100644 --- a/libnczarr/zwalk.c +++ b/libnczarr/zwalk.c @@ -16,11 +16,11 @@ static unsigned int optimize = 0; extern int NCZ_buildchunkkey(size_t R, const size64_t* chunkindices, char** keyp); /* 0 => no debug */ -static unsigned int wdebug = 1; +static int wdebug = 1; /* Forward */ static int NCZ_walk(NCZProjection** projv, NCZOdometer* chunkodom, NCZOdometer* slpodom, NCZOdometer* memodom, const struct Common* common, void* chunkdata); -static int rangecount(NCZChunkRange range); +static size64_t rangecount(NCZChunkRange range); static int readfromcache(void* source, size64_t* chunkindices, void** chunkdata); static int iswholechunk(struct Common* common,NCZSlice*); static int wholechunk_indices(struct Common* common, NCZSlice* slices, size64_t* chunkindices); @@ -28,22 +28,6 @@ static int wholechunk_indices(struct Common* common, NCZSlice* slices, size64_t* static int transfern(const struct Common* common, unsigned char* slpptr, unsigned char* memptr, size_t avail, size_t slpstride, void* chunkdata); #endif -#if 0 -static const char* -astype(int typesize, void* ptr) -{ - switch(typesize) { - case 4: { - static char is[8]; - snprintf(is,sizeof(is),"%u",*((unsigned int*)ptr)); - return is; - } break; - default: break; - } - return "?"; -} -#endif - /**************************************************/ int ncz_chunking_init(void) @@ -88,7 +72,8 @@ NCZ_transferslice(NC_VAR_INFO_T* var, int reading, size64_t* start, size64_t* count, size64_t* stride, void* memory, nc_type typecode) { - int r,stat = NC_NOERR; + int stat = NC_NOERR; + size_t r; size64_t dimlens[NC_MAX_VAR_DIMS]; unsigned char isunlimited[NC_MAX_VAR_DIMS]; size64_t chunklens[NC_MAX_VAR_DIMS]; @@ -156,7 +141,7 @@ NCZ_transferslice(NC_VAR_INFO_T* var, int reading, } if(wdebug >= 1) { - fprintf(stderr,"\trank=%d",common.rank); + fprintf(stderr,"\trank=%zu",common.rank); if(!common.scalar) { fprintf(stderr," dimlens=%s",nczprint_vector(common.rank,dimlens)); fprintf(stderr," chunklens=%s",nczprint_vector(common.rank,chunklens)); @@ -214,13 +199,13 @@ NCZ_transfer(struct Common* common, NCZSlice* slices) */ if(wdebug >= 2) - fprintf(stderr,"slices=%s\n",nczprint_slices(common->rank,slices)); + fprintf(stderr,"slices=%s\n",nczprint_slices((int)common->rank,slices)); if((stat = NCZ_projectslices(common, slices, &chunkodom))) goto done; if(wdebug >= 4) { - fprintf(stderr,"allprojections:\n%s",nczprint_allsliceprojections(common->rank,common->allprojections)); fflush(stderr); + fprintf(stderr,"allprojections:\n%s",nczprint_allsliceprojections((int)common->rank,common->allprojections)); fflush(stderr); } wholechunk = iswholechunk(common,slices); @@ -239,7 +224,7 @@ NCZ_transfer(struct Common* common, NCZSlice* slices) fprintf(stderr,"case: wholechunk: chunkindices: %s\n",nczprint_vector(common->rank,chunkindices)); /* Read the chunk; handles fixed vs char* strings*/ switch ((stat = common->reader.read(common->reader.source, chunkindices, &chunkdata))) { - case NC_EEMPTY: /* cache created the chunk */ + case NC_ENOOBJECT: /* not in cache => created the chunk */ break; case NC_NOERR: break; default: goto done; @@ -271,7 +256,7 @@ NCZ_transfer(struct Common* common, NCZSlice* slices) /* iterate over the odometer: all combination of chunk indices in the projections */ for(;nczodom_more(chunkodom);) { - int r; + size_t r; size64_t* chunkindices = NULL; NCZSlice slpslices[NC_MAX_VAR_DIMS]; NCZSlice memslices[NC_MAX_VAR_DIMS]; @@ -298,7 +283,7 @@ NCZ_transfer(struct Common* common, NCZSlice* slices) if(wdebug > 0) { fprintf(stderr,"Selected projections:\n"); for(r=0;rrank;r++) { - fprintf(stderr,"\t[%d] %s\n",r,nczprint_projection(*proj[r])); + fprintf(stderr,"\t[%zu] %s\n",r,nczprint_projection(*proj[r])); shape[r] = proj[r]->iocount; } fprintf(stderr,"\tshape=%s\n",nczprint_vector(common->rank,shape)); @@ -321,7 +306,7 @@ NCZ_transfer(struct Common* common, NCZSlice* slices) /* Read from cache */ stat = common->reader.read(common->reader.source, chunkindices, &chunkdata); switch (stat) { - case NC_EEMPTY: /* cache created the chunk */ + case NC_ENOOBJECT: /* cache created the chunk */ break; case NC_NOERR: break; default: goto done; @@ -359,8 +344,8 @@ wdebug2(const struct Common* common, unsigned char* slpptr, unsigned char* mempt unsigned char* membase = common->memory; unsigned slpoff = (unsigned)(slpptr - slpbase); unsigned memoff = (unsigned)(memptr - membase); - unsigned slpidx = slpoff / common->typesize; - unsigned memidx = memoff / common->typesize; + unsigned slpidx = slpoff / (unsigned)common->typesize; + unsigned memidx = memoff / (unsigned)common->typesize; unsigned value; fprintf(stderr,"wdebug2: %s: [%u/%d] %u->%u", @@ -398,6 +383,9 @@ NCZ_walk(NCZProjection** projv, NCZOdometer* chunkodom, NCZOdometer* slpodom, NC { int stat = NC_NOERR; + NC_UNUSED(projv); + NC_UNUSED(chunkodom); + for(;;) { size64_t slpoffset = 0; size64_t memoffset = 0; @@ -457,42 +445,6 @@ if(wdebug > 0) {wdebug2(common,slpptr0,memptr0,slpavail,laststride,chunkdata);} return stat; } -#if 0 -#ifdef WDEBUG -static void -wdebug1(const struct Common* common, unsigned char* srcptr, unsigned char* dstptr, size_t count, size_t stride, void* chunkdata, const char* tag) -{ - unsigned char* dstbase = (common->reading?common->memory:chunkdata); - unsigned char* srcbase = (common->reading?chunkdata:common->memory); - unsigned dstoff = (unsigned)(dstptr - dstbase); - unsigned srcoff = (unsigned)(srcptr - srcbase); -// unsigned srcidx = srcoff / sizeof(unsigned); - - fprintf(stderr,"%s: %s: [%u/%d] %u->%u", - tag, - common->reading?"read":"write", - (unsigned)count, - (unsigned)stride, - (unsigned)(srcoff/common->typesize), - (unsigned)(dstoff/common->typesize) - ); -#if 0 - fprintf(stderr,"\t%s[%u]=%u\n",(common->reading?"chunkdata":"memdata"), -// 0,((unsigned*)srcptr)[0] - srcidx,((unsigned*)srcbase)[srcidx] - ); -#endif -#if 0 - { size_t len = common->typesize*count; - fprintf(stderr," | [%u] %u->%u\n",(unsigned)len,(unsigned)srcoff,(unsigned)dstoff); - } -#endif - fprintf(stderr,"\n"); -} -#else -#define wdebug1(common,srcptr,dstptr,count,srcstride,dststride,chunkdata,tag) -#endif -#endif /*0*/ #ifdef TRANSFERN static int @@ -507,17 +459,17 @@ transfern(const struct Common* common, unsigned char* slpptr, unsigned char* mem if(common->reading) { if(slpstride == 1) { if((stat=NCZ_copy_data(common->file,common->var,slpptr,avail,common->reading,memptr))) goto done; -/// memcpy(memptr,slpptr,len); /* straight copy */ +//// memcpy(memptr,slpptr,len); /* straight copy */ } else { for(m=0,s=0;sfile,common->var,slpptr+soffset,1,common->reading,memptr+moffset))) goto done; -/// memcpy(memptr+moffset,slpptr+soffset,typesize); +//// memcpy(memptr+moffset,slpptr+soffset,typesize); } } if(common->swap && xtype < NC_STRING) - NCZ_swapatomicdata(len,memptr,common->typesize); + NCZ_swapatomicdata(len,memptr,(int)common->typesize); } else { /*writing*/ unsigned char* srcbase = (common->reading?chunkdata:common->memory); unsigned srcoff = (unsigned)(memptr - srcbase); @@ -534,36 +486,13 @@ unsigned srcidx = srcoff / sizeof(unsigned); (void)srcidx; } } if(common->swap && xtype < NC_STRING) - NCZ_swapatomicdata(len,slpptr,common->typesize); + NCZ_swapatomicdata(len,slpptr,(int)common->typesize); } done: return THROW(stat); } #endif /*TRANSFERN*/ -#if 0 -/* This function may not be necessary if code in zvar does it instead */ -static int -NCZ_fillchunk(void* chunkdata, struct Common* common) -{ - int stat = NC_NOERR; - - if(common->fillvalue == NULL) { - memset(chunkdata,0,common->chunkcount*common->typesize); - goto done; - } - - if(common->cache->fillchunk == NULL) { - /* Get fill chunk*/ - if((stat = NCZ_create_fill_chunk(common->cache->chunksize, common->typesize, common->fillvalue, &common->cache->fillchunk))) - goto done; - } - memcpy(chunkdata,common->cache->fillchunk,common->cache->chunksize); -done: - return stat; -} -#endif - /* Break out this piece so we can use it for unit testing */ /** @param slices @@ -577,7 +506,7 @@ NCZ_projectslices(struct Common* common, NCZOdometer** odomp) { int stat = NC_NOERR; - int r; + size_t r; NCZOdometer* odom = NULL; NCZSliceProjections* allprojections = NULL; NCZChunkRange ranges[NC_MAX_VAR_DIMS]; @@ -605,7 +534,7 @@ NCZ_projectslices(struct Common* common, /* Compute the shape vector */ for(r=0;rrank;r++) { - int j; + size_t j; size64_t iocount = 0; NCZProjection* projections = allprojections[r].projections; for(j=0;jrank;i++) { @@ -745,7 +658,7 @@ iswholechunk(struct Common* common, NCZSlice* slices) static int wholechunk_indices(struct Common* common, NCZSlice* slices, size64_t* chunkindices) { - int i; + size_t i; for(i=0;irank;i++) chunkindices[i] = (slices[i].start / common->chunklens[i]); return NC_NOERR; @@ -769,7 +682,7 @@ NCZ_transferscalar(struct Common* common) /* Read from single chunk from cache */ chunkindices[0] = 0; switch ((stat = common->reader.read(common->reader.source, chunkindices, &chunkdata))) { - case NC_EEMPTY: /* cache created the chunk */ + case NC_ENOOBJECT: /* cache created the chunk */ break; case NC_NOERR: break; default: goto done; diff --git a/libnczarr/zxcache.c b/libnczarr/zxcache.c index f4ab040d72..06f18a0466 100644 --- a/libnczarr/zxcache.c +++ b/libnczarr/zxcache.c @@ -176,7 +176,7 @@ NCZ_create_chunk_cache(NC_VAR_INFO_T* var, size64_t chunksize, char dimsep, NCZC if((cache = calloc(1,sizeof(NCZChunkCache))) == NULL) {stat = NC_ENOMEM; goto done;} cache->var = var; - cache->ndims = var->ndims + zvar->scalar; + cache->ndims = var->ndims; cache->fillchunk = NULL; cache->chunksize = chunksize; cache->dimension_separator = dimsep; @@ -273,7 +273,7 @@ int NCZ_read_cache_chunk(NCZChunkCache* cache, const size64_t* indices, void** datap) { int stat = NC_NOERR; - int rank = cache->ndims; + size_t rank = cache->ndims; NCZCacheEntry* entry = NULL; ncexhashkey_t hkey = 0; int created = 0; @@ -289,6 +289,7 @@ NCZ_read_cache_chunk(NCZChunkCache* cache, const size64_t* indices, void** datap break; case NC_ENOOBJECT: entry = NULL; /* not found; */ + stat = NC_NOERR; break; default: goto done; } @@ -318,51 +319,10 @@ fprintf(stderr,"|cache.read.lru|=%ld\n",nclistlength(cache->mru)); entry = NULL; done: - if(created && stat == NC_NOERR) stat = NC_EEMPTY; /* tell upper layers */ - if(entry) free_cache_entry(cache,entry); - return THROW(stat); -} - -#if 0 -int -NCZ_write_cache_chunk(NCZChunkCache* cache, const size64_t* indices, void* content) -{ - int stat = NC_NOERR; - int rank = cache->ndims; - NCZCacheEntry* entry = NULL; - ncexhashkey_t hkey; - - /* create the hash key */ - hkey = ncxcachekey(indices,sizeof(size64_t)*cache->ndims); - - if(entry == NULL) { /*!found*/ - /* Create a new entry */ - if((entry = calloc(1,sizeof(NCZCacheEntry)))==NULL) - {stat = NC_ENOMEM; goto done;} - memcpy(entry->indices,indices,rank*sizeof(size64_t)); - if((stat = NCZ_buildchunkpath(cache,indices,&entry->key))) goto done; - entry->hashkey = hkey; - /* Create the local copy space */ - entry->size = cache->chunksize; - if((entry->data = calloc(1,cache->chunksize)) == NULL) - {stat = NC_ENOMEM; goto done;} - memcpy(entry->data,content,cache->chunksize); - } - setmodified(entry,1); - nclistpush(cache->mru,entry); /* MRU order */ -#ifdef DEBUG -fprintf(stderr,"|cache.write|=%ld\n",nclistlength(cache->mru)); -#endif - entry = NULL; - - /* Ensure cache constraints not violated */ - if((stat=verifycache(cache))) goto done; - -done: + if(created && stat == NC_NOERR) stat = NC_ENOOBJECT; /* tell upper layers */ if(entry) free_cache_entry(cache,entry); return THROW(stat); } -#endif /* Constrain cache */ static int @@ -370,11 +330,6 @@ verifycache(NCZChunkCache* cache) { int stat = NC_NOERR; -#if 0 - /* Sanity check; make sure at least one entry is always allowed */ - if(nclistlength(cache->mru) == 1) - goto done; -#endif if((stat = constraincache(cache,USEPARAMSIZE))) goto done; done: return stat; @@ -386,14 +341,7 @@ static int flushcache(NCZChunkCache* cache) { int stat = NC_NOERR; -#if 0 - size_t oldsize = cache->params.size; - cache->params.size = 0; stat = constraincache(cache,USEPARAMSIZE); - cache->params.size = oldsize; -#else - stat = constraincache(cache,USEPARAMSIZE); -#endif return stat; } @@ -502,37 +450,42 @@ NCZ_ensure_fill_chunk(NCZChunkCache* cache) nc_type typeid = var->type_info->hdr.id; size_t typesize = var->type_info->size; - if(cache->fillchunk) goto done; + /* If the fill_value is changed, then relaim the fill chunk */ + if((stat = NCZ_ensure_fill_value(var))) goto done; + + if(cache->fillchunk) goto done; /* Still there => value did not change */ + /* Allocate new fill chunk */ if((cache->fillchunk = malloc(cache->chunksize))==NULL) {stat = NC_ENOMEM; goto done;} + if(var->no_fill) { /* use zeros */ memset(cache->fillchunk,0,cache->chunksize); goto done; } - if((stat = NCZ_ensure_fill_value(var))) goto done; + + assert(var->fill_value != NULL); if(typeid == NC_STRING) { char* src = *((char**)(var->fill_value)); char** dst = (char**)(cache->fillchunk); for(i=0;ichunkcount;i++) dst[i] = strdup(src); - } else - switch (typesize) { - case 1: { + } else switch (typesize) { + case 1: { /*byte|char*/ unsigned char c = *((unsigned char*)var->fill_value); memset(cache->fillchunk,c,cache->chunksize); } break; - case 2: { + case 2: { /*short*/ unsigned short fv = *((unsigned short*)var->fill_value); unsigned short* p2 = (unsigned short*)cache->fillchunk; for(i=0;ichunksize;i+=typesize) *p2++ = fv; } break; - case 4: { + case 4: { /*int|float*/ unsigned int fv = *((unsigned int*)var->fill_value); unsigned int* p4 = (unsigned int*)cache->fillchunk; for(i=0;ichunksize;i+=typesize) *p4++ = fv; } break; - case 8: { + case 8: { /*long|double*/ unsigned long long fv = *((unsigned long long*)var->fill_value); unsigned long long* p8 = (unsigned long long*)cache->fillchunk; for(i=0;ichunksize;i+=typesize) *p8++ = fv; @@ -540,9 +493,10 @@ NCZ_ensure_fill_chunk(NCZChunkCache* cache) default: { unsigned char* p; for(p=cache->fillchunk,i=0;ichunksize;i+=typesize,p+=typesize) - memcpy(p,var->fill_value,typesize); + memcpy(p,var->fill_value,typesize); /* Warning: only works for non-pointer values */ } break; } + done: return NC_NOERR; } @@ -579,54 +533,6 @@ NCZ_chunk_cache_modify(NCZChunkCache* cache, const size64_t* indices) return THROW(stat); } -/**************************************************/ -/* -From Zarr V2 Specification: -"The compressed sequence of bytes for each chunk is stored under -a key formed from the index of the chunk within the grid of -chunks representing the array. To form a string key for a -chunk, the indices are converted to strings and concatenated -with the dimension_separator character ('.' or '/') separating -each index. For example, given an array with shape (10000, -10000) and chunk shape (1000, 1000) there will be 100 chunks -laid out in a 10 by 10 grid. The chunk with indices (0, 0) -provides data for rows 0-1000 and columns 0-1000 and is stored -under the key "0.0"; the chunk with indices (2, 4) provides data -for rows 2000-3000 and columns 4000-5000 and is stored under the -key "2.4"; etc." -*/ - -/** - * @param R Rank - * @param chunkindices The chunk indices - * @param dimsep the dimension separator - * @param keyp Return the chunk key string - */ -int -NCZ_buildchunkkey(size_t R, const size64_t* chunkindices, char dimsep, char** keyp) -{ - int stat = NC_NOERR; - size_t r; - NCbytes* key = ncbytesnew(); - - if(keyp) *keyp = NULL; - - assert(islegaldimsep(dimsep)); - - for(r=0;r 0) ncbytesappend(key,dimsep); - /* Print as decimal with no leading zeros */ - snprintf(sindex,sizeof(sindex),"%lu",(unsigned long)chunkindices[r]); - ncbytescat(key,sindex); - } - ncbytesnull(key); - if(keyp) *keyp = ncbytesextract(key); - - ncbytesfree(key); - return THROW(stat); -} - /** * @internal Push data to chunk of a file. * If chunk does not exist, create it @@ -662,7 +568,7 @@ put_chunk(NCZChunkCache* cache, NCZCacheEntry* entry) if(tid == NC_STRING && !entry->isfixedstring) { /* Convert from char* to char[strlen] format */ - int maxstrlen = NCZ_get_maxstrlen((NC_OBJ*)cache->var); + size_t maxstrlen = NCZ_get_maxstrlen((NC_OBJ*)cache->var); assert(maxstrlen > 0); if((strchunk = malloc((size_t)cache->chunkcount * (size_t)maxstrlen))==NULL) {stat = NC_ENOMEM; goto done;} /* copy char* to char[] format */ @@ -703,7 +609,6 @@ put_chunk(NCZChunkCache* cache, NCZCacheEntry* entry) switch(stat) { case NC_NOERR: break; - case NC_EEMPTY: default: goto done; } done: @@ -754,8 +659,8 @@ get_chunk(NCZChunkCache* cache, NCZCacheEntry* entry) stat = nczmap_len(map,path,&size); nullfree(path); path = NULL; switch(stat) { - case NC_NOERR: entry->size = size; break; - case NC_EEMPTY: empty = 1; stat = NC_NOERR; break; + case NC_NOERR: entry->size = size; empty = (size == 0); break; + case NC_ENOOBJECT: empty = 1; stat = NC_NOERR; break; /* "create" the object */ default: goto done; } @@ -772,7 +677,7 @@ get_chunk(NCZChunkCache* cache, NCZCacheEntry* entry) nullfree(path); path = NULL; switch (stat) { case NC_NOERR: break; - case NC_EEMPTY: empty = 1; stat = NC_NOERR;break; + case NC_ENOOBJECT: empty = 1; stat = NC_NOERR;break; default: goto done; } entry->isfiltered = (int)FILTERED(cache); /* Is the data being read filtered? */ @@ -817,7 +722,7 @@ get_chunk(NCZChunkCache* cache, NCZCacheEntry* entry) if(tid == NC_STRING && entry->isfixedstring) { /* Convert from char[strlen] to char* format */ - int maxstrlen = NCZ_get_maxstrlen((NC_OBJ*)cache->var); + size_t maxstrlen = NCZ_get_maxstrlen((NC_OBJ*)cache->var); assert(maxstrlen > 0); /* copy char[] to char* format */ if((strchunk = (char**)malloc(sizeof(char*)*cache->chunkcount))==NULL) @@ -849,7 +754,7 @@ NCZ_buildchunkpath(NCZChunkCache* cache, const size64_t* chunkindices, struct Ch assert(key != NULL); /* Get the chunk object name */ - if((stat = NCZ_buildchunkkey(cache->ndims, chunkindices, cache->dimension_separator, &chunkname))) goto done; + if((stat = NCZF_encode_chunkkey(cache->var->container->nc4_info,cache->var,cache->ndims, chunkindices, cache->dimension_separator, &chunkname))) goto done; /* Get the var object key */ if((stat = NCZ_varkey(cache->var,&varkey))) goto done; key->varkey = varkey; varkey = NULL; diff --git a/libnetcdf.settings.in b/libnetcdf.settings.in index a9fae8d1b7..a7962b9bcf 100644 --- a/libnetcdf.settings.in +++ b/libnetcdf.settings.in @@ -29,6 +29,7 @@ Legacy Macros: @HAS_LEGACY_MACROS@ -------- Benchmarks: @HAS_BENCHMARKS@ NetCDF-2 API: @HAS_NC2@ +NetCDF-4 API: @HAS_NC4@ HDF4 Support: @HAS_HDF4@ HDF5 Support: @HAS_HDF5@ CDF5 Support: @HAS_CDF5@ @@ -43,7 +44,10 @@ S3 Support: @HAS_S3@ S3 SDK: @WHICH_S3_SDK@ NCZarr Support: @HAS_NCZARR@ -NCZarr Zip Support: @HAS_NCZARR_ZIP@ +NCZarr Support V3: @HAS_NCZARR_V3@ +NCZarr File Support: yes +NCZarr Zip Support: @HAS_NCZARR_ZIP@ +NCZarr S3 Support: @HAS_S3@ Diskless Support: @HAS_DISKLESS@ MMap Support: @HAS_MMAP@ diff --git a/libsrc/ncx.h b/libsrc/ncx.h index 13e463dd5c..37c099535e 100644 --- a/libsrc/ncx.h +++ b/libsrc/ncx.h @@ -36,7 +36,6 @@ #include "rnd.h" #include /* size_t */ #include -#include /* off_t */ #include "ncdispatch.h" #if defined(_CRAY) && !defined(_CRAYIEEE) && !defined(__crayx1) diff --git a/libsrc/s3io.c b/libsrc/s3io.c index f8bd941b42..d4381da7a9 100644 --- a/libsrc/s3io.c +++ b/libsrc/s3io.c @@ -184,7 +184,7 @@ s3io_open(const char* path, /* Get the size */ switch (status = NC_s3sdkinfo(s3io->s3client,s3io->s3.bucket,s3io->s3.rootkey,(long long unsigned*)&s3io->size,&s3io->errmsg)) { case NC_NOERR: break; - case NC_EEMPTY: + case NC_ENOOBJECT: s3io->size = 0; goto done; default: @@ -253,7 +253,9 @@ s3io_close(ncio* nciop, int deleteit) assert(s3io != NULL); if(s3io->s3client && s3io->s3.bucket && s3io->s3.rootkey) { - NC_s3sdkclose(s3io->s3client, &s3io->s3, deleteit, &s3io->errmsg); + if(deleteit) + NC_s3sdktruncate(s3io->s3client, s3io->s3.bucket, s3io->s3.rootkey, &s3io->errmsg); + NC_s3sdkclose(s3io->s3client, &s3io->errmsg); } s3io->s3client = NULL; NC_s3clear(&s3io->s3); diff --git a/libsrc4/CMakeLists.txt b/libsrc4/CMakeLists.txt index 09227318e6..e9c0a26371 100644 --- a/libsrc4/CMakeLists.txt +++ b/libsrc4/CMakeLists.txt @@ -9,6 +9,19 @@ set(libsrc4_SOURCES nc4dispatch.c nc4attr.c nc4dim.c nc4grp.c nc4internal.c nc4type.c nc4var.c ncfunc.c ncindex.c nc4cache.c) +## +# Turn off inclusion of particular files when using the cmake-native +# option to turn on Unity Builds. +# +# For more information, see: +# * https://github.com/Unidata/netcdf-c/pull/2839/ +# * https://cmake.org/cmake/help/latest/prop_tgt/UNITY_BUILD.html +# * https://cmake.org/cmake/help/latest/prop_tgt/UNITY_BUILD_MODE.html#prop_tgt:UNITY_BUILD_MODE +## +set_property(SOURCE nc4dispatch.c + PROPERTY + SKIP_UNITY_BUILD_INCLUSION ON) + add_library(netcdf4 OBJECT ${libsrc4_SOURCES}) if (NETCDF_ENABLE_DLL) diff --git a/libsrc4/nc4internal.c b/libsrc4/nc4internal.c index 3274c89a6c..3969810499 100644 --- a/libsrc4/nc4internal.c +++ b/libsrc4/nc4internal.c @@ -25,7 +25,9 @@ #include "ncutf8.h" #include #include +#include #include "ncrc.h" +#include "nclog.h" /** @internal Number of reserved attributes. These attributes are * hidden from the netcdf user, but exist in the implementation @@ -44,7 +46,6 @@ static NC_reservedatt NC_reserved[] = { {NC_ATT_DIMENSION_LIST, READONLYFLAG|HIDDENATTRFLAG}, /*DIMENSION_LIST*/ {NC_ATT_NAME, READONLYFLAG|HIDDENATTRFLAG}, /*NAME*/ {NC_ATT_REFERENCE_LIST, READONLYFLAG|HIDDENATTRFLAG}, /*REFERENCE_LIST*/ - {NC_XARRAY_DIMS, READONLYFLAG|HIDDENATTRFLAG}, /*_ARRAY_DIMENSIONS*/ {NC_ATT_CODECS, VARFLAG|READONLYFLAG|NAMEONLYFLAG}, /*_Codecs*/ {NC_ATT_FORMAT, READONLYFLAG}, /*_Format*/ {ISNETCDF4ATT, READONLYFLAG|NAMEONLYFLAG|VIRTUALFLAG}, /*_IsNetcdf4*/ @@ -53,15 +54,29 @@ static NC_reservedatt NC_reserved[] = { {NC_ATT_DIMID_NAME, READONLYFLAG|HIDDENATTRFLAG}, /*_Netcdf4Dimid*/ {SUPERBLOCKATT, READONLYFLAG|NAMEONLYFLAG|VIRTUALFLAG}, /*_SuperblockVersion*/ {NC_ATT_NC3_STRICT_NAME, READONLYFLAG}, /*_nc3_strict*/ - {NC_ATT_NC3_STRICT_NAME, READONLYFLAG}, /*_nc3_strict*/ - {NC_NCZARR_ATTR, READONLYFLAG|HIDDENATTRFLAG}, /*_nczarr_attr */ - {NC_NCZARR_GROUP, READONLYFLAG|HIDDENATTRFLAG}, /*_nczarr_group */ - {NC_NCZARR_ARRAY, READONLYFLAG|HIDDENATTRFLAG}, /*_nczarr_array */ - {NC_NCZARR_SUPERBLOCK, READONLYFLAG|HIDDENATTRFLAG}, /*_nczarr_superblock */ + {NC_XARRAY_DIMS, READONLYFLAG|HIDDENATTRFLAG|COMPLEXJSON}, /*_ARRAY_DIMENSIONS*/ + {NC_NCZARR_SUPERBLOCK_ATTR, READONLYFLAG|HIDDENATTRFLAG|COMPLEXJSON}, /*_nczarr_superblock */ + {NC_NCZARR_GROUP_ATTR, READONLYFLAG|HIDDENATTRFLAG|COMPLEXJSON}, /*_nczarr_group */ + {NC_NCZARR_ARRAY_ATTR, READONLYFLAG|HIDDENATTRFLAG|COMPLEXJSON}, /*_nczarr_array */ + {NC_NCZARR_ATTRS_ATTR, READONLYFLAG|HIDDENATTRFLAG|COMPLEXJSON}, /*_nczarr_attrs */ + {NC_NCZARR_ATTR_ATTR, READONLYFLAG|HIDDENATTRFLAG|COMPLEXJSON}, /*_nczarr_attr */ + {NC_NCZARR_GROUP_ATTR, READONLYFLAG|HIDDENATTRFLAG|COMPLEXJSON}, /*_nczarr_group */ + {NC_NCZARR_ARRAY_ATTR, READONLYFLAG|HIDDENATTRFLAG|COMPLEXJSON}, /*_nczarr_array */ }; #define NRESERVED (sizeof(NC_reserved) / sizeof(NC_reservedatt)) /*|NC_reservedatt*/ +static const struct Quantizer { +char* name; +int mode; +} NC_quantize_atts[] = { +{NC_QUANTIZE_BITGROOM_ATT_NAME, NC_QUANTIZE_BITGROOM}, +{NC_QUANTIZE_GRANULARBR_ATT_NAME, NC_QUANTIZE_GRANULARBR}, +{NC_QUANTIZE_BITROUND_ATT_NAME, NC_QUANTIZE_BITROUND}, +{NULL,0} +}; + /*Forward */ +static int nc4_rec_grp_del_att_data(NC_GRP_INFO_T *grp); static int NC4_move_in_NCList(NC* nc, int new_id); static int bincmp(const void* arg1, const void* arg2); static int sortcmp(const void* arg1, const void* arg2); @@ -160,31 +175,31 @@ nc4_check_name(const char *name, char *norm_name) /* Check for NULL. */ if (!name) - return NC_EINVAL; + return NCTHROW(NC_EINVAL); /* Make sure this is a valid netcdf name. This should be done * before the name is normalized, because it gives better error * codes for bad utf8 strings. */ if ((retval = NC_check_name(name))) - return retval; + return NCTHROW(retval); /* Normalize the name. */ if ((retval = nc_utf8_normalize((const unsigned char *)name, (unsigned char **)&temp))) - return retval; + return NCTHROW(retval); /* Check length of normalized name. */ if (strlen(temp) > NC_MAX_NAME) { free(temp); - return NC_EMAXNAME; + return NCTHROW(NC_EMAXNAME); } /* Copy the normalized name. */ strcpy(norm_name, temp); free(temp); - return NC_NOERR; + return NCTHROW(NC_NOERR); } /** @@ -340,7 +355,7 @@ nc4_nc4f_list_add(NC *nc, const char *path, int mode) nc->dispatchdata = h5; h5->controller = nc; - h5->hdr.sort = NCFIL; + h5->hdr.sort = NCFILE; h5->hdr.name = strdup(path); h5->hdr.id = nc->ext_ncid; @@ -795,9 +810,11 @@ nc4_var_set_ndims(NC_VAR_INFO_T *var, int ndims) /* Allocate space for dimension information. */ if (ndims) { - if (!(var->dim = calloc((size_t)ndims, sizeof(NC_DIM_INFO_T *)))) + if(var->dim != NULL) free(var->dim); + if (!(var->dim = calloc((size_t)ndims, sizeof(NC_DIM_INFO_T *)))) return NC_ENOMEM; - if (!(var->dimids = calloc((size_t)ndims, sizeof(int)))) + if(var->dimids != NULL) free(var->dimids); + if (!(var->dimids = calloc((size_t)ndims, sizeof(int)))) return NC_ENOMEM; /* Initialize dimids to illegal values (-1). See the comment @@ -1544,18 +1561,20 @@ nc4_rec_grp_del(NC_GRP_INFO_T *grp) } /** - * @internal Recursively delete the data for a group (and everything - * it contains) in our internal metadata store. + * @internal Recursively delete the attribute data for all groups and + * vars. We must delete the attribute contents + * before deleteing any metadata because nc_reclaim_data depends + * on the existence of the type info. * * @param grp Pointer to group info struct. * * @return ::NC_NOERR No error. * @author Ed Hartnett, Dennis Heimbigner */ -int +static int nc4_rec_grp_del_att_data(NC_GRP_INFO_T *grp) { - int retval; + int retval = NC_NOERR; assert(grp); LOG((3, "%s: grp->name %s", __func__, grp->hdr.name)); @@ -1564,13 +1583,13 @@ nc4_rec_grp_del_att_data(NC_GRP_INFO_T *grp) * if there is an error. */ for (size_t i = 0; i < ncindexsize(grp->children); i++) if ((retval = nc4_rec_grp_del_att_data((NC_GRP_INFO_T *)ncindexith(grp->children, i)))) - return retval; + goto done; /* Free attribute data in this group */ for (size_t i = 0; i < ncindexsize(grp->att); i++) { NC_ATT_INFO_T * att = (NC_ATT_INFO_T*)ncindexith(grp->att, i); - if((retval = NC_reclaim_data_all(grp->nc4_info->controller,att->nc_typeid,att->data,att->len))) - return retval; + if((retval = NC_reclaim_data_all(grp->nc4_info->controller,att->nc_typeid,att->data,att->len))) + goto done; att->data = NULL; att->len = 0; att->dirty = 0; @@ -1581,15 +1600,16 @@ nc4_rec_grp_del_att_data(NC_GRP_INFO_T *grp) NC_VAR_INFO_T* v = (NC_VAR_INFO_T *)ncindexith(grp->vars, i); for(size_t j=0;jatt);j++) { NC_ATT_INFO_T* att = (NC_ATT_INFO_T*)ncindexith(v->att, j); - if((retval = NC_reclaim_data_all(grp->nc4_info->controller,att->nc_typeid,att->data,att->len))) - return retval; + if((retval = NC_reclaim_data_all(grp->nc4_info->controller,att->nc_typeid,att->data,att->len))) + goto done; att->data = NULL; att->len = 0; att->dirty = 0; } } - return NC_NOERR; +done: + return retval; } /** @@ -1837,7 +1857,8 @@ rec_print_metadata(NC_GRP_INFO_T *grp, int tab_count) NC_FIELD_INFO_T *field; char tabs[MAX_NESTS+1] = ""; char temp_string[10]; - int t, retval, d, i; + int t, retval; + size_t i,d; /* Come up with a number of tabs relative to the group. */ for (t = 0; t < tab_count && t < MAX_NESTS; t++) @@ -1865,7 +1886,7 @@ rec_print_metadata(NC_GRP_INFO_T *grp, int tab_count) for (i = 0; i < ncindexsize(grp->vars); i++) { - int j; + size_t j; char storage_str[NC_MAX_NAME] = ""; char *dims_string = NULL; @@ -1918,7 +1939,7 @@ rec_print_metadata(NC_GRP_INFO_T *grp, int tab_count) /* Is this a compound type? */ if (type->nc_type_class == NC_COMPOUND) { - int j; + size_t j; LOG((3, "compound type")); for (j = 0; j < nclistlength(type->u.c.field); j++) { @@ -2050,6 +2071,26 @@ NC_findreserved(const char* name) #endif } +const char* +NC_findquantizeattname(int mode) +{ + const struct Quantizer* q; + for(q=NC_quantize_atts;q->name;q++) { + if(q->mode == mode) return q->name; + } + return NULL; +} + +int +NC_isquantizeattname(const char* name) +{ + const struct Quantizer* q; + for(q=NC_quantize_atts;q->name;q++) { + if(strcmp(q->name,name)==0) return 1; + } + return 0; +} + /* Ed Hartness requires this function */ static int NC4_move_in_NCList(NC* nc, int new_id) diff --git a/libsrc4/nc4type.c b/libsrc4/nc4type.c index e6136ca110..a7ea6e8a9d 100644 --- a/libsrc4/nc4type.c +++ b/libsrc4/nc4type.c @@ -15,6 +15,8 @@ #include "nc4dispatch.h" #include +#define NC_STRING_LEN sizeof(char *) /**< @internal Size of char *. */ + /** * @internal Find all user-defined types for a location. This finds * all user-defined types in a group. @@ -476,17 +478,22 @@ NC4_inq_typeid(int ncid, const char *name, nc_type *typeidp) NC_FILE_INFO_T *h5; NC_TYPE_INFO_T *type = NULL; char *norm_name = NULL; - int i, retval = NC_NOERR; + int retval = NC_NOERR; - /* Handle atomic types. */ - for (i = 0; i < NUM_ATOMIC_TYPES; i++) - if (!strcmp(name, nc4_atomic_name[i])) - { - if (typeidp) - *typeidp = i; + /* Normalize name. */ + if (!(norm_name = (char*)malloc(strlen(name) + 1))) + {retval = NC_ENOMEM; goto done;} + if ((retval = nc4_normalize_name(name, norm_name))) goto done; + + switch(retval = NC4_inq_atomic_typeid(ncid,norm_name,typeidp)) { + case NC_NOERR: goto done; + case NC_EBADTYPE: retval = NC_NOERR; break; + default: goto done; } + /* Must be a user-defined type */ + /* Find info for this file and group, and set pointer to each. */ if ((retval = nc4_find_grp_h5(ncid, &grp, &h5))) goto done; @@ -498,12 +505,6 @@ NC4_inq_typeid(int ncid, const char *name, nc_type *typeidp) if (name[0] != '/' && strstr(name, "/")) {retval = NC_EINVAL; goto done;} - /* Normalize name. */ - if (!(norm_name = (char*)malloc(strlen(name) + 1))) - {retval = NC_ENOMEM; goto done;} - if ((retval = nc4_normalize_name(name, norm_name))) - goto done; - /* If this is a fqn, then walk the sequence of parent groups to the last group and see if that group has a type of the right name */ if(name[0] == '/') { /* FQN */ @@ -568,57 +569,24 @@ int nc4_get_typeclass(const NC_FILE_INFO_T *h5, nc_type xtype, int *type_class) { int retval = NC_NOERR; + NC_TYPE_INFO_T *type; LOG((4, "%s xtype: %d", __func__, xtype)); assert(type_class); /* If this is an atomic type, the answer is easy. */ - if (xtype <= NC_STRING) - { - switch (xtype) - { - case NC_BYTE: - case NC_UBYTE: - case NC_SHORT: - case NC_USHORT: - case NC_INT: - case NC_UINT: - case NC_INT64: - case NC_UINT64: - /* NC_INT is class used for all integral types */ - *type_class = NC_INT; - break; - - case NC_FLOAT: - case NC_DOUBLE: - /* NC_FLOAT is class used for all floating-point types */ - *type_class = NC_FLOAT; - break; - - case NC_CHAR: - *type_class = NC_CHAR; - break; - - case NC_STRING: - *type_class = NC_STRING; - break; - - default: - BAIL(NC_EBADTYPE); - } + retval = NC4_get_atomic_typeclass(xtype,type_class); + switch (retval) { + case NC_NOERR: goto exit; + case NC_EBADTYPE: break; + default: goto exit; } - else - { - NC_TYPE_INFO_T *type; - /* See if it's a used-defined type */ if ((retval = nc4_find_type(h5, xtype, &type))) BAIL(retval); if (!type) BAIL(NC_EBADTYPE); - *type_class = type->nc_type_class; - } exit: return retval; diff --git a/libsrc4/nc4var.c b/libsrc4/nc4var.c index 9da3a03ec9..aafdc01653 100644 --- a/libsrc4/nc4var.c +++ b/libsrc4/nc4var.c @@ -1599,7 +1599,7 @@ nc4_get_fill_value(NC_FILE_INFO_T *h5, NC_VAR_INFO_T *var, void **fillp) * @internal Get the length, in bytes, of one element of a type in * memory. * - * @param h5 Pointer to HDF5 file info struct. + * @param h5 Pointer to Netcdf-4 file info struct. * @param xtype NetCDF type ID. * @param len Pointer that gets length in bytes. * diff --git a/nc-config.cmake.in b/nc-config.cmake.in index ff857648a0..0440bb9914 100644 --- a/nc-config.cmake.in +++ b/nc-config.cmake.in @@ -41,13 +41,6 @@ else has_nc2="yes" fi -has_nc4="@USE_NETCDF4@" -if [ -z "$has_nc4" -o "$has_nc4" = "OFF" ]; then - has_nc4="no" -else - has_nc4="yes" -fi - has_logging="@NETCDF_ENABLE_LOGGING@" if [ -z "$has_logging" -o "$has_logging" = "OFF" ]; then has_logging="no" @@ -200,7 +193,6 @@ all() echo " --has-dap2 -> $has_dap2" echo " --has-dap4 -> $has_dap4" echo " --has-nc2 -> $has_nc2" - echo " --has-nc4 -> $has_nc4" echo " --has-hdf5 -> $has_hdf5" echo " --has-hdf4 -> $has_hdf4" echo " --has-logging -> $has_logging" @@ -298,10 +290,6 @@ while test $# -gt 0; do echo $has_nc2 ;; - --has-nc4) - echo $has_nc4 - ;; - --has-hdf5) echo $has_hdf5 ;; diff --git a/nc_perf/bm_netcdf4_recs.c b/nc_perf/bm_netcdf4_recs.c index ddedec079e..c6411b0b6d 100644 --- a/nc_perf/bm_netcdf4_recs.c +++ b/nc_perf/bm_netcdf4_recs.c @@ -70,7 +70,7 @@ int main(int argc, char **argv) /* assign per-variable attributes */ if (nc_put_att_text(ncid, temperature_2m_id, "long_name", 36, "Air temperature 2m above the surface")) ERR; if (nc_put_att_text(ncid, temperature_2m_id, "units", 1, "K")) ERR; - if (nc_put_att_float(ncid, temperature_2m_id, "_FillValue", NC_FLOAT, 1, temperature_2m_FillValue_att)) ERR; + if (nc_put_att_float(ncid, temperature_2m_id, NC_FillValue, NC_FLOAT, 1, temperature_2m_FillValue_att)) ERR; if (nc_put_att_float(ncid, temperature_2m_id, "missing_value", NC_FLOAT, 1, temperature_2m_missing_value_att)) ERR; if (nc_put_att_float(ncid, temperature_2m_id, "valid_min", NC_FLOAT, 1, temperature_2m_valid_min_att)) ERR; if (nc_put_att_float(ncid, temperature_2m_id, "valid_max", NC_FLOAT, 1, temperature_2m_valid_max_att)) ERR; diff --git a/nc_perf/tst_wrf_reads.c b/nc_perf/tst_wrf_reads.c index 34d998f035..a49109a8eb 100644 --- a/nc_perf/tst_wrf_reads.c +++ b/nc_perf/tst_wrf_reads.c @@ -22876,7 +22876,7 @@ create_merr_file(char *file_name, int cmode) {/* create MERR.nc4.ncdumphs.nc */ { static const float _FillValue_att[1] = {((float)9.9999999e+14)} ; - stat = nc_put_att_float(MERR_grp, BASEFLOW_id, "_FillValue", NC_FLOAT, 1, _FillValue_att); + stat = nc_put_att_float(MERR_grp, BASEFLOW_id, NC_FillValue, NC_FLOAT, 1, _FillValue_att); check_err(stat,__LINE__,__FILE__); } @@ -22939,7 +22939,7 @@ create_merr_file(char *file_name, int cmode) {/* create MERR.nc4.ncdumphs.nc */ { static const float _FillValue_att[1] = {((float)9.9999999e+14)} ; - stat = nc_put_att_float(MERR_grp, ECHANGE_id, "_FillValue", NC_FLOAT, 1, _FillValue_att); + stat = nc_put_att_float(MERR_grp, ECHANGE_id, NC_FillValue, NC_FLOAT, 1, _FillValue_att); check_err(stat,__LINE__,__FILE__); } @@ -23002,7 +23002,7 @@ create_merr_file(char *file_name, int cmode) {/* create MERR.nc4.ncdumphs.nc */ { static const float _FillValue_att[1] = {((float)9.9999999e+14)} ; - stat = nc_put_att_float(MERR_grp, EVLAND_id, "_FillValue", NC_FLOAT, 1, _FillValue_att); + stat = nc_put_att_float(MERR_grp, EVLAND_id, NC_FillValue, NC_FLOAT, 1, _FillValue_att); check_err(stat,__LINE__,__FILE__); } @@ -23065,7 +23065,7 @@ create_merr_file(char *file_name, int cmode) {/* create MERR.nc4.ncdumphs.nc */ { static const float _FillValue_att[1] = {((float)9.9999999e+14)} ; - stat = nc_put_att_float(MERR_grp, EVPINTR_id, "_FillValue", NC_FLOAT, 1, _FillValue_att); + stat = nc_put_att_float(MERR_grp, EVPINTR_id, NC_FillValue, NC_FLOAT, 1, _FillValue_att); check_err(stat,__LINE__,__FILE__); } @@ -23128,7 +23128,7 @@ create_merr_file(char *file_name, int cmode) {/* create MERR.nc4.ncdumphs.nc */ { static const float _FillValue_att[1] = {((float)9.9999999e+14)} ; - stat = nc_put_att_float(MERR_grp, EVPSBLN_id, "_FillValue", NC_FLOAT, 1, _FillValue_att); + stat = nc_put_att_float(MERR_grp, EVPSBLN_id, NC_FillValue, NC_FLOAT, 1, _FillValue_att); check_err(stat,__LINE__,__FILE__); } @@ -23191,7 +23191,7 @@ create_merr_file(char *file_name, int cmode) {/* create MERR.nc4.ncdumphs.nc */ { static const float _FillValue_att[1] = {((float)9.9999999e+14)} ; - stat = nc_put_att_float(MERR_grp, EVPSOIL_id, "_FillValue", NC_FLOAT, 1, _FillValue_att); + stat = nc_put_att_float(MERR_grp, EVPSOIL_id, NC_FillValue, NC_FLOAT, 1, _FillValue_att); check_err(stat,__LINE__,__FILE__); } @@ -23254,7 +23254,7 @@ create_merr_file(char *file_name, int cmode) {/* create MERR.nc4.ncdumphs.nc */ { static const float _FillValue_att[1] = {((float)9.9999999e+14)} ; - stat = nc_put_att_float(MERR_grp, EVPTRNS_id, "_FillValue", NC_FLOAT, 1, _FillValue_att); + stat = nc_put_att_float(MERR_grp, EVPTRNS_id, NC_FillValue, NC_FLOAT, 1, _FillValue_att); check_err(stat,__LINE__,__FILE__); } @@ -23317,7 +23317,7 @@ create_merr_file(char *file_name, int cmode) {/* create MERR.nc4.ncdumphs.nc */ { static const float _FillValue_att[1] = {((float)9.9999999e+14)} ; - stat = nc_put_att_float(MERR_grp, FRSAT_id, "_FillValue", NC_FLOAT, 1, _FillValue_att); + stat = nc_put_att_float(MERR_grp, FRSAT_id, NC_FillValue, NC_FLOAT, 1, _FillValue_att); check_err(stat,__LINE__,__FILE__); } @@ -23380,7 +23380,7 @@ create_merr_file(char *file_name, int cmode) {/* create MERR.nc4.ncdumphs.nc */ { static const float _FillValue_att[1] = {((float)9.9999999e+14)} ; - stat = nc_put_att_float(MERR_grp, FRSNO_id, "_FillValue", NC_FLOAT, 1, _FillValue_att); + stat = nc_put_att_float(MERR_grp, FRSNO_id, NC_FillValue, NC_FLOAT, 1, _FillValue_att); check_err(stat,__LINE__,__FILE__); } @@ -23443,7 +23443,7 @@ create_merr_file(char *file_name, int cmode) {/* create MERR.nc4.ncdumphs.nc */ { static const float _FillValue_att[1] = {((float)9.9999999e+14)} ; - stat = nc_put_att_float(MERR_grp, FRUNST_id, "_FillValue", NC_FLOAT, 1, _FillValue_att); + stat = nc_put_att_float(MERR_grp, FRUNST_id, NC_FillValue, NC_FLOAT, 1, _FillValue_att); check_err(stat,__LINE__,__FILE__); } @@ -23506,7 +23506,7 @@ create_merr_file(char *file_name, int cmode) {/* create MERR.nc4.ncdumphs.nc */ { static const float _FillValue_att[1] = {((float)9.9999999e+14)} ; - stat = nc_put_att_float(MERR_grp, FRWLT_id, "_FillValue", NC_FLOAT, 1, _FillValue_att); + stat = nc_put_att_float(MERR_grp, FRWLT_id, NC_FillValue, NC_FLOAT, 1, _FillValue_att); check_err(stat,__LINE__,__FILE__); } @@ -23569,7 +23569,7 @@ create_merr_file(char *file_name, int cmode) {/* create MERR.nc4.ncdumphs.nc */ { static const float _FillValue_att[1] = {((float)9.9999999e+14)} ; - stat = nc_put_att_float(MERR_grp, GHLAND_id, "_FillValue", NC_FLOAT, 1, _FillValue_att); + stat = nc_put_att_float(MERR_grp, GHLAND_id, NC_FillValue, NC_FLOAT, 1, _FillValue_att); check_err(stat,__LINE__,__FILE__); } @@ -23632,7 +23632,7 @@ create_merr_file(char *file_name, int cmode) {/* create MERR.nc4.ncdumphs.nc */ { static const float _FillValue_att[1] = {((float)9.9999999e+14)} ; - stat = nc_put_att_float(MERR_grp, GRN_id, "_FillValue", NC_FLOAT, 1, _FillValue_att); + stat = nc_put_att_float(MERR_grp, GRN_id, NC_FillValue, NC_FLOAT, 1, _FillValue_att); check_err(stat,__LINE__,__FILE__); } @@ -23695,7 +23695,7 @@ create_merr_file(char *file_name, int cmode) {/* create MERR.nc4.ncdumphs.nc */ { static const float _FillValue_att[1] = {((float)9.9999999e+14)} ; - stat = nc_put_att_float(MERR_grp, GWETPROF_id, "_FillValue", NC_FLOAT, 1, _FillValue_att); + stat = nc_put_att_float(MERR_grp, GWETPROF_id, NC_FillValue, NC_FLOAT, 1, _FillValue_att); check_err(stat,__LINE__,__FILE__); } @@ -23758,7 +23758,7 @@ create_merr_file(char *file_name, int cmode) {/* create MERR.nc4.ncdumphs.nc */ { static const float _FillValue_att[1] = {((float)9.9999999e+14)} ; - stat = nc_put_att_float(MERR_grp, GWETROOT_id, "_FillValue", NC_FLOAT, 1, _FillValue_att); + stat = nc_put_att_float(MERR_grp, GWETROOT_id, NC_FillValue, NC_FLOAT, 1, _FillValue_att); check_err(stat,__LINE__,__FILE__); } @@ -23821,7 +23821,7 @@ create_merr_file(char *file_name, int cmode) {/* create MERR.nc4.ncdumphs.nc */ { static const float _FillValue_att[1] = {((float)9.9999999e+14)} ; - stat = nc_put_att_float(MERR_grp, GWETTOP_id, "_FillValue", NC_FLOAT, 1, _FillValue_att); + stat = nc_put_att_float(MERR_grp, GWETTOP_id, NC_FillValue, NC_FLOAT, 1, _FillValue_att); check_err(stat,__LINE__,__FILE__); } @@ -23884,7 +23884,7 @@ create_merr_file(char *file_name, int cmode) {/* create MERR.nc4.ncdumphs.nc */ { static const float _FillValue_att[1] = {((float)9.9999999e+14)} ; - stat = nc_put_att_float(MERR_grp, LAI_id, "_FillValue", NC_FLOAT, 1, _FillValue_att); + stat = nc_put_att_float(MERR_grp, LAI_id, NC_FillValue, NC_FLOAT, 1, _FillValue_att); check_err(stat,__LINE__,__FILE__); } @@ -23947,7 +23947,7 @@ create_merr_file(char *file_name, int cmode) {/* create MERR.nc4.ncdumphs.nc */ { static const float _FillValue_att[1] = {((float)9.9999999e+14)} ; - stat = nc_put_att_float(MERR_grp, LHLAND_id, "_FillValue", NC_FLOAT, 1, _FillValue_att); + stat = nc_put_att_float(MERR_grp, LHLAND_id, NC_FillValue, NC_FLOAT, 1, _FillValue_att); check_err(stat,__LINE__,__FILE__); } @@ -24010,7 +24010,7 @@ create_merr_file(char *file_name, int cmode) {/* create MERR.nc4.ncdumphs.nc */ { static const float _FillValue_att[1] = {((float)9.9999999e+14)} ; - stat = nc_put_att_float(MERR_grp, LWLAND_id, "_FillValue", NC_FLOAT, 1, _FillValue_att); + stat = nc_put_att_float(MERR_grp, LWLAND_id, NC_FillValue, NC_FLOAT, 1, _FillValue_att); check_err(stat,__LINE__,__FILE__); } @@ -24073,7 +24073,7 @@ create_merr_file(char *file_name, int cmode) {/* create MERR.nc4.ncdumphs.nc */ { static const float _FillValue_att[1] = {((float)9.9999999e+14)} ; - stat = nc_put_att_float(MERR_grp, PARDFLAND_id, "_FillValue", NC_FLOAT, 1, _FillValue_att); + stat = nc_put_att_float(MERR_grp, PARDFLAND_id, NC_FillValue, NC_FLOAT, 1, _FillValue_att); check_err(stat,__LINE__,__FILE__); } @@ -24136,7 +24136,7 @@ create_merr_file(char *file_name, int cmode) {/* create MERR.nc4.ncdumphs.nc */ { static const float _FillValue_att[1] = {((float)9.9999999e+14)} ; - stat = nc_put_att_float(MERR_grp, PARDRLAND_id, "_FillValue", NC_FLOAT, 1, _FillValue_att); + stat = nc_put_att_float(MERR_grp, PARDRLAND_id, NC_FillValue, NC_FLOAT, 1, _FillValue_att); check_err(stat,__LINE__,__FILE__); } @@ -24199,7 +24199,7 @@ create_merr_file(char *file_name, int cmode) {/* create MERR.nc4.ncdumphs.nc */ { static const float _FillValue_att[1] = {((float)9.9999999e+14)} ; - stat = nc_put_att_float(MERR_grp, PRECSNOLAND_id, "_FillValue", NC_FLOAT, 1, _FillValue_att); + stat = nc_put_att_float(MERR_grp, PRECSNOLAND_id, NC_FillValue, NC_FLOAT, 1, _FillValue_att); check_err(stat,__LINE__,__FILE__); } @@ -24262,7 +24262,7 @@ create_merr_file(char *file_name, int cmode) {/* create MERR.nc4.ncdumphs.nc */ { static const float _FillValue_att[1] = {((float)9.9999999e+14)} ; - stat = nc_put_att_float(MERR_grp, PRECTOTLAND_id, "_FillValue", NC_FLOAT, 1, _FillValue_att); + stat = nc_put_att_float(MERR_grp, PRECTOTLAND_id, NC_FillValue, NC_FLOAT, 1, _FillValue_att); check_err(stat,__LINE__,__FILE__); } @@ -24325,7 +24325,7 @@ create_merr_file(char *file_name, int cmode) {/* create MERR.nc4.ncdumphs.nc */ { static const float _FillValue_att[1] = {((float)9.9999999e+14)} ; - stat = nc_put_att_float(MERR_grp, PRMC_id, "_FillValue", NC_FLOAT, 1, _FillValue_att); + stat = nc_put_att_float(MERR_grp, PRMC_id, NC_FillValue, NC_FLOAT, 1, _FillValue_att); check_err(stat,__LINE__,__FILE__); } @@ -24388,7 +24388,7 @@ create_merr_file(char *file_name, int cmode) {/* create MERR.nc4.ncdumphs.nc */ { static const float _FillValue_att[1] = {((float)9.9999999e+14)} ; - stat = nc_put_att_float(MERR_grp, QINFIL_id, "_FillValue", NC_FLOAT, 1, _FillValue_att); + stat = nc_put_att_float(MERR_grp, QINFIL_id, NC_FillValue, NC_FLOAT, 1, _FillValue_att); check_err(stat,__LINE__,__FILE__); } @@ -24451,7 +24451,7 @@ create_merr_file(char *file_name, int cmode) {/* create MERR.nc4.ncdumphs.nc */ { static const float _FillValue_att[1] = {((float)9.9999999e+14)} ; - stat = nc_put_att_float(MERR_grp, RUNOFF_id, "_FillValue", NC_FLOAT, 1, _FillValue_att); + stat = nc_put_att_float(MERR_grp, RUNOFF_id, NC_FillValue, NC_FLOAT, 1, _FillValue_att); check_err(stat,__LINE__,__FILE__); } @@ -24514,7 +24514,7 @@ create_merr_file(char *file_name, int cmode) {/* create MERR.nc4.ncdumphs.nc */ { static const float _FillValue_att[1] = {((float)9.9999999e+14)} ; - stat = nc_put_att_float(MERR_grp, RZMC_id, "_FillValue", NC_FLOAT, 1, _FillValue_att); + stat = nc_put_att_float(MERR_grp, RZMC_id, NC_FillValue, NC_FLOAT, 1, _FillValue_att); check_err(stat,__LINE__,__FILE__); } @@ -24577,7 +24577,7 @@ create_merr_file(char *file_name, int cmode) {/* create MERR.nc4.ncdumphs.nc */ { static const float _FillValue_att[1] = {((float)9.9999999e+14)} ; - stat = nc_put_att_float(MERR_grp, SFMC_id, "_FillValue", NC_FLOAT, 1, _FillValue_att); + stat = nc_put_att_float(MERR_grp, SFMC_id, NC_FillValue, NC_FLOAT, 1, _FillValue_att); check_err(stat,__LINE__,__FILE__); } @@ -24640,7 +24640,7 @@ create_merr_file(char *file_name, int cmode) {/* create MERR.nc4.ncdumphs.nc */ { static const float _FillValue_att[1] = {((float)9.9999999e+14)} ; - stat = nc_put_att_float(MERR_grp, SHLAND_id, "_FillValue", NC_FLOAT, 1, _FillValue_att); + stat = nc_put_att_float(MERR_grp, SHLAND_id, NC_FillValue, NC_FLOAT, 1, _FillValue_att); check_err(stat,__LINE__,__FILE__); } @@ -24703,7 +24703,7 @@ create_merr_file(char *file_name, int cmode) {/* create MERR.nc4.ncdumphs.nc */ { static const float _FillValue_att[1] = {((float)9.9999999e+14)} ; - stat = nc_put_att_float(MERR_grp, SMLAND_id, "_FillValue", NC_FLOAT, 1, _FillValue_att); + stat = nc_put_att_float(MERR_grp, SMLAND_id, NC_FillValue, NC_FLOAT, 1, _FillValue_att); check_err(stat,__LINE__,__FILE__); } @@ -24766,7 +24766,7 @@ create_merr_file(char *file_name, int cmode) {/* create MERR.nc4.ncdumphs.nc */ { static const float _FillValue_att[1] = {((float)9.9999999e+14)} ; - stat = nc_put_att_float(MERR_grp, SNODP_id, "_FillValue", NC_FLOAT, 1, _FillValue_att); + stat = nc_put_att_float(MERR_grp, SNODP_id, NC_FillValue, NC_FLOAT, 1, _FillValue_att); check_err(stat,__LINE__,__FILE__); } @@ -24829,7 +24829,7 @@ create_merr_file(char *file_name, int cmode) {/* create MERR.nc4.ncdumphs.nc */ { static const float _FillValue_att[1] = {((float)9.9999999e+14)} ; - stat = nc_put_att_float(MERR_grp, SNOMAS_id, "_FillValue", NC_FLOAT, 1, _FillValue_att); + stat = nc_put_att_float(MERR_grp, SNOMAS_id, NC_FillValue, NC_FLOAT, 1, _FillValue_att); check_err(stat,__LINE__,__FILE__); } @@ -24892,7 +24892,7 @@ create_merr_file(char *file_name, int cmode) {/* create MERR.nc4.ncdumphs.nc */ { static const float _FillValue_att[1] = {((float)9.9999999e+14)} ; - stat = nc_put_att_float(MERR_grp, SPLAND_id, "_FillValue", NC_FLOAT, 1, _FillValue_att); + stat = nc_put_att_float(MERR_grp, SPLAND_id, NC_FillValue, NC_FLOAT, 1, _FillValue_att); check_err(stat,__LINE__,__FILE__); } @@ -24955,7 +24955,7 @@ create_merr_file(char *file_name, int cmode) {/* create MERR.nc4.ncdumphs.nc */ { static const float _FillValue_att[1] = {((float)9.9999999e+14)} ; - stat = nc_put_att_float(MERR_grp, SPSNOW_id, "_FillValue", NC_FLOAT, 1, _FillValue_att); + stat = nc_put_att_float(MERR_grp, SPSNOW_id, NC_FillValue, NC_FLOAT, 1, _FillValue_att); check_err(stat,__LINE__,__FILE__); } @@ -25018,7 +25018,7 @@ create_merr_file(char *file_name, int cmode) {/* create MERR.nc4.ncdumphs.nc */ { static const float _FillValue_att[1] = {((float)9.9999999e+14)} ; - stat = nc_put_att_float(MERR_grp, SPWATR_id, "_FillValue", NC_FLOAT, 1, _FillValue_att); + stat = nc_put_att_float(MERR_grp, SPWATR_id, NC_FillValue, NC_FLOAT, 1, _FillValue_att); check_err(stat,__LINE__,__FILE__); } @@ -25081,7 +25081,7 @@ create_merr_file(char *file_name, int cmode) {/* create MERR.nc4.ncdumphs.nc */ { static const float _FillValue_att[1] = {((float)9.9999999e+14)} ; - stat = nc_put_att_float(MERR_grp, SWLAND_id, "_FillValue", NC_FLOAT, 1, _FillValue_att); + stat = nc_put_att_float(MERR_grp, SWLAND_id, NC_FillValue, NC_FLOAT, 1, _FillValue_att); check_err(stat,__LINE__,__FILE__); } @@ -25144,7 +25144,7 @@ create_merr_file(char *file_name, int cmode) {/* create MERR.nc4.ncdumphs.nc */ { static const float _FillValue_att[1] = {((float)9.9999999e+14)} ; - stat = nc_put_att_float(MERR_grp, TELAND_id, "_FillValue", NC_FLOAT, 1, _FillValue_att); + stat = nc_put_att_float(MERR_grp, TELAND_id, NC_FillValue, NC_FLOAT, 1, _FillValue_att); check_err(stat,__LINE__,__FILE__); } @@ -25207,7 +25207,7 @@ create_merr_file(char *file_name, int cmode) {/* create MERR.nc4.ncdumphs.nc */ { static const float _FillValue_att[1] = {((float)9.9999999e+14)} ; - stat = nc_put_att_float(MERR_grp, TPSNOW_id, "_FillValue", NC_FLOAT, 1, _FillValue_att); + stat = nc_put_att_float(MERR_grp, TPSNOW_id, NC_FillValue, NC_FLOAT, 1, _FillValue_att); check_err(stat,__LINE__,__FILE__); } @@ -25270,7 +25270,7 @@ create_merr_file(char *file_name, int cmode) {/* create MERR.nc4.ncdumphs.nc */ { static const float _FillValue_att[1] = {((float)9.9999999e+14)} ; - stat = nc_put_att_float(MERR_grp, TSAT_id, "_FillValue", NC_FLOAT, 1, _FillValue_att); + stat = nc_put_att_float(MERR_grp, TSAT_id, NC_FillValue, NC_FLOAT, 1, _FillValue_att); check_err(stat,__LINE__,__FILE__); } @@ -25333,7 +25333,7 @@ create_merr_file(char *file_name, int cmode) {/* create MERR.nc4.ncdumphs.nc */ { static const float _FillValue_att[1] = {((float)9.9999999e+14)} ; - stat = nc_put_att_float(MERR_grp, TSOIL1_id, "_FillValue", NC_FLOAT, 1, _FillValue_att); + stat = nc_put_att_float(MERR_grp, TSOIL1_id, NC_FillValue, NC_FLOAT, 1, _FillValue_att); check_err(stat,__LINE__,__FILE__); } @@ -25396,7 +25396,7 @@ create_merr_file(char *file_name, int cmode) {/* create MERR.nc4.ncdumphs.nc */ { static const float _FillValue_att[1] = {((float)9.9999999e+14)} ; - stat = nc_put_att_float(MERR_grp, TSOIL2_id, "_FillValue", NC_FLOAT, 1, _FillValue_att); + stat = nc_put_att_float(MERR_grp, TSOIL2_id, NC_FillValue, NC_FLOAT, 1, _FillValue_att); check_err(stat,__LINE__,__FILE__); } @@ -25459,7 +25459,7 @@ create_merr_file(char *file_name, int cmode) {/* create MERR.nc4.ncdumphs.nc */ { static const float _FillValue_att[1] = {((float)9.9999999e+14)} ; - stat = nc_put_att_float(MERR_grp, TSOIL3_id, "_FillValue", NC_FLOAT, 1, _FillValue_att); + stat = nc_put_att_float(MERR_grp, TSOIL3_id, NC_FillValue, NC_FLOAT, 1, _FillValue_att); check_err(stat,__LINE__,__FILE__); } @@ -25522,7 +25522,7 @@ create_merr_file(char *file_name, int cmode) {/* create MERR.nc4.ncdumphs.nc */ { static const float _FillValue_att[1] = {((float)9.9999999e+14)} ; - stat = nc_put_att_float(MERR_grp, TSOIL4_id, "_FillValue", NC_FLOAT, 1, _FillValue_att); + stat = nc_put_att_float(MERR_grp, TSOIL4_id, NC_FillValue, NC_FLOAT, 1, _FillValue_att); check_err(stat,__LINE__,__FILE__); } @@ -25585,7 +25585,7 @@ create_merr_file(char *file_name, int cmode) {/* create MERR.nc4.ncdumphs.nc */ { static const float _FillValue_att[1] = {((float)9.9999999e+14)} ; - stat = nc_put_att_float(MERR_grp, TSOIL5_id, "_FillValue", NC_FLOAT, 1, _FillValue_att); + stat = nc_put_att_float(MERR_grp, TSOIL5_id, NC_FillValue, NC_FLOAT, 1, _FillValue_att); check_err(stat,__LINE__,__FILE__); } @@ -25648,7 +25648,7 @@ create_merr_file(char *file_name, int cmode) {/* create MERR.nc4.ncdumphs.nc */ { static const float _FillValue_att[1] = {((float)9.9999999e+14)} ; - stat = nc_put_att_float(MERR_grp, TSOIL6_id, "_FillValue", NC_FLOAT, 1, _FillValue_att); + stat = nc_put_att_float(MERR_grp, TSOIL6_id, NC_FillValue, NC_FLOAT, 1, _FillValue_att); check_err(stat,__LINE__,__FILE__); } @@ -25711,7 +25711,7 @@ create_merr_file(char *file_name, int cmode) {/* create MERR.nc4.ncdumphs.nc */ { static const float _FillValue_att[1] = {((float)9.9999999e+14)} ; - stat = nc_put_att_float(MERR_grp, TSURF_id, "_FillValue", NC_FLOAT, 1, _FillValue_att); + stat = nc_put_att_float(MERR_grp, TSURF_id, NC_FillValue, NC_FLOAT, 1, _FillValue_att); check_err(stat,__LINE__,__FILE__); } @@ -25774,7 +25774,7 @@ create_merr_file(char *file_name, int cmode) {/* create MERR.nc4.ncdumphs.nc */ { static const float _FillValue_att[1] = {((float)9.9999999e+14)} ; - stat = nc_put_att_float(MERR_grp, TUNST_id, "_FillValue", NC_FLOAT, 1, _FillValue_att); + stat = nc_put_att_float(MERR_grp, TUNST_id, NC_FillValue, NC_FLOAT, 1, _FillValue_att); check_err(stat,__LINE__,__FILE__); } @@ -25837,7 +25837,7 @@ create_merr_file(char *file_name, int cmode) {/* create MERR.nc4.ncdumphs.nc */ { static const float _FillValue_att[1] = {((float)9.9999999e+14)} ; - stat = nc_put_att_float(MERR_grp, TWLAND_id, "_FillValue", NC_FLOAT, 1, _FillValue_att); + stat = nc_put_att_float(MERR_grp, TWLAND_id, NC_FillValue, NC_FLOAT, 1, _FillValue_att); check_err(stat,__LINE__,__FILE__); } @@ -25900,7 +25900,7 @@ create_merr_file(char *file_name, int cmode) {/* create MERR.nc4.ncdumphs.nc */ { static const float _FillValue_att[1] = {((float)9.9999999e+14)} ; - stat = nc_put_att_float(MERR_grp, TWLT_id, "_FillValue", NC_FLOAT, 1, _FillValue_att); + stat = nc_put_att_float(MERR_grp, TWLT_id, NC_FillValue, NC_FLOAT, 1, _FillValue_att); check_err(stat,__LINE__,__FILE__); } @@ -25963,7 +25963,7 @@ create_merr_file(char *file_name, int cmode) {/* create MERR.nc4.ncdumphs.nc */ { static const float _FillValue_att[1] = {((float)9.9999999e+14)} ; - stat = nc_put_att_float(MERR_grp, WCHANGE_id, "_FillValue", NC_FLOAT, 1, _FillValue_att); + stat = nc_put_att_float(MERR_grp, WCHANGE_id, NC_FillValue, NC_FLOAT, 1, _FillValue_att); check_err(stat,__LINE__,__FILE__); } diff --git a/nc_test/CMakeLists.txt b/nc_test/CMakeLists.txt index cee0f68346..1837d16d7f 100644 --- a/nc_test/CMakeLists.txt +++ b/nc_test/CMakeLists.txt @@ -21,16 +21,15 @@ SET (nc_test_SRC util.c ) - -SET (nc_test_m4_SOURCES test_get test_put test_read test_write) -FOREACH (F ${nc_test_m4_SOURCES}) +set(nc_test_m4_SOURCES test_get test_put test_read test_write) +foreach (F ${nc_test_m4_SOURCES}) GEN_m4(${F} dest) - LIST(APPEND nc_test_SRC ${dest}) -ENDFOREACH() + list(APPEND nc_test_SRC ${dest}) +endforeach() -ADD_EXECUTABLE(nc_test ${nc_test_SRC}) -TARGET_INCLUDE_DIRECTORIES(nc_test PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}) -TARGET_LINK_LIBRARIES(nc_test +add_executable(nc_test ${nc_test_SRC}) +target_include_directories(nc_test PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}) +target_link_libraries(nc_test netcdf ${HAVE_LIBM} ${ALL_TLL_LIBS} diff --git a/nc_test/t_nc_p5.c b/nc_test/t_nc_p5.c index 82ce46e72e..6613e0052e 100644 --- a/nc_test/t_nc_p5.c +++ b/nc_test/t_nc_p5.c @@ -145,7 +145,7 @@ static const char * const reqattr[] = { "SCALEMIN", "SCALEMAX", "FIELDNAM", - _FillValue + NC_FillValue }; #define NUM_RATTRS 6 @@ -388,9 +388,9 @@ main(int argc, char *argv[]) { int ifill = -1; double dfill = -9999; assert( nc_put_att_int(id, Long_id, - _FillValue, NC_INT, 1, &ifill) == NC_NOERR); + NC_FillValue, NC_INT, 1, &ifill) == NC_NOERR); assert( nc_put_att_double(id, Double_id, - _FillValue, NC_DOUBLE, 1, &dfill) == NC_NOERR); + NC_FillValue, NC_DOUBLE, 1, &dfill) == NC_NOERR); } #ifdef REDEF diff --git a/nc_test/test_byterange.sh b/nc_test/test_byterange.sh index 7ec0f46551..d059ace0c5 100755 --- a/nc_test/test_byterange.sh +++ b/nc_test/test_byterange.sh @@ -7,6 +7,8 @@ set -e # Note: thredds-test is currently down and the URLs need to be replaced +KEYPREFIX="${S3TESTSUBTREE}/byterangefiles" + # Test Urls if test "x$FEATURE_THREDDSTEST" = x1 ; then URL3="https://thredds-test.unidata.ucar.edu/thredds/fileServer/pointData/cf_dsg/example/point.nc#mode=bytes" @@ -19,10 +21,10 @@ URL4c="https://noaa-goes16.s3.amazonaws.com/ABI-L1b-RadF/2022/001/18/OR_ABI-L1b- URL4e="s3://noaa-goes16/ABI-L1b-RadC/2017/059/03/OR_ABI-L1b-RadC-M3C13_G16_s20170590337505_e20170590340289_c20170590340316.nc#mode=bytes" fi if test "x$FEATURE_S3TESTS" = xyes ; then +# Does not require auth +URL3b="s3://${S3TESTBUCKET}/${KEYPREFIX}/upload3.nc#bytes" # Requires auth -URL3b="s3://${S3TESTBUCKET}/byterangefiles/upload3.nc#bytes" -# Requires auth -URL4d="s3://${S3TESTBUCKET}/byterangefiles/upload4.nc#bytes&aws.profile=unidata" +URL4d="s3://${S3TESTBUCKET}/${KEYPREFIX}/upload4.nc#bytes&aws.profile=unidata" fi URL4f="https://crudata.uea.ac.uk/cru/data/temperature/HadCRUT.4.6.0.0.median.nc#mode=bytes" @@ -37,14 +39,14 @@ testsetup() { U=$1 # Create and upload test files rm -f upload4.nc upload3.nc -${execdir}/../nczarr_test/s3util -u ${U} -k /byterangefiles clear +${execdir}/../nczarr_test/s3util -u ${U} -k "/${KEYPREFIX}" clear ${NCGEN} -lb -3 ${srcdir}/nc_enddef.cdl mv nc_enddef.nc upload3.nc -${execdir}/../nczarr_test/s3util -u ${U} -k /byterangefiles/upload3.nc -f upload3.nc upload +${execdir}/../nczarr_test/s3util -u ${U} -k /${KEYPREFIX}/upload3.nc -f upload3.nc upload if test "x$FEATURE_HDF5" = xyes ; then ${NCGEN} -lb -4 ${srcdir}/nc_enddef.cdl mv nc_enddef.nc upload4.nc -${execdir}/../nczarr_test/s3util -u ${U} -k /byterangefiles/upload4.nc -f upload4.nc upload +${execdir}/../nczarr_test/s3util -u ${U} -k /${KEYPREFIX}/upload4.nc -f upload4.nc upload fi rm -f tst_http_nc3.cdl tst_http_nc4?.cdl } @@ -52,7 +54,7 @@ rm -f tst_http_nc3.cdl tst_http_nc4?.cdl testcleanup() { U=$1 rm -f upload4.nc upload3.nc -${execdir}/../nczarr_test/s3util -u ${U} -k /byterangefiles clear +${execdir}/../nczarr_test/s3util -u ${U} -k /${KEYPREFIX} clear } testbytes() { @@ -80,8 +82,8 @@ if test "x$K" != "x$EXPECTED" ; then echo "test_http: -k flag mismatch: expected=$EXPECTED have=$K" exit 1 fi -rm -f tmp_${TAG}.cdl # Now test the reading of at least the metadata +rm -f tmp_${TAG}.cdl ${NCDUMP} -n nc_enddef "$U" >tmp_${TAG}.cdl # compare diff -wb tmp_$TAG.cdl ${srcdir}/nc_enddef.cdl diff --git a/nc_test/tests.h b/nc_test/tests.h index 5b24b06420..731baefb8f 100644 --- a/nc_test/tests.h +++ b/nc_test/tests.h @@ -197,8 +197,7 @@ extern size_t numTypes; /* number of netCDF data types to test */ extern "C" { #endif - - /* Non-standard internal types */ +/* Non-standard internal types */ #ifdef HAVE_SYS_TYPES_H # include diff --git a/nc_test/tst_diskless2.c b/nc_test/tst_diskless2.c index 85e02e4ad6..38e26f246a 100644 --- a/nc_test/tst_diskless2.c +++ b/nc_test/tst_diskless2.c @@ -244,13 +244,13 @@ main() {/* create tst_diskless2.nc */ { static const double p_FillValue_att[1] = {((double)-9999)} ; - stat = nc_put_att_double(root_grp, p_id, "_FillValue", NC_DOUBLE, 1, p_FillValue_att); + stat = nc_put_att_double(root_grp, p_id, NC_FillValue, NC_DOUBLE, 1, p_FillValue_att); check_err(stat,__LINE__,__FILE__); } { static const int rh_FillValue_att[1] = {-1} ; - stat = nc_put_att_int(root_grp, rh_id, "_FillValue", NC_INT, 1, rh_FillValue_att); + stat = nc_put_att_int(root_grp, rh_id, NC_FillValue, NC_INT, 1, rh_FillValue_att); check_err(stat,__LINE__,__FILE__); } diff --git a/nc_test/tst_formats.c b/nc_test/tst_formats.c index 3ee26c602f..1960514f01 100644 --- a/nc_test/tst_formats.c +++ b/nc_test/tst_formats.c @@ -226,7 +226,7 @@ main(int argc, char **argv) } else { - ret = nc_put_att_int(ncid, varid, "_FillValue", NC_INT, 1, + ret = nc_put_att_int(ncid, varid, NC_FillValue, NC_INT, 1, &fill_value); } diff --git a/nc_test/tst_global_fillval.c b/nc_test/tst_global_fillval.c index 0d8c6ca15a..b7c1d7e3fd 100644 --- a/nc_test/tst_global_fillval.c +++ b/nc_test/tst_global_fillval.c @@ -61,7 +61,7 @@ main(int argc, char **argv) int ncid, cmode, fillv = 9; cmode = NC_CLOBBER | formats[i]; if (nc_create(FILE_NAME, cmode, &ncid)) ERR; - if (nc_put_att_int(ncid, NC_GLOBAL, "_FillValue", NC_INT, 1, &fillv)) ERR; + if (nc_put_att_int(ncid, NC_GLOBAL, NC_FillValue, NC_INT, 1, &fillv)) ERR; if (nc_close(ncid)) ERR; } diff --git a/nc_test4/Makefile.am b/nc_test4/Makefile.am index a07e20856d..369d6e1e12 100644 --- a/nc_test4/Makefile.am +++ b/nc_test4/Makefile.am @@ -79,7 +79,6 @@ TESTS += tst_szip.sh endif # Filter Tests (requires ncdump and ncgen and HDF5) -if USE_HDF5 if NETCDF_ENABLE_PLUGINS if NETCDF_ENABLE_FILTER_TESTING extradir = @@ -111,7 +110,6 @@ endif endif # NETCDF_ENABLE_FILTER_TESTING endif # NETCDF_ENABLE_PLUGINS -endif # USE_HDF5 endif # NETCDF_BUILD_UTILITIES # Zstandard tests. @@ -164,8 +162,15 @@ tmp_bzip2.c bzip2.nc noop.nc tmp_*.dmp tmp_*.cdl ref_fixedstring.h5 DISTCLEANFILES = findplugin.sh run_par_test.sh run_par_warn_test.sh -clean-local: - rm -fr testdir_* testset_* - # If valgrind is present, add valgrind targets. @VALGRIND_CHECK_RULES@ + +# The (otherwise unused) program build_fixedstring.c +# is used to generate the test file ref_fixedstring.h5. +# That test file is built and included as part of the distribution, +# so the build_fixedstring.c program generally does not need to +# be executed unless the test file needs to be modified.. +noinst_PROGRAMS = build_fixedstring +build_fixedstring_SOURCES = build_fixedstring.c +ref_fixedstring.h5: + ${builddir}/build_fixedstring diff --git a/nc_test4/tst_atts3.c b/nc_test4/tst_atts3.c index 84726014d4..19bfcdc72e 100644 --- a/nc_test4/tst_atts3.c +++ b/nc_test4/tst_atts3.c @@ -2392,17 +2392,17 @@ main(int argc, char **argv) int res = 0; /* This should return error, because attribute has too many values */ #if 1 - res=nc_put_att_int(ncid, varid, "_FillValue", NC_INT, 2, var_FillValue_atts); + res=nc_put_att_int(ncid, varid, NC_FillValue, NC_INT, 2, var_FillValue_atts); if(res != NC_EINVAL) ERR; #else - if ((res=nc_put_att_int(ncid, varid, "_FillValue", NC_INT, 2, var_FillValue_atts)) + if ((res=nc_put_att_int(ncid, varid, NC_FillValue, NC_INT, 2, var_FillValue_atts)) != NC_EINVAL) ERR; #endif /* This also should return error, because types don't match */ - if (nc_put_att_float(ncid, varid, "_FillValue", NC_FLOAT, 1, &var_FillValue_att) + if (nc_put_att_float(ncid, varid, NC_FillValue, NC_FLOAT, 1, &var_FillValue_att) != NC_EBADTYPE) ERR; /* This should succeed, _FillValue is valid */ - if (nc_put_att_int(ncid, varid, "_FillValue", NC_INT, 1, var_FillValue_atts)) ERR; + if (nc_put_att_int(ncid, varid, NC_FillValue, NC_INT, 1, var_FillValue_atts)) ERR; } if (nc_close(ncid)) ERR; diff --git a/nc_test4/tst_bloscfail.sh b/nc_test4/tst_bloscfail.sh index 14dfb42d1c..f5364d6e5e 100755 --- a/nc_test4/tst_bloscfail.sh +++ b/nc_test4/tst_bloscfail.sh @@ -31,7 +31,7 @@ sed -e 's/[ ]*\([^ ].*\)/\1/' <$1 >$2 } if test "x$TESTNCZARR" = x1 ; then -. "$srcdir/test_nczarr.sh" +. $srcdir/test_nczarr.sh fi if ! avail blosc; then echo "Blosc compressor not found"; exit 0; fi diff --git a/nc_test4/tst_camrun.c b/nc_test4/tst_camrun.c index d7c13d36ab..78fef4a769 100644 --- a/nc_test4/tst_camrun.c +++ b/nc_test4/tst_camrun.c @@ -5354,7 +5354,7 @@ main() if (nc_put_att_text(ncid, nsteph_id, "long_name", 16, "current timestep")) ERR; { /* _FillValue */ static const double ABSORB_FillValue_att[1] = {1e+36} ; - if (nc_put_att_double(ncid, ABSORB_id, "_FillValue", NC_DOUBLE, 1, ABSORB_FillValue_att)) ERR; + if (nc_put_att_double(ncid, ABSORB_id, NC_FillValue, NC_DOUBLE, 1, ABSORB_FillValue_att)) ERR; } { /* missing_value */ static const double ABSORB_missing_value_att[1] = {1e+36} ; @@ -5365,7 +5365,7 @@ main() if (nc_put_att_text(ncid, ABSORB_id, "cell_methods", 10, "time: mean")) ERR; { /* _FillValue */ static const double AEROD_v_FillValue_att[1] = {1e+36} ; - if (nc_put_att_double(ncid, AEROD_v_id, "_FillValue", NC_DOUBLE, 1, AEROD_v_FillValue_att)) ERR; + if (nc_put_att_double(ncid, AEROD_v_id, NC_FillValue, NC_DOUBLE, 1, AEROD_v_FillValue_att)) ERR; } { /* missing_value */ static const double AEROD_v_missing_value_att[1] = {1e+36} ; @@ -5376,7 +5376,7 @@ main() if (nc_put_att_text(ncid, AEROD_v_id, "cell_methods", 10, "time: mean")) ERR; { /* _FillValue */ static const double AODABS_FillValue_att[1] = {1e+36} ; - if (nc_put_att_double(ncid, AODABS_id, "_FillValue", NC_DOUBLE, 1, AODABS_FillValue_att)) ERR; + if (nc_put_att_double(ncid, AODABS_id, NC_FillValue, NC_DOUBLE, 1, AODABS_FillValue_att)) ERR; } { /* missing_value */ static const double AODABS_missing_value_att[1] = {1e+36} ; @@ -5387,7 +5387,7 @@ main() if (nc_put_att_text(ncid, AODABS_id, "cell_methods", 10, "time: mean")) ERR; { /* _FillValue */ static const double AODDUST1_FillValue_att[1] = {1e+36} ; - if (nc_put_att_double(ncid, AODDUST1_id, "_FillValue", NC_DOUBLE, 1, AODDUST1_FillValue_att)) ERR; + if (nc_put_att_double(ncid, AODDUST1_id, NC_FillValue, NC_DOUBLE, 1, AODDUST1_FillValue_att)) ERR; } { /* missing_value */ static const double AODDUST1_missing_value_att[1] = {1e+36} ; @@ -5398,7 +5398,7 @@ main() if (nc_put_att_text(ncid, AODDUST1_id, "cell_methods", 10, "time: mean")) ERR; { /* _FillValue */ static const double AODDUST2_FillValue_att[1] = {1e+36} ; - if (nc_put_att_double(ncid, AODDUST2_id, "_FillValue", NC_DOUBLE, 1, AODDUST2_FillValue_att)) ERR; + if (nc_put_att_double(ncid, AODDUST2_id, NC_FillValue, NC_DOUBLE, 1, AODDUST2_FillValue_att)) ERR; } { /* missing_value */ static const double AODDUST2_missing_value_att[1] = {1e+36} ; @@ -5409,7 +5409,7 @@ main() if (nc_put_att_text(ncid, AODDUST2_id, "cell_methods", 10, "time: mean")) ERR; { /* _FillValue */ static const double AODDUST3_FillValue_att[1] = {1e+36} ; - if (nc_put_att_double(ncid, AODDUST3_id, "_FillValue", NC_DOUBLE, 1, AODDUST3_FillValue_att)) ERR; + if (nc_put_att_double(ncid, AODDUST3_id, NC_FillValue, NC_DOUBLE, 1, AODDUST3_FillValue_att)) ERR; } { /* missing_value */ static const double AODDUST3_missing_value_att[1] = {1e+36} ; @@ -5420,7 +5420,7 @@ main() if (nc_put_att_text(ncid, AODDUST3_id, "cell_methods", 10, "time: mean")) ERR; { /* _FillValue */ static const double AODMODE1_FillValue_att[1] = {1e+36} ; - if (nc_put_att_double(ncid, AODMODE1_id, "_FillValue", NC_DOUBLE, 1, AODMODE1_FillValue_att)) ERR; + if (nc_put_att_double(ncid, AODMODE1_id, NC_FillValue, NC_DOUBLE, 1, AODMODE1_FillValue_att)) ERR; } { /* missing_value */ static const double AODMODE1_missing_value_att[1] = {1e+36} ; @@ -5431,7 +5431,7 @@ main() if (nc_put_att_text(ncid, AODMODE1_id, "cell_methods", 10, "time: mean")) ERR; { /* _FillValue */ static const double AODMODE2_FillValue_att[1] = {1e+36} ; - if (nc_put_att_double(ncid, AODMODE2_id, "_FillValue", NC_DOUBLE, 1, AODMODE2_FillValue_att)) ERR; + if (nc_put_att_double(ncid, AODMODE2_id, NC_FillValue, NC_DOUBLE, 1, AODMODE2_FillValue_att)) ERR; } { /* missing_value */ static const double AODMODE2_missing_value_att[1] = {1e+36} ; @@ -5442,7 +5442,7 @@ main() if (nc_put_att_text(ncid, AODMODE2_id, "cell_methods", 10, "time: mean")) ERR; { /* _FillValue */ static const double AODMODE3_FillValue_att[1] = {1e+36} ; - if (nc_put_att_double(ncid, AODMODE3_id, "_FillValue", NC_DOUBLE, 1, AODMODE3_FillValue_att)) ERR; + if (nc_put_att_double(ncid, AODMODE3_id, NC_FillValue, NC_DOUBLE, 1, AODMODE3_FillValue_att)) ERR; } { /* missing_value */ static const double AODMODE3_missing_value_att[1] = {1e+36} ; @@ -5453,7 +5453,7 @@ main() if (nc_put_att_text(ncid, AODMODE3_id, "cell_methods", 10, "time: mean")) ERR; { /* _FillValue */ static const double AODVIS_FillValue_att[1] = {1e+36} ; - if (nc_put_att_double(ncid, AODVIS_id, "_FillValue", NC_DOUBLE, 1, AODVIS_FillValue_att)) ERR; + if (nc_put_att_double(ncid, AODVIS_id, NC_FillValue, NC_DOUBLE, 1, AODVIS_FillValue_att)) ERR; } { /* missing_value */ static const double AODVIS_missing_value_att[1] = {1e+36} ; @@ -5533,7 +5533,7 @@ main() if (nc_put_att_text(ncid, BPROD_id, "cell_methods", 10, "time: mean")) ERR; { /* _FillValue */ static const double BURDEN1_FillValue_att[1] = {1e+36} ; - if (nc_put_att_double(ncid, BURDEN1_id, "_FillValue", NC_DOUBLE, 1, BURDEN1_FillValue_att)) ERR; + if (nc_put_att_double(ncid, BURDEN1_id, NC_FillValue, NC_DOUBLE, 1, BURDEN1_FillValue_att)) ERR; } { /* missing_value */ static const double BURDEN1_missing_value_att[1] = {1e+36} ; @@ -5544,7 +5544,7 @@ main() if (nc_put_att_text(ncid, BURDEN1_id, "cell_methods", 10, "time: mean")) ERR; { /* _FillValue */ static const double BURDEN2_FillValue_att[1] = {1e+36} ; - if (nc_put_att_double(ncid, BURDEN2_id, "_FillValue", NC_DOUBLE, 1, BURDEN2_FillValue_att)) ERR; + if (nc_put_att_double(ncid, BURDEN2_id, NC_FillValue, NC_DOUBLE, 1, BURDEN2_FillValue_att)) ERR; } { /* missing_value */ static const double BURDEN2_missing_value_att[1] = {1e+36} ; @@ -5555,7 +5555,7 @@ main() if (nc_put_att_text(ncid, BURDEN2_id, "cell_methods", 10, "time: mean")) ERR; { /* _FillValue */ static const double BURDEN3_FillValue_att[1] = {1e+36} ; - if (nc_put_att_double(ncid, BURDEN3_id, "_FillValue", NC_DOUBLE, 1, BURDEN3_FillValue_att)) ERR; + if (nc_put_att_double(ncid, BURDEN3_id, NC_FillValue, NC_DOUBLE, 1, BURDEN3_FillValue_att)) ERR; } { /* missing_value */ static const double BURDEN3_missing_value_att[1] = {1e+36} ; @@ -5647,7 +5647,7 @@ main() if (nc_put_att_text(ncid, DTV_id, "cell_methods", 10, "time: mean")) ERR; { /* _FillValue */ static const double EXTINCT_FillValue_att[1] = {1e+36} ; - if (nc_put_att_double(ncid, EXTINCT_id, "_FillValue", NC_DOUBLE, 1, EXTINCT_FillValue_att)) ERR; + if (nc_put_att_double(ncid, EXTINCT_id, NC_FillValue, NC_DOUBLE, 1, EXTINCT_FillValue_att)) ERR; } { /* missing_value */ static const double EXTINCT_missing_value_att[1] = {1e+36} ; @@ -5855,7 +5855,7 @@ main() if (nc_put_att_text(ncid, OCNFRAC_id, "cell_methods", 10, "time: mean")) ERR; { /* _FillValue */ static const double ODV_bc_a1_FillValue_att[1] = {1e+36} ; - if (nc_put_att_double(ncid, ODV_bc_a1_id, "_FillValue", NC_DOUBLE, 1, ODV_bc_a1_FillValue_att)) ERR; + if (nc_put_att_double(ncid, ODV_bc_a1_id, NC_FillValue, NC_DOUBLE, 1, ODV_bc_a1_FillValue_att)) ERR; } { /* missing_value */ static const double ODV_bc_a1_missing_value_att[1] = {1e+36} ; @@ -5866,7 +5866,7 @@ main() if (nc_put_att_text(ncid, ODV_bc_a1_id, "cell_methods", 10, "time: mean")) ERR; { /* _FillValue */ static const double ODV_dst_a1_FillValue_att[1] = {1e+36} ; - if (nc_put_att_double(ncid, ODV_dst_a1_id, "_FillValue", NC_DOUBLE, 1, ODV_dst_a1_FillValue_att)) ERR; + if (nc_put_att_double(ncid, ODV_dst_a1_id, NC_FillValue, NC_DOUBLE, 1, ODV_dst_a1_FillValue_att)) ERR; } { /* missing_value */ static const double ODV_dst_a1_missing_value_att[1] = {1e+36} ; @@ -5877,7 +5877,7 @@ main() if (nc_put_att_text(ncid, ODV_dst_a1_id, "cell_methods", 10, "time: mean")) ERR; { /* _FillValue */ static const double ODV_dst_a3_FillValue_att[1] = {1e+36} ; - if (nc_put_att_double(ncid, ODV_dst_a3_id, "_FillValue", NC_DOUBLE, 1, ODV_dst_a3_FillValue_att)) ERR; + if (nc_put_att_double(ncid, ODV_dst_a3_id, NC_FillValue, NC_DOUBLE, 1, ODV_dst_a3_FillValue_att)) ERR; } { /* missing_value */ static const double ODV_dst_a3_missing_value_att[1] = {1e+36} ; @@ -5888,7 +5888,7 @@ main() if (nc_put_att_text(ncid, ODV_dst_a3_id, "cell_methods", 10, "time: mean")) ERR; { /* _FillValue */ static const double ODV_ncl_a1_FillValue_att[1] = {1e+36} ; - if (nc_put_att_double(ncid, ODV_ncl_a1_id, "_FillValue", NC_DOUBLE, 1, ODV_ncl_a1_FillValue_att)) ERR; + if (nc_put_att_double(ncid, ODV_ncl_a1_id, NC_FillValue, NC_DOUBLE, 1, ODV_ncl_a1_FillValue_att)) ERR; } { /* missing_value */ static const double ODV_ncl_a1_missing_value_att[1] = {1e+36} ; @@ -5899,7 +5899,7 @@ main() if (nc_put_att_text(ncid, ODV_ncl_a1_id, "cell_methods", 10, "time: mean")) ERR; { /* _FillValue */ static const double ODV_ncl_a3_FillValue_att[1] = {1e+36} ; - if (nc_put_att_double(ncid, ODV_ncl_a3_id, "_FillValue", NC_DOUBLE, 1, ODV_ncl_a3_FillValue_att)) ERR; + if (nc_put_att_double(ncid, ODV_ncl_a3_id, NC_FillValue, NC_DOUBLE, 1, ODV_ncl_a3_FillValue_att)) ERR; } { /* missing_value */ static const double ODV_ncl_a3_missing_value_att[1] = {1e+36} ; @@ -5910,7 +5910,7 @@ main() if (nc_put_att_text(ncid, ODV_ncl_a3_id, "cell_methods", 10, "time: mean")) ERR; { /* _FillValue */ static const double ODV_pom_a1_FillValue_att[1] = {1e+36} ; - if (nc_put_att_double(ncid, ODV_pom_a1_id, "_FillValue", NC_DOUBLE, 1, ODV_pom_a1_FillValue_att)) ERR; + if (nc_put_att_double(ncid, ODV_pom_a1_id, NC_FillValue, NC_DOUBLE, 1, ODV_pom_a1_FillValue_att)) ERR; } { /* missing_value */ static const double ODV_pom_a1_missing_value_att[1] = {1e+36} ; @@ -5921,7 +5921,7 @@ main() if (nc_put_att_text(ncid, ODV_pom_a1_id, "cell_methods", 10, "time: mean")) ERR; { /* _FillValue */ static const double ODV_so4_a1_FillValue_att[1] = {1e+36} ; - if (nc_put_att_double(ncid, ODV_so4_a1_id, "_FillValue", NC_DOUBLE, 1, ODV_so4_a1_FillValue_att)) ERR; + if (nc_put_att_double(ncid, ODV_so4_a1_id, NC_FillValue, NC_DOUBLE, 1, ODV_so4_a1_FillValue_att)) ERR; } { /* missing_value */ static const double ODV_so4_a1_missing_value_att[1] = {1e+36} ; @@ -5932,7 +5932,7 @@ main() if (nc_put_att_text(ncid, ODV_so4_a1_id, "cell_methods", 10, "time: mean")) ERR; { /* _FillValue */ static const double ODV_soa_a1_FillValue_att[1] = {1e+36} ; - if (nc_put_att_double(ncid, ODV_soa_a1_id, "_FillValue", NC_DOUBLE, 1, ODV_soa_a1_FillValue_att)) ERR; + if (nc_put_att_double(ncid, ODV_soa_a1_id, NC_FillValue, NC_DOUBLE, 1, ODV_soa_a1_FillValue_att)) ERR; } { /* missing_value */ static const double ODV_soa_a1_missing_value_att[1] = {1e+36} ; @@ -6083,7 +6083,7 @@ main() if (nc_put_att_text(ncid, SRFRAD_id, "cell_methods", 10, "time: mean")) ERR; { /* _FillValue */ static const double SSAVIS_FillValue_att[1] = {1e+36} ; - if (nc_put_att_double(ncid, SSAVIS_id, "_FillValue", NC_DOUBLE, 1, SSAVIS_FillValue_att)) ERR; + if (nc_put_att_double(ncid, SSAVIS_id, NC_FillValue, NC_DOUBLE, 1, SSAVIS_FillValue_att)) ERR; } { /* missing_value */ static const double SSAVIS_missing_value_att[1] = {1e+36} ; @@ -6152,7 +6152,7 @@ main() if (nc_put_att_text(ncid, TROP_FD_id, "cell_methods", 10, "time: mean")) ERR; { /* _FillValue */ static const double TROP_P_FillValue_att[1] = {1e+36} ; - if (nc_put_att_double(ncid, TROP_P_id, "_FillValue", NC_DOUBLE, 1, TROP_P_FillValue_att)) ERR; + if (nc_put_att_double(ncid, TROP_P_id, NC_FillValue, NC_DOUBLE, 1, TROP_P_FillValue_att)) ERR; } { /* missing_value */ static const double TROP_P_missing_value_att[1] = {1e+36} ; @@ -6166,7 +6166,7 @@ main() if (nc_put_att_text(ncid, TROP_PD_id, "cell_methods", 10, "time: mean")) ERR; { /* _FillValue */ static const double TROP_T_FillValue_att[1] = {1e+36} ; - if (nc_put_att_double(ncid, TROP_T_id, "_FillValue", NC_DOUBLE, 1, TROP_T_FillValue_att)) ERR; + if (nc_put_att_double(ncid, TROP_T_id, NC_FillValue, NC_DOUBLE, 1, TROP_T_FillValue_att)) ERR; } { /* missing_value */ static const double TROP_T_missing_value_att[1] = {1e+36} ; @@ -6177,7 +6177,7 @@ main() if (nc_put_att_text(ncid, TROP_T_id, "cell_methods", 10, "time: mean")) ERR; { /* _FillValue */ static const double TROP_Z_FillValue_att[1] = {1e+36} ; - if (nc_put_att_double(ncid, TROP_Z_id, "_FillValue", NC_DOUBLE, 1, TROP_Z_FillValue_att)) ERR; + if (nc_put_att_double(ncid, TROP_Z_id, NC_FillValue, NC_DOUBLE, 1, TROP_Z_FillValue_att)) ERR; } { /* missing_value */ static const double TROP_Z_missing_value_att[1] = {1e+36} ; diff --git a/nc_test4/tst_compounds.c b/nc_test4/tst_compounds.c index ae7892a628..75ddeb5609 100644 --- a/nc_test4/tst_compounds.c +++ b/nc_test4/tst_compounds.c @@ -621,7 +621,7 @@ main(int argc, char **argv) #define DIM6_LEN 3 #define VAR6_NAME "obs" #define VAR6_RANK 1 -#define ATT6_NAME "_FillValue" +#define ATT6_NAME NC_FillValue #define ATT6_LEN 1 int ncid; int dimid, varid; diff --git a/nc_test4/tst_coords.c b/nc_test4/tst_coords.c index f7133a57f9..ebdd9990be 100644 --- a/nc_test4/tst_coords.c +++ b/nc_test4/tst_coords.c @@ -419,7 +419,7 @@ main(int argc, char **argv) } { /* _FillValue */ static const float pr_FillValue_att[1] = {1e+20f} ; - stat = nc_put_att_float(root_grp, pr_id, "_FillValue", NC_FLOAT, 1, pr_FillValue_att); + stat = nc_put_att_float(root_grp, pr_id, NC_FillValue, NC_FLOAT, 1, pr_FillValue_att); check_err(stat,__LINE__,__FILE__); } { /* cell_methods */ diff --git a/nc_test4/tst_enums.c b/nc_test4/tst_enums.c index ba2cf5fe4b..5d6342cd4d 100644 --- a/nc_test4/tst_enums.c +++ b/nc_test4/tst_enums.c @@ -171,7 +171,7 @@ main(int argc, char **argv) #define DIM2_LEN 5 #define VAR2_NAME "primary_cloud" #define VAR2_RANK 1 -#define ATT2_NAME "_FillValue" +#define ATT2_NAME NC_FillValue #define ATT2_LEN 1 printf("*** testing enum fill value ..."); diff --git a/nc_test4/tst_fill_attr_vanish.c b/nc_test4/tst_fill_attr_vanish.c index 72cd17a9e2..4960f227ef 100644 --- a/nc_test4/tst_fill_attr_vanish.c +++ b/nc_test4/tst_fill_attr_vanish.c @@ -90,7 +90,7 @@ int main() } printf("**** Expecting NC_ELATEFILL when adding _FillValue attribute if variable exists.\n"); - status = nc_put_att_int(ncid, test_id, "_FillValue", NC_INT, 1, test_fill_val); + status = nc_put_att_int(ncid, test_id, NC_FillValue, NC_INT, 1, test_fill_val); if (status != NC_ELATEFILL) { fflush(stdout); /* Make sure our stdout is synced with stderr. */ err++; diff --git a/nc_test4/tst_fillbug.c b/nc_test4/tst_fillbug.c index 6d4f14da85..600369c76e 100644 --- a/nc_test4/tst_fillbug.c +++ b/nc_test4/tst_fillbug.c @@ -61,11 +61,11 @@ main() static const float p_FillValue_atts[] = {NC_FILL_FLOAT, -99} ; int p_FillValue_att = -99 ; /* This should returns error, too many attribute vals */ - if (nc_put_att_float(ncid, p_id, "_FillValue", NC_FLOAT, 2, p_FillValue_atts) != NC_EINVAL) ERR; + if (nc_put_att_float(ncid, p_id, NC_FillValue, NC_FLOAT, 2, p_FillValue_atts) != NC_EINVAL) ERR; /* This also should return error, wrong type */ - if (nc_put_att_int(ncid, p_id, "_FillValue", NC_INT, 1, &p_FillValue_att) != NC_EBADTYPE) ERR; + if (nc_put_att_int(ncid, p_id, NC_FillValue, NC_INT, 1, &p_FillValue_att) != NC_EBADTYPE) ERR; /* This should succeed, _FillValue is valid */ - if (nc_put_att_float(ncid, p_id, "_FillValue", NC_FLOAT, 1, p_FillValue_atts)) ERR; + if (nc_put_att_float(ncid, p_id, NC_FillValue, NC_FLOAT, 1, p_FillValue_atts)) ERR; } /* Read the record of non-existent data. */ diff --git a/nc_test4/tst_fills2.c b/nc_test4/tst_fills2.c index d29a6adfe7..2f221d37cc 100644 --- a/nc_test4/tst_fills2.c +++ b/nc_test4/tst_fills2.c @@ -103,12 +103,12 @@ main(int argc, char **argv) if (nc_def_dim(ncid, "sentence", NC_UNLIMITED, &dimid)) ERR; if (nc_def_var(ncid, STRING_VAR_NAME, NC_STRING, NDIMS_STRING, &dimid, &varid)) ERR; - if (nc_put_att_string(ncid, varid, "_FillValue", FILLVALUE_LEN, + if (nc_put_att_string(ncid, varid, NC_FillValue, FILLVALUE_LEN, missing_val)) ERR; /* Check it out. */ if (nc_inq_varid(ncid, STRING_VAR_NAME, &varid_in)) ERR; - if (nc_get_att_string(ncid, varid_in, "_FillValue", + if (nc_get_att_string(ncid, varid_in, NC_FillValue, (char **)missing_val_in)) ERR; if (strcmp(missing_val[0], missing_val_in[0])) ERR; if (nc_free_string(FILLVALUE_LEN, (char **)missing_val_in)) ERR; @@ -139,7 +139,7 @@ main(int argc, char **argv) /* Now re-open file, read data, and check values again. */ if (nc_open(FILE_NAME, NC_NOWRITE, &ncid)) ERR; if (nc_inq_varid(ncid, STRING_VAR_NAME, &varid_in)) ERR; - if (nc_get_att_string(ncid, varid_in, "_FillValue", + if (nc_get_att_string(ncid, varid_in, NC_FillValue, (char **)missing_val_in)) ERR; if (strcmp(missing_val[0], missing_val_in[0])) ERR; if (nc_free_string(FILLVALUE_LEN, (char **)missing_val_in)) ERR; @@ -194,11 +194,11 @@ main(int argc, char **argv) if (nc_create(FILE_NAME, NC_NETCDF4, &ncid)) ERR; if (nc_def_dim(ncid, "rec", NC_UNLIMITED, &dimid)) ERR; if (nc_def_var(ncid, STRING_VAR_NAME, NC_STRING, NDIMS_STRING, &dimid, &varid)) ERR; - if (nc_put_att_string(ncid, varid, "_FillValue", FILLVALUE_LEN, missing_val)) ERR; + if (nc_put_att_string(ncid, varid, NC_FillValue, FILLVALUE_LEN, missing_val)) ERR; /* Check it out. */ if (nc_inq_varid(ncid, STRING_VAR_NAME, &varid_in)) ERR; - if (nc_get_att_string(ncid, varid_in, "_FillValue", (char **)missing_val_in)) ERR; + if (nc_get_att_string(ncid, varid_in, NC_FillValue, (char **)missing_val_in)) ERR; if (missing_val[0] != missing_val_in[0]) ERR; /* Write one string, leaving some blank records which will then @@ -222,7 +222,7 @@ main(int argc, char **argv) /* Now re-open file, read data, and check values again. */ if (nc_open(FILE_NAME, NC_NOWRITE, &ncid)) ERR; if (nc_inq_varid(ncid, STRING_VAR_NAME, &varid_in)) ERR; - if (nc_get_att_string(ncid, varid_in, "_FillValue", (char **)missing_val_in)) ERR; + if (nc_get_att_string(ncid, varid_in, NC_FillValue, (char **)missing_val_in)) ERR; if (NULL != missing_val_in[0]) ERR; if (nc_free_string(FILLVALUE_LEN, (char **)missing_val_in)) ERR; @@ -262,11 +262,11 @@ main(int argc, char **argv) if (nc_create(FILE_NAME, NC_NETCDF4, &ncid)) ERR; if (nc_def_dim(ncid, "rec", NC_UNLIMITED, &dimid)) ERR; if (nc_def_var(ncid, STRING_VAR_NAME2, NC_STRING, NDIMS_STRING, &dimid, &varid)) ERR; - if (nc_put_att_string(ncid, varid, "_FillValue", FILLVALUE_LEN2, missing_val)) ERR; + if (nc_put_att_string(ncid, varid, NC_FillValue, FILLVALUE_LEN2, missing_val)) ERR; /* Check it out. */ if (nc_inq_varid(ncid, STRING_VAR_NAME2, &varid_in)) ERR; - if (nc_get_att_string(ncid, varid_in, "_FillValue", (char **)missing_val_in)) ERR; + if (nc_get_att_string(ncid, varid_in, NC_FillValue, (char **)missing_val_in)) ERR; if (strcmp(missing_val[0], missing_val_in[0])) ERR; if (nc_free_string(FILLVALUE_LEN2, (char **)missing_val_in)) ERR; @@ -291,7 +291,7 @@ main(int argc, char **argv) /* Now re-open file, read data, and check values again. */ if (nc_open(FILE_NAME, NC_NOWRITE, &ncid)) ERR; if (nc_inq_varid(ncid, STRING_VAR_NAME2, &varid_in)) ERR; - if (nc_get_att_string(ncid, varid_in, "_FillValue", (char **)missing_val_in)) ERR; + if (nc_get_att_string(ncid, varid_in, NC_FillValue, (char **)missing_val_in)) ERR; if (strcmp(missing_val[0], missing_val_in[0])) ERR; if (nc_free_string(FILLVALUE_LEN2, (char **)missing_val_in)) ERR; @@ -335,12 +335,12 @@ main(int argc, char **argv) if (nc_def_dim(ncid, "Lunar_Years", NC_UNLIMITED, &dimid)) ERR; if (nc_def_var(ncid, STRING_VAR_NAME, NC_STRING, NDIMS_STRING, &dimid, &varid)) ERR; - if (nc_put_att_string(ncid, varid, "_FillValue", FILLVALUE_LEN, + if (nc_put_att_string(ncid, varid, NC_FillValue, FILLVALUE_LEN, missing_val)) ERR; /* Check it out. */ if (nc_inq_varid(ncid, STRING_VAR_NAME, &varid_in)) ERR; - if (nc_get_att_string(ncid, varid_in, "_FillValue", &missing_val_in)) ERR; + if (nc_get_att_string(ncid, varid_in, NC_FillValue, &missing_val_in)) ERR; if (strcmp(missing_val[0], missing_val_in)) ERR; if (nc_free_string(FILLVALUE_LEN, &missing_val_in)) ERR; @@ -353,7 +353,7 @@ main(int argc, char **argv) /* Now re-open file, and check again. */ if (nc_open(FILE_NAME, NC_NOWRITE, &ncid)) ERR; if (nc_inq_varid(ncid, STRING_VAR_NAME, &varid_in)) ERR; - if (nc_get_att_string(ncid, varid_in, "_FillValue", &missing_val_in)) ERR; + if (nc_get_att_string(ncid, varid_in, NC_FillValue, &missing_val_in)) ERR; if (strcmp(missing_val[0], missing_val_in)) ERR; if (nc_free_string(FILLVALUE_LEN, &missing_val_in)) ERR; @@ -374,7 +374,7 @@ main(int argc, char **argv) if (nc_create(FILE_NAME, NC_NETCDF4 | NC_CLASSIC_MODEL, &ncid)) ERR; if (nc_def_dim(ncid, "x", 182, &dimids[0])) ERR; if (nc_def_var(ncid, "u_obs", NC_FLOAT, 1, dimids, &varid)) ERR; - if (nc_put_att_double (ncid, varid, "_FillValue", NC_FLOAT, 1, &fillval)) ERR; + if (nc_put_att_double (ncid, varid, NC_FillValue, NC_FLOAT, 1, &fillval)) ERR; if (nc_close(ncid)) ERR; SUMMARIZE_ERR; } diff --git a/nc_test4/tst_filter_misc.sh b/nc_test4/tst_filter_misc.sh index e24191a71c..a6497c8d1a 100755 --- a/nc_test4/tst_filter_misc.sh +++ b/nc_test4/tst_filter_misc.sh @@ -4,7 +4,7 @@ if test "x$srcdir" = x ; then srcdir=`pwd`; fi . ../test_common.sh if test "x$TESTNCZARR" = x1; then -. $srcdir/test_nczarr.sh +. ${builddir}/test_nczarr.sh fi set -e diff --git a/nc_test4/tst_filter_vlen.sh b/nc_test4/tst_filter_vlen.sh index f2b093b0df..5c68e73e20 100755 --- a/nc_test4/tst_filter_vlen.sh +++ b/nc_test4/tst_filter_vlen.sh @@ -6,7 +6,7 @@ if test "x$srcdir" = x ; then srcdir=`pwd`; fi . ../test_common.sh if test "x$TESTNCZARR" = x1; then -. $srcdir/test_nczarr.sh +. ${builddir}/test_nczarr.sh fi set -e diff --git a/nc_test4/tst_filterinstall.sh b/nc_test4/tst_filterinstall.sh index 942dc678c3..32981c3ec8 100755 --- a/nc_test4/tst_filterinstall.sh +++ b/nc_test4/tst_filterinstall.sh @@ -8,7 +8,7 @@ if test "x$srcdir" = x ; then srcdir=`pwd`; fi . ../test_common.sh if test "x$TESTNCZARR" = x1; then -. $srcdir/test_nczarr.sh +. ${builddir}/test_nczarr.sh fi set -e diff --git a/nc_test4/tst_udf.c b/nc_test4/tst_udf.c index 887e603d87..1aa19f3f88 100644 --- a/nc_test4/tst_udf.c +++ b/nc_test4/tst_udf.c @@ -17,8 +17,8 @@ #define FILE_NAME "tst_udf.nc" -#ifdef _MSC_VER -static int +#if defined(_WIN32) || defined(_MSC_VER) +int NC4_no_show_metadata(int ncid) { return NC_NOERR; diff --git a/nc_test4/tst_unknown.sh b/nc_test4/tst_unknown.sh index 8bf30c04d8..0f4ec2e6f8 100755 --- a/nc_test4/tst_unknown.sh +++ b/nc_test4/tst_unknown.sh @@ -8,7 +8,7 @@ THISDIR=`pwd` cd $ISOPATH if test "x$TESTNCZARR" = x1 ; then -. "$srcdir/test_nczarr.sh" +. ${builddir}/test_nczarr.sh s3isolate fi diff --git a/ncdap_test/CMakeLists.txt b/ncdap_test/CMakeLists.txt index 40189f4720..a633596b9b 100644 --- a/ncdap_test/CMakeLists.txt +++ b/ncdap_test/CMakeLists.txt @@ -33,39 +33,42 @@ IF(NETCDF_ENABLE_TESTS) ENDIF(HAVE_BASH) ENDIF() - IF(NOT WIN32) + if(NOT MSVC) add_bin_env_test(ncdap t_dap3a) add_bin_env_test(ncdap test_cvt) add_bin_env_test(ncdap test_vara) - ENDIF() - - IF(NETCDF_ENABLE_EXTERNAL_SERVER_TESTS) - IF(NETCDF_ENABLE_DAP_REMOTE_TESTS) + endif() - IF(NETCDF_BUILD_UTILITIES) + if(NETCDF_BUILD_UTILITIES) add_sh_test(ncdap tst_ber) - add_sh_test(ncdap tst_remote3) - IF(HAVE_BASH) - SET_TESTS_PROPERTIES(ncdap_tst_remote3 PROPERTIES RUN_SERIAL TRUE) - ENDIF(HAVE_BASH) - add_sh_test(ncdap tst_zero_len_var) - add_sh_test(ncdap tst_fillmismatch) - IF(NETCDF_ENABLE_DAP_LONG_TESTS) - add_sh_test(ncdap tst_longremote3) - SET_TESTS_PROPERTIES(ncdap_tst_longremote3 PROPERTIES RUN_SERIAL TRUE) - ENDIF(NETCDF_ENABLE_DAP_LONG_TESTS) - IF(TRUE) - # Apparently iridl.ldeo.columbia.edu is down for now - add_sh_test(ncdap tst_encode) - # not yet fixed - add_sh_test(ncdap tst_hyrax) - ENDIF() - ENDIF(NETCDF_BUILD_UTILITIES) - IF(NETCDF_ENABLE_EXTERNAL_SERVER_TESTS) + endif() + + IF(NETCDF_ENABLE_EXTERNAL_SERVER_TESTS) + IF(NETCDF_ENABLE_DAP_REMOTE_TESTS) + + IF(NETCDF_BUILD_UTILITIES) + add_sh_test(ncdap tst_ber) + add_sh_test(ncdap tst_remote3) + IF(HAVE_BASH) + SET_TESTS_PROPERTIES(ncdap_tst_remote3 PROPERTIES RUN_SERIAL TRUE) + ENDIF(HAVE_BASH) + add_sh_test(ncdap tst_zero_len_var) + add_sh_test(ncdap tst_fillmismatch) + IF(NETCDF_ENABLE_DAP_LONG_TESTS) + add_sh_test(ncdap tst_longremote3) + SET_TESTS_PROPERTIES(ncdap_tst_longremote3 PROPERTIES RUN_SERIAL TRUE) add_bin_test(ncdap test_manyurls) SET_TESTS_PROPERTIES(ncdap_test_manyurls PROPERTIES RUN_SERIAL TRUE) + ENDIF(NETCDF_ENABLE_DAP_LONG_TESTS) + IF(TRUE) + # Apparently iridl.ldeo.columbia.edu is down for now + add_sh_test(ncdap tst_encode) + # not yet fixed + add_sh_test(ncdap tst_hyrax) + ENDIF() + ENDIF(NETCDF_BUILD_UTILITIES) ENDIF() - ENDIF() + ENDIF() IF(NETCDF_ENABLE_DAP_AUTH_TESTS) ##obsolete add_bin_test(ncdap t_auth) @@ -81,8 +84,7 @@ IF(NETCDF_ENABLE_TESTS) ### #add_bin_test(ncdap t_ncf330) - ENDIF() -ENDIF() +ENDIF(NETCDF_ENABLE_TESTS) ADD_SUBDIRECTORY(testdata3) ADD_SUBDIRECTORY(expected3) diff --git a/ncdap_test/Makefile.am b/ncdap_test/Makefile.am index e7d8ff834f..3162280f40 100644 --- a/ncdap_test/Makefile.am +++ b/ncdap_test/Makefile.am @@ -44,7 +44,7 @@ findtestserver_SOURCES = findtestserver.c pingurl_SOURCES = pingurl.c if NETCDF_BUILD_UTILITIES -TESTS += tst_ber.sh tst_remote3.sh tst_formatx.sh testurl.sh tst_fillmismatch.sh tst_zero_len_var.sh +TESTS += tst_ber.sh tst_formatx.sh tst_fillmismatch.sh tst_zero_len_var.sh endif if NETCDF_ENABLE_EXTERNAL_SERVER_TESTS @@ -52,13 +52,12 @@ if NETCDF_ENABLE_DAP_REMOTE_TESTS if NETCDF_BUILD_UTILITIES # Remote servers +TESTS += tst_ber.sh tst_formatx.sh tst_fillmismatch.sh tst_zero_len_var.sh +TESTS += tst_remote3.sh testurl.sh # iridl.ldeo.columbia.edu TESTS += tst_encode.sh # test.opendap.org -TESTS += tst_hyrax.sh - -TESTS += test_partvar - +#TESTS += tst_hyrax.sh # currently down # Various TESTS += tst_longremote3.sh tst_longremote3.log: tst_remote3.log @@ -71,10 +70,9 @@ test_manyurls.log: tst_longremote3.log TESTS += test_manyurls endif +TESTS += test_partvar test_partvar_SOURCES = test_partvar.c - t_misc_SOURCES = t_misc.c - #TESTS += t_ncf330 TESTS += t_misc @@ -137,9 +135,8 @@ clean-local: clean-local-check .PHONY: clean-local-check clean-local-check: - -rm -rf results - -rm -f .dodsrc - -rm -fr testdir_* testset_* + rm -rf results + rm -f .dodsrc # If valgrind is present, add valgrind targets. @VALGRIND_CHECK_RULES@ diff --git a/ncdap_test/manyurls.h b/ncdap_test/manyurls.h index 72f8478183..98db34e020 100644 --- a/ncdap_test/manyurls.h +++ b/ncdap_test/manyurls.h @@ -19,7 +19,9 @@ static char* urllist[] = { "https://www.ncei.noaa.gov/thredds-coastal/dodsC/us_east/us_east_20091119_to_20130404/20100103/ncom_relo_useast_u_2010010300_t003.nc?water_u[0][0:39][751][287],water_v[0][0:39][751][287]", "https://www.ncei.noaa.gov/thredds-coastal/dodsC/us_east/us_east_20091119_to_20130404/20100103/ncom_relo_useast_u_2010010300_t006.nc?water_u[0][0:39][751][287],water_v[0][0:39][751][287]", "https://www.ncei.noaa.gov/thredds-coastal/dodsC/us_east/us_east_20091119_to_20130404/20100103/ncom_relo_useast_u_2010010300_t009.nc?water_u[0][0:39][751][287],water_v[0][0:39][751][287]", +#ifdef DISABLE_TEST_CASE "https://www.ncei.noaa.gov/thredds-coastal/dodsC/us_east/us_east_20091119_to_20130404/20100103/ncom_relo_useast_u_2010010300_t012.nc?water_u[0][0:39][751][287],water_v[0][0:39][751][287]", +#endif "https://www.ncei.noaa.gov/thredds-coastal/dodsC/us_east/us_east_20091119_to_20130404/20100103/ncom_relo_useast_u_2010010300_t015.nc?water_u[0][0:39][751][287],water_v[0][0:39][751][287]", "https://www.ncei.noaa.gov/thredds-coastal/dodsC/us_east/us_east_20091119_to_20130404/20100103/ncom_relo_useast_u_2010010300_t018.nc?water_u[0][0:39][751][287],water_v[0][0:39][751][287]", "https://www.ncei.noaa.gov/thredds-coastal/dodsC/us_east/us_east_20091119_to_20130404/20100103/ncom_relo_useast_u_2010010300_t021.nc?water_u[0][0:39][751][287],water_v[0][0:39][751][287]", diff --git a/ncdap_test/test_manyurls.c b/ncdap_test/test_manyurls.c index 6f2ec01e38..9a44b5c075 100644 --- a/ncdap_test/test_manyurls.c +++ b/ncdap_test/test_manyurls.c @@ -5,12 +5,14 @@ #include "manyurls.h" -#undef VERBOSE +#define VERBOSE int main() { int i,ncid; char** p; + int nerrors = 0; + int nnotfound = 0; for(i=1,p=urllist;i<100;p++,i++) { char* tp = *p; @@ -27,14 +29,18 @@ int main() #ifdef VERBOSE printf("{%d} %s\n",i,tp); #endif - status = NC_NOERR; + nnotfound++; break; default: fprintf(stderr,"*** %s\n",nc_strerror(status)); - return 1; + nerrors++; + break; } - // nc_close(ncid); + if(status) nc_close(ncid); + status = NC_NOERR; } - return 0; + fprintf(stderr,">>> not-found=%d errors=%d\n",nnotfound,nerrors); + fflush(stderr); + return (nnotfound+nerrors > 0 ? 1: 0); } diff --git a/ncdap_test/test_vara.c b/ncdap_test/test_vara.c index bc1e582b26..1d634c9688 100644 --- a/ncdap_test/test_vara.c +++ b/ncdap_test/test_vara.c @@ -53,8 +53,8 @@ typedef struct Odom { size_t* count; } Odom; -#ifdef IGNORE -static float threeD_data[X*Y*Z]; +#ifdef NOCODE +static float threeD_data[X][Y][Z]; static int dims[RANK] = {X,Y,Z}; #endif static float threeD[X][Y][Z]; @@ -219,7 +219,7 @@ static size_t odom_count(Odom* odom) return offset; } -#ifdef IGNORE +#ifdef NOCODE static float threeD_data[X][Y][Z] = { 1, 0.999950000416665, 0.999800006666578, 0.999550033748988, 0.999200106660978, 0.998750260394966, 0.998200539935204, diff --git a/ncdump/CMakeLists.txt b/ncdump/CMakeLists.txt index aa8327181e..04043c49f0 100644 --- a/ncdump/CMakeLists.txt +++ b/ncdump/CMakeLists.txt @@ -74,11 +74,11 @@ endif(NETCDF_ENABLE_DAP) #### # We have to do a little tweaking # to remove the Release/ and Debug/ directories -# in Windows builds. This is required to get +# in MSVC builds. This is required to get # test scripts to work. #### -if(WIN32) +if(MSVC) macro(setbinprops name) set_target_properties(${name} PROPERTIES @@ -131,7 +131,7 @@ endif() target_link_libraries(tst_fileinfo netcdf ${ALL_TLL_LIBS}) ENDIF() - IF(WIN32) + IF(MSVC) set_target_properties(rewrite-scalar PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} @@ -184,7 +184,7 @@ endif() RUNTIME_OUTPUT_DIRECTORY_RELEASE ${CMAKE_CURRENT_BINARY_DIR} ) endif(USE_HDF5) - endif(WIN32) + endif(MSVC) # Build support programs build_bin_test_no_prefix(tst_utf8) @@ -217,10 +217,11 @@ endif() if(HAVE_BASH) - ## Start adding tests in the appropriate order + # Support programs add_bin_test_no_prefix(ref_ctest) add_bin_test_no_prefix(ref_ctest64) + ## Start adding tests in the appropriate order add_sh_test(ncdump run_tests) add_sh_test(ncdump tst_64bit) add_sh_test(ncdump tst_lengths) diff --git a/ncdump/Makefile.am b/ncdump/Makefile.am index 14b36aca82..e22cf59466 100644 --- a/ncdump/Makefile.am +++ b/ncdump/Makefile.am @@ -253,6 +253,6 @@ tmp_keyword1.cdl tmp_keyword2.cdl tmp_keyword3.cdl tmp_keyword4.cdl \ type_*.nc copy_type_*.cdl \ scope_*.nc copy_scope_*.cdl keyword5.nc tst_enum_undef.cdl tst_times_nc4.cdl -# Remove directories + clean-local: - rm -fr rcmergedir rchome testset_* + rm -fr rcmergedir rchome diff --git a/ncdump/nccopy.1 b/ncdump/nccopy.1 index f4a90f71b8..7bdda444ba 100644 --- a/ncdump/nccopy.1 +++ b/ncdump/nccopy.1 @@ -23,6 +23,7 @@ nccopy \%[\-F \fI filterspec \fP] \%[\-L \fI n \fP] \%[\-M \fI n \fP] +\%[\-X \fI flag \fP \fI arg \fP] \%\fI infile \fP \%\fI outfile \fP .hy @@ -307,6 +308,11 @@ is a 32-bit unsigned integer. .IP This parameter may be repeated multiple times with different variable names. +.IP "\fB \-X \fP \fI flag \fP \fI arg \fP" +Flag overflow option for uncommon flags. +.IP +\fBflag == 'f'\fP forces enabling of the global fill flag (nc_set_fill). +This option may be used multiple times to set several flags. .SH EXAMPLES .LP diff --git a/ncdump/nccopy.c b/ncdump/nccopy.c index bc8fa70e37..f53f3b9bbf 100644 --- a/ncdump/nccopy.c +++ b/ncdump/nccopy.c @@ -154,6 +154,8 @@ static char** option_lvars = 0; /* list of variable names specified with -v static bool_t option_varstruct = false; /* if -v set, copy structure for non-selected vars */ static int option_compute_chunkcaches = 0; /* default, don't try still flaky estimate of * chunk cache for each variable */ +static int option_global_fill = 0; /* turn on global fill using nc_set_fill */ + /* get group id in output corresponding to group igrp in input, * given parent group id (or root group id) parid in output. */ static int @@ -2095,8 +2097,11 @@ copy(char* infile, char* outfile) break; } NC_CHECK(nc_create(outfile, create_mode, &ogrp)); - NC_CHECK(nc_set_fill(ogrp, NC_NOFILL, NULL)); - + if(option_global_fill) { + NC_CHECK(nc_set_fill(ogrp, NC_FILL, NULL)); + } else { + NC_CHECK(nc_set_fill(ogrp, NC_NOFILL, NULL)); + } #ifdef USE_NETCDF4 /* Because types in one group may depend on types in a different * group, need to create all groups before defining types */ @@ -2208,6 +2213,9 @@ usage(void) [-F filterspec] specify a compression algorithm to apply to an output variable (may be repeated).\n\ [-Ln] set log level to n (>= 0); ignored if logging isn't enabled.\n\ [-Mn] set minimum chunk size to n bytes (n >= 0)\n\ + [-X flag overflow for uncommon flags.\n\ + flag == 'f' forces enabling global fill flag (nc_set_fill)\n\ + Use multiple times to set several flags.\n\ infile name of netCDF input file\n\ outfile name for netCDF output file\n" @@ -2239,7 +2247,7 @@ main(int argc, char**argv) } opterr = 1; - while ((c = getopt(argc, argv, "k:3467d:sum:c:h:e:rwxg:G:v:V:F:L:M:")) != -1) { + while ((c = getopt(argc, argv, "k:3467d:sum:c:h:e:rwxg:G:v:V:F:L:M:X:")) != -1) { switch(c) { case 'k': /* for specifying variant of netCDF format to be generated Format names: @@ -2394,7 +2402,15 @@ main(int argc, char**argv) #else error("-M requires netcdf-4"); #endif - + case 'X': /* uncommon flags */ + switch (optarg[0]) { + case 'f': /* turn on global fill */ + option_global_fill = 1; + break; + case '\0': break; + default: error("-X unknown sub-flag"); + } + break; default: usage(); } diff --git a/ncdump/ncdump.c b/ncdump/ncdump.c index b9407a925d..b29cacaf31 100644 --- a/ncdump/ncdump.c +++ b/ncdump/ncdump.c @@ -72,9 +72,18 @@ static const char* keywords[] = { NULL }; +static const char* hidden_attributes[] = { +NCPROPS, +SUPERBLOCKATT, +ISNETCDF4ATT, +NULL +}; + /*Forward*/ static int searchgrouptreedim(int ncid, int dimid, int* parentidp); extern int nc__testurl(const char*,char**); +static int count_hidden_atts(int ncid, int ngatts); +static int ishidden(const char* name); static int iskeyword(const char* kw) { @@ -769,9 +778,10 @@ pr_att( #ifdef USE_NETCDF4 if (ncid == getrootid(ncid) && varid == NC_GLOBAL - && strcmp(att.name,NCPROPS)==0) + && ishidden(att.name)) return; /* will be printed elsewhere */ #endif + NC_CHECK( nc_inq_att(ncid, varid, att.name, &att.type, &att.len) ); att.tinfo = get_typeinfo(att.type); @@ -1597,6 +1607,7 @@ do_ncdump_rec(int ncid, const char *path) int ndims; /* number of dimensions */ int nvars; /* number of variables */ int ngatts; /* number of global attributes */ + int nhidden; /* number of global hidden attributes */ int xdimid; /* id of unlimited dimension */ int varid; /* variable id */ int rootncid; /* id of root group */ @@ -1893,7 +1904,10 @@ do_ncdump_rec(int ncid, const char *path) if(var.dims) {free((void*)var.dims); var.dims = NULL;} } - if (ngatts > 0 || formatting_specs.special_atts) { + /* Get count of hidden global attributes */ + nhidden = count_hidden_atts(ncid,ngatts); + + if (ngatts > nhidden || formatting_specs.special_atts) { printf ("\n"); indent_out(); if (is_root) @@ -1905,9 +1919,9 @@ do_ncdump_rec(int ncid, const char *path) pr_att(ncid, kind, NC_GLOBAL, "", ia); } if (is_root && formatting_specs.special_atts) { /* output special attribute - * for format variant */ + * for format variant */ - pr_att_hidden(ncid, kind); + pr_att_hidden(ncid, kind); pr_att_global_format(ncid, kind); } @@ -2601,3 +2615,32 @@ searchgrouptreedim(int ncid, int dimid, int* parentidp) nullfree(ids); return ret; } + +/* Test if name is hidden attribute */ +static int +ishidden(const char* name) +{ + const char** p; + for(p=hidden_attributes;*p;p++) { + if(strcmp(*p,name)==0) return 1; + } + return 0; +} + +/* Count number of hidden (WRT ncdump) attributes */ +static int +count_hidden_atts(int ncid, int ngatts) +{ + int nhidden = 0; +#ifdef USE_NETCDF4 + int ia; + char aname[NC_MAX_NAME]; + if (ncid != getrootid(ncid) || ngatts == 0) return 0; + for(ia=0;ia