diff --git a/.github/workflows/main-cmake.yml b/.github/workflows/main-cmake.yml
index 4a783a6819..9fab45b852 100644
--- a/.github/workflows/main-cmake.yml
+++ b/.github/workflows/main-cmake.yml
@@ -3,7 +3,7 @@ name: NetCDF-C CMake CI - Windows
on: [ pull_request, workflow_dispatch]
env:
- REMOTETESTDOWN: ${{ vars.REMOTETESTDOWN }}
+ REMOTETESTDOWN: no
concurrency:
group: ${{ github.workflow }}-${{ github.head_ref }}
@@ -53,6 +53,12 @@ jobs:
- name: Dump Matrix Context
run: echo '${{ toJSON(matrix) }}'
+ - name: Check Hard Drive Space
+ shell: bash -el {0}
+ run: |
+ df -h
+ pwd
+
#- run: echo "CMAKE_PREFIX_PATH=${env.CONDA_PREFIX}/Library" >> $GITHUB_ENV
#- run: echo "/c/Users/runneradmin/miniconda3/Library/lib:${GITHUB_PATH}" >> $GITHUB_ENV
#- run: echo ""
@@ -88,6 +94,12 @@ jobs:
ls $CONDA_PREFIX/Library/include/
shell: bash -el {0}
+ - name: Check Hard Drive Space
+ shell: bash -el {0}
+ run: |
+ df -h
+ pwd
+
- name: Perform out-of-directory configuration
shell: bash -el {0}
run: |
@@ -108,6 +120,12 @@ jobs:
run: |
cd build
cat libnetcdf.settings
+
+ - name: Check Hard Drive Space Post Summary
+ shell: bash -el {0}
+ run: |
+ df -h
+ pwd
- name: Perform out-of-directory build - libnetcdf
shell: bash -el {0}
@@ -163,4 +181,6 @@ jobs:
run: |
cd build
PATH=~/tmp/bin:$PATH ctest . --rerun-failed --output-on-failure -VV
+ df -h
+ pwd
if: ${{ failure() }}
diff --git a/.github/workflows/run_tests_cdash.yml b/.github/workflows/run_tests_cdash.yml
index dfb8572560..09be22770b 100644
--- a/.github/workflows/run_tests_cdash.yml
+++ b/.github/workflows/run_tests_cdash.yml
@@ -7,7 +7,7 @@ name: Run CDash Ubuntu/Linux netCDF Tests
on: [workflow_dispatch]
env:
- REMOTETESTDOWN: ${{ vars.REMOTETESTDOWN }}
+ REMOTETESTDOWN: no
concurrency:
group: ${{ github.workflow}}-${{ github.head_ref }}
diff --git a/.github/workflows/run_tests_ubuntu.yml b/.github/workflows/run_tests_ubuntu.yml
index 5d561807b0..c27cf8666e 100644
--- a/.github/workflows/run_tests_ubuntu.yml
+++ b/.github/workflows/run_tests_ubuntu.yml
@@ -7,7 +7,7 @@ name: Run Ubuntu/Linux netCDF Tests
on: [pull_request,workflow_dispatch]
env:
- REMOTETESTDOWN: ${{ vars.REMOTETESTDOWN }}
+ REMOTETESTDOWN: no
concurrency:
group: ${{ github.workflow}}-${{ github.head_ref }}
diff --git a/.github/workflows/run_tests_win_cygwin.yml b/.github/workflows/run_tests_win_cygwin.yml
index 54e8d0affb..95098c652a 100644
--- a/.github/workflows/run_tests_win_cygwin.yml
+++ b/.github/workflows/run_tests_win_cygwin.yml
@@ -10,7 +10,7 @@ env:
SHELLOPTS: igncr
CHERE_INVOKING: 1
CYGWIN_NOWINPATH: 1
- REMOTETESTDOWN: ${{ vars.REMOTETESTDOWN }}
+ REMOTETESTDOWN: no
jobs:
build-and-test-autotools:
@@ -26,6 +26,10 @@ jobs:
plugin_dir_option: ["", "--without-plugin-dir"]
steps:
+ - name: Set safe directory for Git
+ shell: pwsh
+ run: git config --global --add safe.directory /cygdrive/d/a/netcdf-c/netcdf-c
+
- name: Fix line endings
shell: pwsh
run: git config --global core.autocrlf input
@@ -55,7 +59,7 @@ jobs:
/bin/dash ./configure --enable-hdf5 --enable-shared
--disable-static --enable-dap --disable-dap-remote-tests
--enable-plugins ${{ matrix.plugin_dir_option }}
- --disable-nczarr --disable-nczarr-filters
+ --disable-nczarr #--disable-nczarr-filters
--disable-s3 --with-s3-testing=no
@@ -103,8 +107,13 @@ jobs:
run:
shell: C:/cygwin/bin/bash.exe -eo pipefail -o igncr "{0}"
- steps:
+# shell: C:/cygwin/bin/bash.exe -eo pipefail -o igncr "{0}"
+ steps:
+ - name: Set safe directory for Git
+ shell: pwsh
+ run: git config --global --add safe.directory /cygdrive/d/a/netcdf-c/netcdf-c
+
- run: git config --global core.autocrlf input
shell: pwsh
- uses: actions/checkout@v4
diff --git a/.github/workflows/run_tests_win_mingw.yml b/.github/workflows/run_tests_win_mingw.yml
index be3034f9af..018863973b 100644
--- a/.github/workflows/run_tests_win_mingw.yml
+++ b/.github/workflows/run_tests_win_mingw.yml
@@ -46,7 +46,7 @@ jobs:
run: autoreconf -if
- name: (Autotools) Configure Build
- run: ./configure --enable-hdf5 --enable-dap --disable-dap-remote-tests --disable-static --disable-byterange --disable-dap-remote-tests --disable-logging --enable-plugins --disable-nczarr-filters --disable-s3
+ run: ./configure --enable-hdf5 --enable-dap --disable-dap-remote-tests --disable-static --disable-byterange --disable-dap-remote-tests --disable-logging --enable-plugins --disable-s3 #--disable-nczarr-filters
if: ${{ success() }}
- name: (Autotools) Look at config.log if error
diff --git a/CMakeLists.txt b/CMakeLists.txt
index fc42e5c696..a44a1cbe13 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -45,7 +45,6 @@ set(PACKAGE "netCDF" CACHE STRING "")
include(netcdf_functions_macros)
include(deprecated)
-
# Backport of built-in `PROJECT_IS_TOP_LEVEL` from CMake 3.21
if (NOT DEFINED NETCDF_IS_TOP_LEVEL)
set(NETCDF_IS_TOP_LEVEL OFF)
@@ -440,18 +439,18 @@ endif()
# Format Option checks
################################
-# As a long term goal, and because it is now the case that
-# NETCDF_ENABLE_NCZARR => USE_NETCDF4, so make the external options
-# NETCDF_ENABLE_NETCDF_4 and NETCDF_ENABLE_NETCDF4 obsolete
-# in favor of NETCDF_ENABLE_HDF5.
+# As a long term goal, and because it is now the case that --enable-nczarr
+# => USE_NETCDF4, make the external options --enable-netcdf-4 and
+# --enable-netcdf4 obsolete in favor of --enable-hdf5
# We will do the following for one more release cycle.
-# 1. Make NETCDF_ENABLE_NETCDF_4 be an alias for NETCDF_ENABLE_NETCDF4.
-# 2. Make NETCDF_ENABLE_NETCDF4 an alias for NETCDF_ENABLE_HDF5.
-# 3. Internally, convert most (but not all) uses of USE_NETCDF_4 and USE_NETCDF4 to USE_HDF5.
+# 1. Make --enable-netcdf-4 be an alias for --enable-netcdf4.
+# 2. Make --enable-netcdf4 an alias for --enable-hdf5.
+# 3. Internally, convert most uses of USE_NETCDF_4 and USE_NETCDF4 to USE_HDF5
-# Collect the values of NETCDF_ENABLE_NETCDF_4, NETCDF_ENABLE_NETCDF4, and NETCDF_ENABLE_HDF5.
+# Collect the values of -DNETCDF_ENABLE_NETCDF_4, -DNETCDF_ENABLE_NETCDF4, and -DNETCDF_ENABLE_HDF5.
+# Also determine which have been explicitly set on the command line.
-# Figure out which options are defined and process options
+# Figure out which options are defined
if(DEFINED NETCDF_ENABLE_NETCDF_4)
set(UNDEF_NETCDF_4 OFF CACHE BOOL "")
option(NETCDF_ENABLE_NETCDF_4 "" ON)
@@ -493,8 +492,11 @@ if(UNDEF_HDF5)
set(NETCDF_ENABLE_HDF5 ON CACHE BOOL "" FORCE)
endif()
-# Turn off NETCDF_ENABLE_NETCDF4 because it will be used
-# as a shorthand for NETCDF_ENABLE_HDF5|NETCDF_ENABLE_HDF4|NETCDF_ENABLE_NCZARR
+# Turn off enable_netcdf_4 because it is no longer needed
+set(NETCDF_ENABLE_NETCDF_4 OFF CACHE BOOL "" FORCE)
+
+# Turn off enable_netcdf4 because it will be used
+# as a shorthand for ENABLE_HDF5|ENABLE_HDF4|ENABLE_NCZARR
set(NETCDF_ENABLE_NETCDF4 OFF CACHE BOOL "" FORCE)
option(NETCDF_ENABLE_DAP "Enable DAP2 and DAP4 Client." ON)
option(NETCDF_ENABLE_NCZARR "Enable NCZarr Client." ON)
@@ -507,10 +509,28 @@ if(NETCDF_ENABLE_HDF4)
set(USE_HDF4 ON)
endif()
+# Decide default Zarr Format for creation
+OPTION(NETCDF_ENABLE_DEFAULT_ZARR_FORMAT_V3 "Specify the default Zarr format is V3" OFF)
+if(NETCDF_ENABLE_DEFAULT_ZARR_FORMAT_V3)
+SET(DFALTZARRFORMAT 3 CACHE STRING "")
+ELSE()
+SET(DFALTZARRFORMAT 2 CACHE STRING "")
+ENDIF()
+
+IF(NETCDF_ENABLE_NCZARR)
+ SET(NETCDF_ENABLE_NCZARR_V3 ON CACHE BOOL "Enable Zarr V3" FORCE)
+ # V3 is enabled if NCzarr is enabled
+ELSE()
+ SET(NETCDF_ENABLE_NCZARR_V3 OFF CACHE BOOL "Enable Zarr V3" FORCE)
+ENDIF()
+
# Netcdf-4 support (i.e. libsrc4) is required by more than just HDF5 (e.g. NCZarr)
# So depending on what above formats are enabled, enable netcdf-4
+# as a short-hand for this boolean expression
if(NETCDF_ENABLE_HDF5 OR NETCDF_ENABLE_HDF4 OR NETCDF_ENABLE_NCZARR)
set(NETCDF_ENABLE_NETCDF4 ON CACHE BOOL "Enable netCDF-4 API" FORCE)
+ # For convenience, define USE_NETCDF4
+ set(USE_NETCDF4 ON CACHE BOOL "Enable netCDF-4 API" FORCE)
endif()
# enable|disable all forms of network access
@@ -534,10 +554,7 @@ endif()
# Did the user specify a default minimum blocksize for posixio?
set(NCIO_MINBLOCKSIZE 256 CACHE STRING "Minimum I/O Blocksize for netCDF classic and 64-bit offset format files.")
-if(NETCDF_ENABLE_NETCDF4)
- set(USE_NETCDF4 ON CACHE BOOL "")
- set(NETCDF_ENABLE_NETCDF4 ON CACHE BOOL "")
-else()
+if(NOT USE_NETCDF4)
set(USE_HDF4_FILE_TESTS OFF)
set(USE_HDF4 OFF)
set(NETCDF_ENABLE_HDF4_FILE_TESTS OFF)
@@ -993,7 +1010,7 @@ endif(NETCDF_ENABLE_ERANGE_FILL)
# Now NETCDF_ENABLE_ERANGE_FILL is either AUTO, ON, or OFF
# More relaxed coordinate check is now mandatory for all builds.
-set(ENABLE_ZERO_LENGTH_COORD_BOUND ON)
+set(NETCDF_ENABLE_ZERO_LENGTH_COORD_BOUND ON)
# check and conform with PnetCDF settings on ERANGE_FILL and RELAX_COORD_BOUND
if(USE_PNETCDF)
@@ -1016,7 +1033,7 @@ if(NETCDF_ENABLE_ERANGE_FILL)
set(M4FLAGS "-DERANGE_FILL" CACHE STRING "")
endif()
-if(ENABLE_ZERO_LENGTH_COORD_BOUND)
+if(NETCDF_ENABLE_ZERO_LENGTH_COORD_BOUND)
message(STATUS "Enabling a more relaxed check for NC_EINVALCOORDS")
target_compile_definitions(netcdf PRIVATE RELAX_COORD_BOUND)
endif()
@@ -1540,6 +1557,12 @@ if(NETCDF_ENABLE_TESTS)
if(NETCDF_ENABLE_NCZARR)
include_directories(nczarr_test)
add_subdirectory(nczarr_test)
+ if(NOT ISCMAKE)
+ # Cannot create target because another target with
+ # the same name already exists.
+ # See documentation for policy CMP0002 for more details.
+ add_subdirectory(v3_nczarr_test)
+ endif()
endif()
endif()
@@ -1673,10 +1696,10 @@ is_disabled(BUILD_SHARED_LIBS enable_static)
is_enabled(BUILD_SHARED_LIBS enable_shared)
is_enabled(NETCDF_ENABLE_V2_API HAS_NC2)
-is_enabled(NETCDF_ENABLE_NETCDF4 HAS_NC4)
is_enabled(NETCDF_ENABLE_HDF4 HAS_HDF4)
is_enabled(USE_HDF4 HAS_HDF4)
is_enabled(USE_HDF5 HAS_HDF5)
+is_enabled(USE_NETCDF4 HAS_NC4)
is_enabled(OFF HAS_BENCHMARKS)
is_enabled(STATUS_PNETCDF HAS_PNETCDF)
is_enabled(STATUS_PARALLEL HAS_PARALLEL)
@@ -1735,6 +1758,10 @@ endif()
# WARNING: this must match the value in configure.ac
set(S3TESTBUCKET "unidata-zarr-test-data" CACHE STRING "S3 test bucket")
+# Additional S3 Test Endpoint
+set(S3ENDPOINT "s3.us-east-1.amazonaws.com" CACHE STRING "S3 endpoint")
+
+
# The working S3 path tree within the Unidata bucket.
# WARNING: this must match the value in configure.ac
set(S3TESTSUBTREE "netcdf-c" CACHE STRING "Working S3 path.")
@@ -1849,7 +1876,7 @@ configure_file(${CMAKE_CURRENT_SOURCE_DIR}/s3cleanup.in ${CMAKE_CURRENT_BINARY_D
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/s3gc.in ${CMAKE_CURRENT_BINARY_DIR}/s3gc.sh @ONLY NEWLINE_STYLE LF)
#####
-# Build and copy nc_test4/findplugin.sh to various places
+# Transfer files from a single source to directories that use it.
#####
foreach(CC nc_test4 nczarr_test v3_nczarr_test plugins h5_test examples/C)
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/plugins/findplugin.in ${CMAKE_CURRENT_BINARY_DIR}/${CC}/findplugin.sh @ONLY NEWLINE_STYLE LF)
diff --git a/Makefile.am b/Makefile.am
index 8ccfd6a4d2..6e02782330 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -110,8 +110,11 @@ endif
# Build Cloud Storage if desired.
if NETCDF_ENABLE_NCZARR
-ZARR_TEST_DIRS = nczarr_test
ZARR = libnczarr
+ZARR_TEST_DIRS = nczarr_test
+if NETCDF_ENABLE_NCZARR_V3
+ZARR_TEST_DIRS += v3_nczarr_test
+endif
endif
# Optionally build test plugins
@@ -218,6 +221,9 @@ if NETCDF_ENABLE_S3_TESTALL
mv ${abs_top_builddir}/tmp_@PLATFORMUID@.uids ${abs_top_builddir}/s3cleanup_@PLATFORMUID@.uids
endif
+clean-local:
+ rm -fr alltests_*
+
if NETCDF_ENABLE_S3_TESTALL
distclean-local:
rm -f ${abs_top_builddir}/s3cleanup_@PLATFORMUID@.uids
diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md
index 5759b335c1..b2220bf4bc 100644
--- a/RELEASE_NOTES.md
+++ b/RELEASE_NOTES.md
@@ -5,6 +5,9 @@ Release Notes {#RELEASE_NOTES}
This file contains a high-level description of this package's evolution. Releases are in reverse chronological order (most recent first). Note that, as of netcdf 4.2, the `netcdf-c++` and `netcdf-fortran` libraries have been separated into their own libraries.
+## 4.9.4 - TBD
+* Add experimental support for the Zarr Version 3 storage format. This code will change as the Zarr Version 3 Specification evolves. See [Github #3068](https://github.com/Unidata/netcdf-c/pull/3068).
+
## 4.9.3 - TBD
* Extend the netcdf API to support programmatic changes to the plugin search path. See [Github #3034](https://github.com/Unidata/netcdf-c/pull/3034) for more information.
@@ -26,8 +29,6 @@ This file contains a high-level description of this package's evolution. Release
* Convert NCZarr V2 to store all netcdf-4 specific info as attributes. This improves interoperability with other Zarr implementations by no longer using non-standard keys. The price to be paid is that lazy attribute reading cannot be supported. See [Github #2836](https://github.com/Unidata/netcdf-c/pull/2936) for more information.
* Cleanup the option code for NETCDF_ENABLE_SET_LOG_LEVEL\[_FUNC\] See [Github #2931](https://github.com/Unidata/netcdf-c/pull/2931) for more information.
-### Release Candidate 1 - July 26, 2024
-
* Convert NCZarr V2 to store all netcdf-4 specific info as attributes. This improves interoperability with other Zarr implementations by no longer using non-standard keys. The price to be paid is that lazy attribute reading cannot be supported. See [Github #2836](https://github.com/Unidata/netcdf-c/issues/2936) for more information.
* Cleanup the option code for NETCDF_ENABLE_SET_LOG_LEVEL\[_FUNC\] See [Github #2931](https://github.com/Unidata/netcdf-c/issues/2931) for more information.
* Fix duplicate definition when using aws-sdk-cpp. See [Github #2928](https://github.com/Unidata/netcdf-c/issues/2928) for more information.
diff --git a/cmake/netcdf_functions_macros.cmake b/cmake/netcdf_functions_macros.cmake
index ea9d8ce819..7ae7b95b04 100644
--- a/cmake/netcdf_functions_macros.cmake
+++ b/cmake/netcdf_functions_macros.cmake
@@ -221,7 +221,7 @@ macro(print_conf_summary)
message("Configuration Summary:")
message("")
message(STATUS "Building Shared Libraries: ${BUILD_SHARED_LIBS}")
- message(STATUS "Building netCDF-4: ${NETCDF_ENABLE_NETCDF_4}")
+ message(STATUS "Building netCDF-4: ${NETCDF_ENABLE_NETCDF4}")
message(STATUS "Building DAP2 Support: ${NETCDF_ENABLE_DAP2}")
message(STATUS "Building DAP4 Support: ${NETCDF_ENABLE_DAP4}")
message(STATUS "Building Byte-range Support: ${NETCDF_ENABLE_BYTERANGE}")
@@ -277,6 +277,39 @@ macro(getdpkg_arch arch)
execute_process(COMMAND "${NC_DPKG}" "--print-architecture" OUTPUT_VARIABLE "${arch}" OUTPUT_STRIP_TRAILING_WHITESPACE)
endmacro(getdpkg_arch)
+macro(NCZARR_SH_TEST basename src)
+ file(READ ${CMAKE_CURRENT_SOURCE_DIR}/../${src}/tst_${basename}.sh SHSOURCE)
+ # Make sure the order of prepended lines is correct
+ string(PREPEND SHSOURCE "TESTNCZARR=1\n")
+ string(PREPEND SHSOURCE "#!/bin/bash\n")
+ # Replace with FILE(CONFIGURE) when cmake 3.18 is in common use
+ file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/run_${basename}.1 "${SHSOURCE}")
+ configure_file(${CMAKE_CURRENT_BINARY_DIR}/run_${basename}.1 ${CMAKE_CURRENT_BINARY_DIR}/run_${basename}.sh FILE_PERMISSIONS OWNER_WRITE OWNER_READ OWNER_EXECUTE @ONLY NEWLINE_STYLE LF)
+ file(REMOVE ${CMAKE_CURRENT_BINARY_DIR}/run_${basename}.1)
+endmacro(NCZARR_SH_TEST)
+
+macro(NCZARR_C_TEST basename newname src)
+ file(READ ${CMAKE_CURRENT_SOURCE_DIR}/../${src}/${basename}.c CSOURCE)
+ string(PREPEND CSOURCE "#define TESTNCZARR\n")
+ file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/${newname}.c "${CSOURCE}")
+endmacro(NCZARR_C_TEST)
+
+macro(build_bin_test_with_util_lib F UTIL_LIB)
+ build_bin_test(${F})
+ if(ENABLE_DLL)
+ target_compile_definitions(${F} PUBLIC -DDLL_NETCDF)
+ endif(ENABLE_DLL)
+ target_link_libraries(${F} ${UTIL_LIB} ${ALL_TLL_LIBS})
+endmacro()
+
+macro(add_bin_test_with_util_lib PREFIX F UTIL_LIB)
+ add_bin_test(${PREFIX} ${F})
+ if(ENABLE_DLL)
+ target_compile_definitions(${PREFIX}_${F} PUBLIC -DDLL_NETCDF)
+ endif(ENABLE_DLL)
+ target_link_libraries(${PREFIX}_${F} ${UTIL_LIB} ${ALL_TLL_LIBS})
+endmacro()
+
################################
# Functions
################################
@@ -336,4 +369,3 @@ function(getlastdir s ret_val)
list(GET list -1 last)
set(${ret_val} "${last}" PARENT_SCOPE)
endfunction()
-
diff --git a/cmake/v3_setup.cmake b/cmake/v3_setup.cmake
new file mode 100644
index 0000000000..6cd99f43e8
--- /dev/null
+++ b/cmake/v3_setup.cmake
@@ -0,0 +1,29 @@
+# Sources that are specific to Zarr V3 testing
+set(V3FILES run_nan.sh run_mud.sh)
+set(V3DATA ref_nczarr2zarr.cdl ref_purezarr.cdl ref_xarray.cdl ref_misc2.cdl ref_jsonconvention.cdl ref_jsonconvention.zmap ref_nulls_zarr.baseline ref_string_zarr.baseline ref_string_nczarr.baseline ref_zarr_test_data_2d.cdl.gz ref_groups_regular.cdl ref_filtered.cdl ref_any.cdl ref_multi.cdl ref_tst_nans.dmp ref_bzip2.cdl ref_tst_mud4-bc.cdl ref_tst_mud4.cdl ref_tst_mud4_chars.cdl)
+
+# Shell scripts that are copies of same files from nczarr_test
+SET(TESTFILES_NCZARR_SH test_nczarr.sh run_chunkcases.sh run_corrupt.sh run_external.sh run_fillonlyz.sh run_filter.sh run_filterinstall.sh run_filter_misc.sh run_filter_vlen.sh run_interop.sh run_jsonconvention.sh run_misc.sh run_nccopy5.sh run_nccopyz.sh run_ncgen4.sh run_nczarr_fill.sh run_nczfilter.sh run_newformat.sh run_notzarr.sh run_nulls.sh run_perf_chunks1.sh run_purezarr.sh run_quantize.sh run_scalar.sh run_specific_filters.sh run_strings.sh run_unknown.sh run_unlim_io.sh run_ut_map.sh run_ut_mapapi.sh run_ut_misc.sh)
+
+# Program files
+set(TESTFILES_NCZARR_C test_chunking.c test_filter_vlen.c test_h5_endians.c test_put_vars_two_unlim_dim.c test_quantize.c test_unlim_vars.c tst_pure_awssdk.cpp)
+
+# Data files
+set(TESTDATA_NCZARR ref_nulls_nczarr.baseline ref_zarr_test_data.cdl.gz ref_avail1.cdl ref_byte.cdl ref_byte_fill_value_null.cdl ref_fillonly.cdl ref_misc1.cdl ref_ndims.cdl ref_newformatpure.cdl ref_nulls.cdl ref_oldformat.cdl ref_perdimspecs.cdl ref_power_901_constants.cdl ref_purezarr_base.cdl ref_quotes.cdl ref_rem.cdl ref_scalar.cdl ref_skip.cdl ref_skipw.cdl ref_string.cdl ref_t_meta_dim1.cdl ref_t_meta_var1.cdl ref_ut_mapapi_create.cdl ref_ut_mapapi_data.cdl ref_ut_mapapi_meta.cdl ref_ut_map_create.cdl ref_ut_map_writedata.cdl ref_ut_map_writemeta.cdl ref_ut_map_writemeta2.cdl ref_ut_testmap_create.cdl ref_whole.cdl ref_avail1.dmp ref_misc1.dmp ref_ndims.dmp ref_rem.dmp ref_noshape.file.zip ref_groups.h5 ref_notzarr.tar.gz ref_avail1.txt ref_skip.txt ref_ut_json_build.txt ref_ut_json_parse.txt ref_ut_mapapi_search.txt ref_ut_map_readmeta.txt ref_ut_map_readmeta2.txt ref_ut_map_search.txt ref_ut_proj.txt ref_whole.txt ref_byte.zarr.zip ref_byte_fill_value_null.zarr.zip ref_oldformat.zip ref_power_901_constants_orig.zip ref_quotes_orig.zip)
+
+macro(v3_setup)
+# Add command to update the test source from nczarr_test
+ foreach(u ${TESTFILES_NCZARR_C} ${TESTDATA_NCZARR})
+ set(SRCPATH "${CMAKE_SOURCE_DIR}/nczarr_test/${u}")
+ set(DSTPATH "${CMAKE_BINARY_DIR}/v3_nczarr_test/${u}")
+ file(REMOVE ${DSTPATH})
+ file(COPY ${SRCPATH} DESTINATION ${CMAKE_BINARY_DIR}/v3_nczarr_test)
+ endforeach()
+
+ foreach(u ${TESTFILES_NCZARR_SH})
+ set(SRCPATH "${CMAKE_SOURCE_DIR}/nczarr_test/${u}")
+ set(DSTPATH "${CMAKE_BINARY_DIR}/v3_nczarr_test/${u}")
+ file(REMOVE ${DSTPATH})
+ file(COPY ${SRCPATH} DESTINATION ${CMAKE_BINARY_DIR}/v3_nczarr_test)
+ endforeach()
+endmacro()
diff --git a/config.h.cmake.in b/config.h.cmake.in
index 26a5b4773b..66e27db64f 100644
--- a/config.h.cmake.in
+++ b/config.h.cmake.in
@@ -106,6 +106,9 @@ are set when opening a binary file on Windows. */
/* default chunk size in bytes */
#cmakedefine DEFAULT_CHUNK_SIZE ${DEFAULT_CHUNK_SIZE}
+/* default zarr format*/
+#cmakedefine DFALTZARRFORMAT ${DFALTZARRFORMAT}
+
/* set this only when building a DLL under MinGW */
#cmakedefine DLL_EXPORT 1
@@ -154,12 +157,18 @@ are set when opening a binary file on Windows. */
/* if true, enable NCZARR */
#cmakedefine NETCDF_ENABLE_NCZARR 1
+/* if true, enable NCZARR Version 3*/
+#cmakedefine NETCDF_ENABLE_NCZARR_V3 1
+
/* if true, enable nczarr filter support */
#cmakedefine NETCDF_ENABLE_NCZARR_FILTERS 1
/* if true, enable nczarr zip support */
#cmakedefine NETCDF_ENABLE_NCZARR_ZIP 1
+/* if true, enable nczarr V3 support */
+#cmakedefine NETCDF_ENABLE_NCZARR_V3 1
+
/* if true, Allow dynamically loaded plugins */
#cmakedefine NETCDF_ENABLE_PLUGINS 1
@@ -187,6 +196,11 @@ are set when opening a binary file on Windows. */
/* S3 Working subtree path prefix*/
#define S3TESTSUBTREE "${S3TESTSUBTREE}"
+/* S3 Test endpoint */
+#define S3ENDPOINT "${S3ENDPOINT}"
+
+/* S3 Test Bucket */
+
/* if true, run extra tests which may not work yet */
#cmakedefine EXTRA_TESTS 1
@@ -517,7 +531,7 @@ with zip */
#cmakedefine VALGRIND_TESTS 1
#cmakedefine NETCDF_ENABLE_CDMREMOTE 1
#cmakedefine USE_HDF5 1
-#cmakedefine ENABLE_FILEINFO 1
+#cmakedefine NETCDF_ENABLE_FILEINFO 1
#cmakedefine TEST_PARALLEL ${TEST_PARALLEL}
#cmakedefine BUILD_RPC 1
#cmakedefine USE_X_GETOPT 1
diff --git a/configure.ac b/configure.ac
index be202762b3..3f23d38229 100644
--- a/configure.ac
+++ b/configure.ac
@@ -150,7 +150,6 @@ AC_MSG_NOTICE([checking supported formats])
# Collect the values of --enable-netcdf-4, --enable-netcdf4, and --enable-hdf5.
# Also determine which have been explicitly set on the command line.
-
AC_ARG_ENABLE([netcdf-4], [AS_HELP_STRING([--enable-netcdf-4],
[(Deprecated) Synonym for --enable-hdf5; default yes])])
AC_ARG_ENABLE([netcdf4], [AS_HELP_STRING([--enable-netcdf4],
@@ -166,11 +165,11 @@ if test "x$enable_netcdf4" != x ; then
fi
# --enable-netcdf-4 overrides --enable-netcdf4 if latter not defined
-if test "x$enable_netcdf_4" != x && test "x$enable_netcdf4" == x ; then
+if test "x$enable_netcdf_4" != x && test "x$enable_netcdf4" = x ; then
enable_netcdf4="$enable_netcdf_4"
fi
# --enable-netcdf4 overrides --enable-hdf5 if latter not defined
-if test "x$enable_netcdf4" != x && test "x$enable_hdf5" == x ; then
+if test "x$enable_netcdf4" != x && test "x$enable_hdf5" = x ; then
enable_hdf5="$enable_netcdf4"
fi
# Otherwise, use --enable-hdf5
@@ -227,8 +226,6 @@ AC_MSG_RESULT($enable_dap)
if test "x$enable_remote_functionality" = xno ; then
AC_MSG_WARN([All network access is disabled => DAP support disabled.])
enable_dap=no
- AC_MSG_WARN([All network access is disabled => NCZARR support disabled.])
- enable_nczarr=no
fi
AC_MSG_CHECKING([whether netcdf zarr storage format should be disabled])
@@ -238,11 +235,40 @@ AC_ARG_ENABLE([nczarr],
test "x$enable_nczarr" = xno || enable_nczarr=yes
AC_MSG_RESULT($enable_nczarr)
+AC_MSG_CHECKING([whether netcdf zarr storage format version 3 should be enabled])
+AC_ARG_ENABLE([nczarr-v3],
+ [AS_HELP_STRING([--enable-nczarr-v3],
+ [enable netcdf zarr version 3 storage support])]) dnl
+test "x$enable_nczarr_v3" = xyes || enable_nczarr_v3=no
+AC_MSG_RESULT($enable_nczarr_v3)
+if test "x$enable_nczarr_v3" = xyes ; then
+AC_DEFINE([NETCDF_ENABLE_NCZARR_V3], [1], [if true, include NCZarr V3 support])
+fi
+AM_CONDITIONAL([NETCDF_ENABLE_NCZARR_V3],[test "x$enable_nczarr_v3" = xyes])
+
# HDF5 | HDF4 | NCZarr => netcdf-4
if test "x$enable_hdf5" = xyes || test "x$enable_hdf4" = xyes || test "x$enable_nczarr" = xyes ; then
enable_netcdf4=yes
fi
+# Choose the default Zarr format
+AC_MSG_CHECKING([whether Zarr format 3 is the default])
+AC_ARG_ENABLE([default-zarr-format_v3],
+ [AS_HELP_STRING([--enable-default-zarr-format_v3],
+ [Specify the default Zarr format.])])
+test "x$enable_default_zarr_format_v3" = xyes || enable_default_zarr_format_v3=no # V2 is the default by default
+if test "x$enable_nczarr_v3" = xno ; then
+ enable_default_zarr_format_v3=no
+fi
+AC_MSG_RESULT([$enable_default_zarr_format_v3])
+if test "x$enable_default_zarr_format_v3" = xyes; then
+ DFALTZARRFORMAT=3
+else
+ DFALTZARRFORMAT=2
+fi
+AC_DEFINE_UNQUOTED([DFALTZARRFORMAT], [$DFALTZARRFORMAT], [Default Zarr format])
+AC_SUBST([DFALTZARRFORMAT],[$DFALTZARRFORMAT])
+
AC_MSG_NOTICE([checking user options])
# Did the user specify a default minimum blocksize (NCIO_MINBLOCKSIZE) for posixio?
@@ -257,7 +283,7 @@ AC_DEFINE_UNQUOTED([NCIO_MINBLOCKSIZE], [$NCIO_MINBLOCKSIZE], [min blocksize for
# Find valgrind, if available, and add targets for it.
AX_VALGRIND_DFLT([sgcheck], [off])
AX_VALGRIND_CHECK
-AM_CONDITIONAL(ENABLE_VALGRIND, [test "x$VALGRIND_ENABLED" = xyes])
+AM_CONDITIONAL(NETCDF_ENABLE_VALGRIND, [test "x$VALGRIND_ENABLED" = xyes])
###
# Doxygen and doxygen-related options.
@@ -703,6 +729,13 @@ test "x$enable_dap_remote_tests" = xno || enable_dap_remote_tests=yes
if test "x$enable_dap" = "xno" ; then
enable_dap_remote_tests=no
fi
+
+# Provide a global control for remotetest.
+if test "x$REMOTETESTDOWN" = xyes ; then
+ AC_MSG_WARN("ENV(REMOTETESTDOWN) => netcdf_enable_dap_remote_tests == no")
+ enable_dap_remote_tests=no
+fi
+
AC_MSG_RESULT($enable_dap_remote_tests)
# Provide a global control for remotetest.
@@ -938,7 +971,6 @@ AC_MSG_RESULT([${have_sz}])
if test "x$enable_nczarr" = xno ; then
enable_nczarr_zip=no
else
-
# See if we have libzip for NCZarr
AC_SEARCH_LIBS([zip_open],[zip zip.dll cygzip.dll],[have_zip=yes],[have_zip=no])
AC_MSG_CHECKING([whether libzip library is available])
@@ -946,10 +978,13 @@ else
enable_nczarr_zip=${have_zip} # alias
AC_MSG_CHECKING([whether nczarr zip support is enabled])
+ AC_ARG_ENABLE([nczarr-zip],
+ [AS_HELP_STRING([--enable-nczarr-zip],
+ [enable netcdf zip support (default off)])])
+ test "x$have_zip" = xyes -a "x$enable_nczarr_zip" = xyes || enable_nczarr_zip=no
AC_MSG_RESULT([${enable_nczarr_zip}])
-
if test "x$enable_nczarr_zip" = xyes ; then
- AC_DEFINE([NETCDF_ENABLE_NCZARR_ZIP], [1], [If true, then libzip found])
+ AC_DEFINE([NETCDF_ENABLE_NCZARR_ZIP], [1], [If true, then libzip found and ZIP for nczarr enabled.])
fi
# Check for enabling of S3 support
@@ -1061,6 +1096,26 @@ else
fi
fi
+# For convenience, define USE_NETCDF4
+if test "x${enable_hdf5}" = xyes || test "x${enable_nczarr}" = xyes || test "x${enable_dap4}" = xyes ; then
+enable_netcdf4=yes
+AC_DEFINE([USE_NETCDF4], [1], [convenience to avoid having to specify the above test])
+else
+enable_netcdf4=no
+fi
+AM_CONDITIONAL([USE_NETCDF4], [test x$enable_netcdf4 = xyes])
+
+# Does the user want to turn off nc_set_log_level() function? (It will
+# always be defined if --enable-logging is used.)
+AC_MSG_CHECKING([whether nc_set_log_level() function is included (will do nothing unless enable-logging is also used)])
+AC_ARG_ENABLE([set_log_level_func], [AS_HELP_STRING([--disable-set-log-level-func],
+ [disable the nc_set_log_level function])])
+test "x$enable_set_log_level_func" = xno -a "x$enable_logging" = xno || enable_set_log_level_func=yes
+AC_MSG_RESULT($enable_set_log_level_func)
+if test "x$enable_set_log_level_func" = xyes -a "x$enable_netcdf4" = xyes; then
+ AC_DEFINE([NETCDF_ENABLE_SET_LOG_LEVEL], 1, [If true, define nc_set_log_level.])
+fi
+
# Check whether we want to enable strict null byte header padding.
# See https://github.com/Unidata/netcdf-c/issues/657 for more information.
AC_MSG_CHECKING([whether to enable strict null-byte header padding when reading (default off)])
@@ -1911,7 +1966,7 @@ AC_ARG_ENABLE([nczarr-filters], [AS_HELP_STRING([--disable-nczarr-filters],
test "x$enable_nczarr_filters" = xno || enable_nczarr_filters=yes
AC_MSG_RESULT([$enable_nczarr_filters])
-# Control filter test/example
+# Control filter testing
AC_MSG_CHECKING([whether filter testing should be run])
AC_ARG_ENABLE([filter-testing],
[AS_HELP_STRING([--disable-filter-testing],
@@ -2076,6 +2131,7 @@ AC_SUBST(HAS_NC4,[$enable_netcdf4])
AC_SUBST(HAS_CDF5,[$enable_cdf5])
AC_SUBST(HAS_HDF4,[$enable_hdf4])
AC_SUBST(HAS_BENCHMARKS,[$enable_benchmarks])
+AC_SUBST(HAS_NC4,[$enable_netcdf4])
AC_SUBST(HAS_HDF5,[$enable_hdf5])
AC_SUBST(HAS_PNETCDF,[$enable_pnetcdf])
AC_SUBST(HAS_LOGGING, [$enable_logging])
@@ -2092,8 +2148,10 @@ AC_SUBST(HAS_S3_AWS,[$enable_s3_aws])
AC_SUBST(HAS_S3_INTERNAL,[$enable_s3_internal])
AC_SUBST(HAS_HDF5_ROS3,[$has_hdf5_ros3])
AC_SUBST(HAS_NCZARR,[$enable_nczarr])
+AC_SUBST(HAS_NCZARR_V3,[$enable_nczarr_v3])
AC_SUBST(NETCDF_ENABLE_S3_TESTING,[$with_s3_testing])
AC_SUBST(HAS_NCZARR_ZIP,[$enable_nczarr_zip])
+AC_SUBST(NCZARR_DEFAULT_FORMAT,[$DFALTZARRFORMAT])
AC_SUBST(HAS_PLUGINS, [$enable_plugins])
AC_SUBST(HAS_QUANTIZE,[$enable_quantize])
AC_SUBST(HAS_LOGGING,[$enable_logging])
@@ -2125,6 +2183,11 @@ AC_SUBST([S3TESTBUCKET],["unidata-zarr-test-data"])
AC_DEFINE([S3TESTSUBTREE], ["netcdf-c"], [S3 test path prefix])
AC_SUBST([S3TESTSUBTREE],[netcdf-c])
+# Additional S3 Test Endpoint
+# WARNING: this must match the value in CMakeLists.txt
+AC_DEFINE([S3ENDPOINT], ["s3.us-east-1.amazonaws.com"], [S3 test endpoint])
+AC_SUBST([S3ENDPOINT],["s3.us-east-1.amazonaws.com"])
+
# Build a small unique id to avoid interference on same platform
PLATFORMUID="$RANDOM"
# Make sure uid > 0
@@ -2290,6 +2353,7 @@ AX_SET_META([NC_HAS_S3_AWS],[$enable_s3_aws],[yes])
AX_SET_META([NC_HAS_S3_INTERNAL],[$enable_s3_internal],[yes])
AX_SET_META([NC_HAS_HDF5_ROS3],[$has_hdf5_ros3],[yes])
AX_SET_META([NC_HAS_NCZARR],[$enable_nczarr],[yes])
+AX_SET_META([NC_HAS_NCZARR_V3],[$enable_nczarr_v3],[yes])
AX_SET_META([NC_HAS_LOGGING],[$enable_logging],[yes])
AX_SET_META([NC_HAS_QUANTIZE],[$enable_quantize],[yes])
AX_SET_META([NC_HAS_SZIP],[$enable_hdf5_szip],[yes])
@@ -2322,7 +2386,7 @@ AC_MSG_NOTICE([generating header files and makefiles])
AC_CONFIG_FILES(test_common.sh:test_common.in)
AC_CONFIG_FILES(s3cleanup.sh:s3cleanup.in, [chmod ugo+x s3cleanup.sh])
AC_CONFIG_FILES(s3gc.sh:s3gc.in, [chmod ugo+x s3gc.sh])
-for FP in plugins nc_test4 nczarr_test h5_test examples/C ; do
+for FP in plugins nc_test4 nczarr_test v3_nczarr_test h5_test examples/C ; do
AC_CONFIG_FILES(${FP}/findplugin.sh:plugins/findplugin.in, [chmod ugo+x ${FP}/findplugin.sh])
done
AC_CONFIG_FILES(ncdap_test/findtestserver.c:ncdap_test/findtestserver.c.in, [chmod ugo+x ncdap_test/findtestserver.c])
@@ -2343,6 +2407,7 @@ AC_CONFIG_FILES(nczarr_test/test_filter_repeat.c:nc_test4/test_filter_repeat.c)
AC_CONFIG_FILES(nczarr_test/test_filter_order.c:nc_test4/test_filter_order.c)
AC_CONFIG_FILES([examples/C/run_par_test.sh], [chmod ugo+x examples/C/run_par_test.sh])
AC_CONFIG_FILES([nc-config], [chmod 755 nc-config])
+
AC_CONFIG_FILES([Makefile
netcdf.pc
libnetcdf.settings
@@ -2387,6 +2452,7 @@ AC_CONFIG_FILES([Makefile
dap4_test/Makefile
plugins/Makefile
nczarr_test/Makefile
+ v3_nczarr_test/Makefile
])
AC_OUTPUT()
diff --git a/dap4_test/CMakeLists.txt b/dap4_test/CMakeLists.txt
index cd9be513c3..9a0f3b5ec0 100644
--- a/dap4_test/CMakeLists.txt
+++ b/dap4_test/CMakeLists.txt
@@ -52,8 +52,9 @@ ENDIF()
IF(NETCDF_ENABLE_DAP_REMOTE_TESTS)
add_sh_test(dap4_test test_remote)
- add_sh_test(dap4_test test_hyrax)
add_sh_test(dap4_test test_dap4url)
+# Hyrax is acting flakey, so temporarily disable
+# add_sh_test(dap4_test test_hyrax)
IF(RUN_MANUAL_TESTS)
# The following test can only be run by hand.
# It tests earthdata authorization.
diff --git a/dap4_test/Makefile.am b/dap4_test/Makefile.am
index a6b1c9f566..ec5c81ec79 100644
--- a/dap4_test/Makefile.am
+++ b/dap4_test/Makefile.am
@@ -47,9 +47,11 @@ if NETCDF_ENABLE_DAP_REMOTE_TESTS
TESTS += test_remote.sh
TESTS += test_constraints.sh
-TESTS += test_hyrax.sh
TESTS += test_dap4url.sh
+# Hyrax is acting flakey, so temporarily disable
+#TESTS += test_hyrax.sh
+
# The following test can only be run by hand.
# It tests earthdata authorization.
# Before running it, one needs to do the following:
diff --git a/docs/CMakeLists.txt b/docs/CMakeLists.txt
index 36f6456e90..694a00c1b4 100644
--- a/docs/CMakeLists.txt
+++ b/docs/CMakeLists.txt
@@ -90,7 +90,7 @@ ENDIF(NETCDF_ENABLE_DOXYGEN)
SET(CUR_EXTRA_DIST ${CUR_EXTRA_DIST}
netcdf.m4 DoxygenLayout.xml Doxyfile.in footer.html
mainpage.dox tutorial.dox guide.dox types.dox
-architecture.dox internal.dox windows-binaries.md
+architecture.dox internal.dox windows-binaries.md dispatch.md
building-with-cmake.md CMakeLists.txt groups.dox install.md notes.md
install-fortran.md credits.md auth.md
obsolete/fan_utils.html bestpractices.md filters.md indexing.md
diff --git a/docs/cloud.md b/docs/cloud.md
index 31eb25e43d..75eb2fc260 100644
--- a/docs/cloud.md
+++ b/docs/cloud.md
@@ -12,6 +12,8 @@ Cloud Storage Access Using The NetCDF-C Library
The NetCDF-C library supports limited access to cloud storage.
Currently, that access is restricted to the Amazon S3 cloud storage,
so this document is S3-centric.
+Limited support is also provided for the Google cloud storage.
+Google provides an S3-compatible REST API (see the _quickstart_paths.md document).
It is expected that over time, access to additional cloud stores will be added,
and this document will be expanded to cover those additional cases.
diff --git a/docs/dispatchers.md b/docs/dispatchers.md
new file mode 100644
index 0000000000..e69a0c7767
--- /dev/null
+++ b/docs/dispatchers.md
@@ -0,0 +1,1323 @@
+Internal Dispatch Table Architecture
+============================
+
+
+# Internal Dispatcher Architectures
+
+
+
+[TOC]
+
+# Introduction {#dispatch_intro}
+
+The netcdf-c library uses an internal dispatch mechanism
+as the means for wrapping the netcdf-c API around a wide variety
+of underlying storage and stream data formats.
+
+This document attempts to give an overview of the internal
+architectures of the major dispatcher modules. As such, this document is
+long and will grow longer over time.
+
+As a first step, a description is provided on how dispatch modules
+are chosen. After that, the internal architecture for specific modules
+is described.
+
+As of last check, the following formats are supported and each has its
+own dispatch table. But beware: some of the listed function signatures
+may be out of date and the specific code should be consulted to see
+the actual parameters.
+
+As specific dispatch module architectures are described, that is noted
+in the last column of the table of known dispatchers.
+
+
+
+| Format | Directory/File (optional) | NC_FORMATX Name | Described |
+| ----------------- | ------------------------- | ------------------ | --- |
+| Dispatch Detector | libdispatch/dinfermodel.c | N.A.               | yes |
+| NetCDF-classic    | libsrc                    | NC_FORMATX_NC3     | no  |
+| NetCDF-enhanced   | libhdf5                   | NC_FORMATX_NC_HDF5 | no  |
+| HDF4              | libhdf4                   | NC_FORMATX_NC_HDF4 | no  |
+| PNetCDF           | libsrcp                   | NC_FORMATX_PNETCDF | no  |
+| DAP2              | libdap2                   | NC_FORMATX_DAP2    | no  |
+| DAP4              | libdap4                   | NC_FORMATX_DAP4    | no  |
+| UDF0              | N.A.                      | NC_FORMATX_UDF0    | no  |
+| UDF1              | N.A.                      | NC_FORMATX_UDF1    | no  |
+| NCZarr            | libnczarr                 | NC_FORMATX_NCZARR  | no  |
+
+
+## Dispatch Detector {#dispatch_detector}
+
+The idea for the dispatch detector is that when a user opens or
+creates a netcdf file, a specific dispatch table is chosen. A
+dispatch table is a struct containing an entry for (almost) every
+function in the netcdf-c API. During execution, netcdf API calls are
+channeled through that dispatch table to the appropriate function for
+implementing that API call. The functions in the dispatch table are
+not quite the same as those defined in *netcdf.h*. For simplicity and
+compactness, some netcdf.h API calls are mapped to the same dispatch
+table function. In addition to the functions, the first entry in the
+table defines the model that this dispatch table implements. It will
+be one of the NC_FORMATX_XXX values. The second entry in the table is
+the version of the dispatch table. The rule is that previous entries
+may not be removed, but new entries may be added, and adding new
+entries increases the version number.
+
+The dispatch table represents a distillation of the netcdf API down to
+a minimal set of internal operations. The format of the dispatch table
+is defined in the file *libdispatch/ncdispatch.h*. Every new dispatch
+table must define this minimal set of operations.
+
+### Adding a New Dispatch Table
+In order to make this process concrete, let us assume we plan to add
+an in-memory implementation of netcdf-3.
+
+#### Defining configure.ac flags
+
+Define a *--enable* flag option for *configure.ac*. For our
+example, we assume the option "--enable-ncm" and the
+internal corresponding flag "enable_ncm". If you examine the existing
+*configure.ac* and see how, for example, *--enable_dap2* is
+defined, then it should be clear how to do it for your code.
+
+#### Defining a "name space"
+
+Choose some prefix of characters to identify the new dispatch
+system. In effect we are defining a name-space. For our in-memory
+system, we will choose "NCM" and "ncm". NCM is used for non-static
+procedures to be entered into the dispatch table and ncm for all other
+non-static procedures. Note that the chosen prefix should probably start
+with "nc" or "NC" in order to avoid name conflicts outside the netcdf-c library.
+
+#### Extend include/netcdf.h
+
+Modify the file *include/netcdf.h* to add an NC_FORMATX_XXX flag
+by adding a flag for this dispatch format at the appropriate places.
+````
+ #define NC_FORMATX_NCM 7
+````
+
+Add any format specific new error codes.
+````
+###define NC_ENCM (?)
+````
+
+#### Extend include/ncdispatch.h
+
+Modify the file *include/ncdispatch.h* to
+add format specific data and initialization functions;
+note the use of our NCM namespace.
+````
+ #ifdef ENABLE_NCM
+ extern NC_Dispatch* NCM_dispatch_table;
+ extern int NCM_initialize(void);
+ #endif
+````
+
+#### Define the dispatch table functions
+
+Define the functions necessary to fill in the dispatch table. As a
+rule, we assume that a new directory is defined, *libsrcm*, say. Within
+this directory, we need to define *Makefile.am* and *CMakeLists.txt*.
+We also need to define the source files
+containing the dispatch table and the functions to be placed in the
+dispatch table -- call them *ncmdispatch.c* and *ncmdispatch.h*. Look at
+*libsrc/nc3dispatch.[ch]* or *libnczarr/zdispatch.[ch]* for examples.
+
+Similarly, it is best to take existing *Makefile.am* and *CMakeLists.txt*
+files (from *libsrcp* for example) and modify them.
+
+#### Adding the dispatch code to libnetcdf
+
+Provide for the inclusion of this library in the final libnetcdf
+library. This is accomplished by modifying *liblib/Makefile.am* by
+adding something like the following.
+````
+ if ENABLE_NCM
+ libnetcdf_la_LIBADD += $(top_builddir)/libsrcm/libnetcdfm.la
+ endif
+````
+
+#### Extend library initialization
+
+Modify the *NC_initialize* function in *liblib/nc_initialize.c* by adding
+appropriate references to the NCM dispatch function.
+````
+ #ifdef ENABLE_NCM
+ extern int NCM_initialize(void);
+ #endif
+ ...
+ int NC_initialize(void)
+ {
+ ...
+ #ifdef ENABLE_NCM
+ if((stat = NCM_initialize())) return stat;
+ #endif
+ ...
+ }
+````
+
+Finalization is handled in an analogous fashion.
+
+#### Testing the new dispatch table
+
+Typically, tests for a new dispatcher are kept in a separate directory
+with a related name. For our running example, it might be *ncm_test*.
+The file *ncm_test/Makefile.am*
+will look something like this.
+````
+ # These files are created by the tests.
+ CLEANFILES = ...
+ # These are the tests which are always run.
+ TESTPROGRAMS = test1 test2 ...
+ test1_SOURCES = test1.c ...
+ ...
+ # Set up the tests.
+ check_PROGRAMS = $(TESTPROGRAMS)
+ TESTS = $(TESTPROGRAMS)
+ # Any extra files required by the tests
+ EXTRA_DIST = ...
+````
+
+### Top-Level build of the dispatch code
+
+Provide for *libnetcdfm* to be constructed by adding the following to
+the top-level *Makefile.am*.
+
+````
+ if ENABLE_NCM
+ NCM=libsrcm
+ NCMTESTDIR=ncm_test
+ endif
+ ...
+ SUBDIRS = ... $(DISPATCHDIR) $(NCM) ... $(NCMTESTDIR)
+````
+
+### Choosing a Dispatch Table
+
+The dispatch table is ultimately chosen by the function
+NC_infermodel() in libdispatch/dinfermodel.c. This function is
+invoked by the NC_create and the NC_open procedures. This can
+be, unfortunately, a complex process. The detailed operation of
+NC_infermodel() is defined in the companion document in docs/dinternal.md.
+
+In any case, the choice of dispatch table is currently based on the following
+pieces of information.
+
+1. The mode argument – this can be used to detect, for example, what kind
+of file to create: netcdf-3, netcdf-4, 64-bit netcdf-3, etc.
+Using a mode flag is the most common mechanism, in which case
+*netcdf.h* needs to be modified to define the relevant mode flag.
+
+2. The file path – this can be used to detect, for example, a DAP url
+versus a normal file system file. If the path looks like a URL, then
+the fragment part of the URL is examined to determine the specific
+dispatch function.
+
+3. The file contents - when the contents of a real file are available,
+the contents of the file can be used to determine the dispatch table.
+As a rule, this is likely to be useful only for *nc_open*.
+
+4. If the file is being opened vs being created.
+
+5. Is parallel IO available?
+
+The *NC_infermodel* function returns two values.
+
+1. model - this is used by nc_open and nc_create to choose the dispatch table.
+2. newpath - in some cases, usually URLs, the path may be rewritten to include extra information for use by the dispatch functions.
+
+### Special Dispatch Table Signatures.
+
+The entries in the dispatch table do not necessarily correspond
+to the external API. In many cases, multiple related API functions
+are merged into a single dispatch table entry.
+
+#### Create/Open
+
+The create table entry and the open table entry in the dispatch table
+have the following signatures respectively.
+````
+ int (*create)(const char *path, int cmode,
+ size_t initialsz, int basepe, size_t *chunksizehintp,
+ int useparallel, void* parameters,
+ struct NC_Dispatch* table, NC* ncp);
+
+ int (*open)(const char *path, int mode,
+ int basepe, size_t *chunksizehintp,
+ int use_parallel, void* parameters,
+ struct NC_Dispatch* table, NC* ncp);
+````
+
+The key difference is that these are the union of all the possible
+create/open signatures from the include/netcdfXXX.h files. Note especially the last
+three parameters. The parameters argument is a pointer to arbitrary data
+to provide extra info to the dispatcher.
+The table argument is included in case the create
+function (e.g. *NCM_create*) needs to invoke other dispatch
+functions. The very last argument, ncp, is a pointer to an NC
+instance. The raw NC instance will have been created by *libdispatch/dfile.c*
+and is passed to e.g. open with the expectation that it will be filled in
+by the dispatch open function.
+
+#### Accessing Data with put_vara() and get_vara()
+
+````
+ int (*put_vara)(int ncid, int varid, const size_t *start, const size_t *count,
+ const void *value, nc_type memtype);
+````
+
+````
+ int (*get_vara)(int ncid, int varid, const size_t *start, const size_t *count,
+ void *value, nc_type memtype);
+````
+
+Most of the parameters are similar to the netcdf API parameters. The
+last parameter, however, is the type of the data in
+memory. Additionally, instead of using an "int islong" parameter, the
+memtype will be either ::NC_INT or ::NC_INT64, depending on the value
+of sizeof(long). This means that even netcdf-3 code must be prepared
+to encounter the ::NC_INT64 type.
+
+#### Accessing Attributes with put_attr() and get_attr()
+
+````
+ int (*get_att)(int ncid, int varid, const char *name,
+ void *value, nc_type memtype);
+````
+
+````
+ int (*put_att)(int ncid, int varid, const char *name, nc_type datatype, size_t len,
+ const void *value, nc_type memtype);
+````
+
+Again, the key difference is the memtype parameter. As with
+put/get_vara, it uses ::NC_INT64 to encode the long case.
+
+#### Pre-defined Dispatch Functions
+
+It is sometimes not necessary to implement all the functions in the
+dispatch table. Some pre-defined functions are available which may be
+used in many cases.
+
+#### Inquiry Functions
+
+Many of the netCDF inquiry functions operate from an in-memory model of
+metadata. Once a file is opened, or a file is created, this
+in-memory metadata model is kept up to date. Consequently the inquiry
+functions do not depend on the dispatch layer code. These functions
+can be used by all dispatch layers which use the internal netCDF
+enhanced data model.
+
+- NC4_inq
+- NC4_inq_type
+- NC4_inq_dimid
+- NC4_inq_dim
+- NC4_inq_unlimdim
+- NC4_inq_att
+- NC4_inq_attid
+- NC4_inq_attname
+- NC4_get_att
+- NC4_inq_varid
+- NC4_inq_var_all
+- NC4_show_metadata
+- NC4_inq_unlimdims
+- NC4_inq_ncid
+- NC4_inq_grps
+- NC4_inq_grpname
+- NC4_inq_grpname_full
+- NC4_inq_grp_parent
+- NC4_inq_grp_full_ncid
+- NC4_inq_varids
+- NC4_inq_dimids
+- NC4_inq_typeids
+- NC4_inq_type_equal
+- NC4_inq_user_type
+- NC4_inq_typeid
+
+#### NCDEFAULT get/put Functions
+
+The mapped (varm) get/put functions have been
+implemented in terms of the array (vara) functions. So dispatch layers
+need only implement the vara functions, and can use the following
+functions to get the varm functions:
+
+- NCDEFAULT_get_varm
+- NCDEFAULT_put_varm
+
+For the netcdf-3 format, the strided functions (nc_get/put_vars)
+are similarly implemented in terms of the vara functions. So the following
+convenience functions are available.
+
+- NCDEFAULT_get_vars
+- NCDEFAULT_put_vars
+
+For the netcdf-4 format, the vars functions actually exist, so
+the default vars functions are not used.
+
+#### Read-Only Functions
+
+Some dispatch layers are read-only (ex. HDF4). Any function which
+writes to a file, including nc_create(), needs to return error code
+::NC_EPERM. The following read-only functions are available so that
+these don't have to be re-implemented in each read-only dispatch layer:
+
+- NC_RO_create
+- NC_RO_redef
+- NC_RO__enddef
+- NC_RO_sync
+- NC_RO_set_fill
+- NC_RO_def_dim
+- NC_RO_rename_dim
+- NC_RO_rename_att
+- NC_RO_del_att
+- NC_RO_put_att
+- NC_RO_def_var
+- NC_RO_rename_var
+- NC_RO_put_vara
+- NC_RO_def_var_fill
+
+#### Classic NetCDF Only Functions
+
+There are two functions that are only used in the classic code. All
+other dispatch layers (except PnetCDF) return error ::NC_ENOTNC3 for
+these functions. The following functions are provided for this
+purpose:
+
+- NOTNC3_inq_base_pe
+- NOTNC3_set_base_pe
+
+#### HDF4 Dispatch Layer as a Simple Example
+
+The HDF4 dispatch layer is about the simplest possible dispatch
+layer. It is read-only, classic model. It will serve as a nice, simple
+example of a dispatch layer.
+
+Note that the HDF4 layer is optional in the netCDF build. Not all
+users will have HDF4 installed, and those users will not build with
+the HDF4 dispatch layer enabled. For this reason HDF4 code is guarded
+as follows.
+````
+###ifdef USE_HDF4
+...
+###endif /*USE_HDF4*/
+````
+Code in libhdf4 is only compiled if HDF4 is
+turned on in the build.
+
+#### Header File Changes
+
+Adding the HDF4 dispatch table will first require changes to
+a number of header files.
+
+##### The netcdf.h File
+
+In the main netcdf.h file, we add the following
+to the list of NC_FORMATX_XXX definitions
+````
+###define NC_FORMATX_NC_HDF4 (3)
+````
+
+##### The ncdispatch.h File
+
+In ncdispatch.h we add the following:
+
+````
+###ifdef USE_HDF4
+extern NC_Dispatch* HDF4_dispatch_table;
+extern int HDF4_initialize(void);
+extern int HDF4_finalize(void);
+###endif
+````
+
+##### The netcdf_meta.h File
+
+The netcdf_meta.h file allows for easy determination of what features
+are in use. For HDF4, the following is added -- as set by *./configure*:
+````
+###define NC_HAS_HDF4 0 /*!< HDF4 support. */
+````
+
+##### The hdf4dispatch.h File
+
+The file *hdf4dispatch.h* contains prototypes and
+macro definitions used within the HDF4 code in libhdf4. This include
+file should not be used anywhere except in libhdf4. It can be kept
+in either the *include* directory or (preferably) the *libhdf4* directory.
+
+##### Initialization Code Changes in liblib Directory
+
+The file *nc_initialize.c* is modified to include the following:
+````
+###ifdef USE_HDF4
+extern int HDF4_initialize(void);
+extern int HDF4_finalize(void);
+###endif
+````
+
+##### Changes to libdispatch/dfile.c
+
+In order for a dispatch layer to be used, it must be correctly
+determined in functions *NC_open()* or *NC_create()* in *libdispatch/dfile.c*.
+HDF4 has a magic number that is detected in
+*NC_interpret_magic_number()*, which allows *NC_open* to automatically
+detect an HDF4 file.
+
+Once HDF4 is detected, the *model* variable is set to *NC_FORMATX_NC_HDF4*,
+and later this is used in a case statement:
+````
+ case NC_FORMATX_NC_HDF4:
+ dispatcher = HDF4_dispatch_table;
+ break;
+````
+
+This sets the dispatcher to the HDF4 dispatcher, which is defined in
+the libhdf4 directory.
+
+##### Dispatch Table in libhdf4/hdf4dispatch.c
+
+The file *hdf4dispatch.c* contains the definition of the HDF4 dispatch
+table. It looks like this:
+````
+/* This is the dispatch object that holds pointers to all the
+ * functions that make up the HDF4 dispatch interface. */
+static NC_Dispatch HDF4_dispatcher = {
+NC_FORMATX_NC_HDF4, /* The model identifier */
+NC_DISPATCH_VERSION, /* The version of this dispatch table */
+NC_RO_create,
+NC_HDF4_open,
+NC_RO_redef,
+NC_RO__enddef,
+NC_RO_sync,
+...
+NC_NOTNC4_set_var_chunk_cache,
+NC_NOTNC4_get_var_chunk_cache,
+...
+};
+````
+Note that most functions use some of the predefined dispatch
+functions. Functions that start with NC_RO* are read-only, they return
+::NC_EPERM. Functions that start with NOTNC4* return ::NC_ENOTNC4.
+
+Only the functions that start with NC_HDF4* need to be implemented for
+the HDF4 dispatch layer. There are 6 such functions:
+
+- NC_HDF4_open
+- NC_HDF4_abort
+- NC_HDF4_close
+- NC_HDF4_inq_format
+- NC_HDF4_inq_format_extended
+- NC_HDF4_get_vara
+
+##### HDF4 Reading Code
+
+The code in *hdf4file.c* opens the HDF4 SD dataset, and reads the
+metadata. This metadata is stored in the netCDF internal metadata
+model, allowing the inq functions to work.
+
+The code in *hdf4var.c* does an *nc_get_vara()* on the HDF4 SD
+dataset. This is all that is needed for all the nc_get_* functions to
+work.
+
+### Appendix A. Changing NC_DISPATCH_VERSION
+
+When new entries are added to the *struct NC_Dispatch* type (located in *include/netcdf_dispatch.h.in*) it is necessary to do two things.
+
+1. Bump the NC_DISPATCH_VERSION number
+2. Modify the existing dispatch tables to include the new entries.
+It is often the case that the new entries do not mean anything for
+a given dispatch table. In that case, the new entries may be set to
+some variant of *NC_RO_XXX*, *NC_NOTNC4_XXX*, or *NC_NOTNC3_XXX*.
+
+Modifying the dispatch version requires two steps:
+1. Modify the version number in *netcdf-c/configure.ac*, and
+2. Modify the version number in *netcdf-c/CMakeLists.txt*.
+
+The two should agree in value.
+
+#### NC_DISPATCH_VERSION Incompatibility
+
+When dynamically adding a dispatch table
+-- in nc_def_user_format (see libdispatch/dfile.c) --
+the version of the new table is compared with that of the built-in
+NC_DISPATCH_VERSION; if they differ, then an error is returned from
+that function.
+
+### Appendix B. Inferring the Dispatch Table
+
+As mentioned above, the dispatch table is inferred using the following
+information:
+1. The mode argument
+2. The file path/URL
+3. The file contents (when available)
+
+The primary function for doing this inference is in the file
+*libdispatch/dinfermodel.c* via the API in *include/ncmodel.h*.
+The term *model* is used here to include (at least) the following
+information (see the structure type *NCmodel* in *include/ncmodel.h*).
+
+1. impl -- this is an NC_FORMATX_XXX value defining, in effect, the
+ dispatch table to use.
+2. format -- this is an NC_FORMAT_XXX value defining the API to support: netcdf classic or netcdf enhanced.
+
+The construction of the model is primarily carried out by the function
+*NC_infermodel()* (in *libdispatch/dinfermodel.c*).
+It is given the following parameters:
+1. path -- (IN) absolute file path or URL
+2. modep -- (IN/OUT) the set of mode flags given to *NC_open* or *NC_create*.
+3. iscreate -- (IN) distinguish open from create.
+4. useparallel -- (IN) indicate if parallel IO can be used.
+5. params -- (IN/OUT) arbitrary data dependent on the mode and path.
+6. model -- (IN/OUT) place to store inferred model.
+7. newpathp -- (OUT) the canonical rewrite of the path argument.
+
+As a rule, these values are used in this order to infer the model.
+1. file contents -- highest precedence
+2. url (if it is one) -- using the "mode=" key in the fragment (see below).
+3. mode flags
+4. default format -- lowest precedence
+
+If the path appears to be a URL, then it is parsed.
+Information is extracted from the URL, and specifically,
+the fragment key "mode=" is the critical element.
+The URL will be rewritten to a canonical form with the following
+changes.
+1. The fragment part ("#..." at the end) is parsed and the "mode=" key
+ is extracted and its value is converted to a list of tags.
+2. If the leading protocol is not http/https, then the protocol is added
+ to the mode list. That protocol is then replaced with either http or https.
+3. Certain singleton values in the fragment are extracted and removed
+ and added to the mode list. Consider, for example, "http://....#dap4".
+ The "dap4" singleton is removed and added to the mode list.
+4. For backward compatibility, the values of "proto=" and "protocol="
+ are removed from the fragment and their value is added to the mode list.
+5. The final mode list is converted to a comma separated string
+ and re-inserted into the fragment.
+6. The final mode list is modified to remove duplicates.
+
+The final result is the canonical form of the URL and is returned in the
+newpathp argument described above.
+
+The mode list then is used as part of the inference process to choose
+a dispatch table.
+
+# Point of Contact {#dispatch_poc}
+
+*Author*: Dennis Heimbigner
+*Email*: dennis.heimbigner@gmail.com
+*Initial Version*: 12/22/2021
+*Last Revised*: 7/7/2024
+
diff --git a/docs/filters.md b/docs/filters.md
index 8a510ee3db..de7103ea93 100644
--- a/docs/filters.md
+++ b/docs/filters.md
@@ -700,10 +700,8 @@ one less than the number of significant bunary figures:
artifacts in multipoint statistics introduced by BitGroom
(see https://doi.org/10.5194/gmd-14-377-2021).
-
# Debugging {#filters_debug}
-
Depending on the debugger one uses, debugging plugins can be very difficult.
It may be necessary to use the old printf approach for debugging the filter itself.
@@ -993,11 +991,11 @@ typedef struct NCZ_codec_t {
Currently always NCZ_CODEC_HDF5 */
const char* codecid; /* The name/id of the codec */
unsigned int hdf5id; /* corresponding hdf5 id */
- void (*NCZ_codec_initialize)(void);
- void (*NCZ_codec_finalize)(void);
- int (*NCZ_codec_to_hdf5)(const char* codec, int* nparamsp, unsigned** paramsp);
- int (*NCZ_hdf5_to_codec)(size_t nparams, const unsigned* params, char** codecp);
- int (*NCZ_modify_parameters)(int ncid, int varid, size_t* vnparamsp, unsigned** vparamsp, size_t* nparamsp, unsigned** paramsp);
+ void (*NCZ_codec_initialize)(NCproplist* env);
+ void (*NCZ_codec_finalize)(NCproplist* env);
+ int (*NCZ_codec_to_hdf5)(NCproplist* env, const char* codec, int* h5idp, int* nparamsp, unsigned** paramsp);
+ int (*NCZ_hdf5_to_codec)(NCproplist* env, int h5id, size_t nparams, const unsigned* params, char** codecp);
+ int (*NCZ_modify_parameters)(NCproplist* env, int* h5idp, size_t* vnparamsp, unsigned** vparamsp, size_t* nparamsp, unsigned** paramsp);
} NCZ_codec_t;
````
@@ -1015,12 +1013,14 @@ visible parameters.
##### Signature
````
- int NCZ_codec_to_hdf(const char* codec, int* nparamsp, unsigned** paramsp);
+ int NCZ_codec_to_hdf(NCproplist* env, const char* codec, int* h5idp, int* nparamsp, unsigned** paramsp);
````
##### Arguments
-1. codec — (in) ptr to JSON string representing the codec.
-2. nparamsp — (out) store the length of the converted HDF5 unsigned vector
-3. paramsp — (out) store a pointer to the converted HDF5 unsigned vector; caller must free the returned vector. Note the double indirection.
+1. env — (in) ptr to a property list of key+value pairs.
+2. codec — (in) ptr to JSON string representing the codec.
+3. h5idp — (in/out) the hdf5 filter id.
+4. nparamsp — (out) store the length of the converted HDF5 unsigned vector
+5. paramsp — (out) store a pointer to the converted HDF5 unsigned vector; caller must free the returned vector. Note the double indirection.
Return Value: a netcdf-c error code.
@@ -1031,12 +1031,12 @@ return a corresponding JSON codec representation of those visible parameters.
##### Signature
````
- int NCZ_hdf5_to_codec)(int ncid, int varid, size_t nparams, const unsigned* params, char** codecp);
+ int NCZ_hdf5_to_codec)(NCproplist* env, int id, size_t nparams, const unsigned* params, char** codecp);
````
##### Arguments
-1. ncid — the variables' containing group
-2. varid — the containing variable
+1. env — property list of key+value pairs.
+2. id — the hdf5 id.
3. nparams — (in) the length of the HDF5 visible parameters vector
4. params — (in) pointer to the HDF5 visible parameters vector.
5. codecp — (out) store the string representation of the codec; caller must free.
@@ -1050,12 +1050,12 @@ to a set of working parameters; also provide option to modify visible parameters
##### Signature
````
- int NCZ_modify_parameters(int ncid, int varid, size_t* vnparamsp, unsigned** vparamsp, size_t* wnparamsp, unsigned** wparamsp);
+ int NCZ_modify_parameters(NCproplist* env, int* idp, size_t* vnparamsp, unsigned** vparamsp, size_t* wnparamsp, unsigned** wparamsp);
````
##### Arguments
-1. ncid — (in) group id containing the variable.
-2. varid — (in) the id of the variable to which this filter is being attached.
+1. env — (in) property list of key+value pairs.
+2. idp — (in/out) the hdf5 id.
3. vnparamsp — (in/out) the count of visible parameters
4. vparamsp — (in/out) the set of visible parameters
5. wnparamsp — (out) the count of working parameters
@@ -1070,8 +1070,12 @@ This function is called as soon as a shared library is loaded and matched with a
##### Signature
````
- int NCZ_codec_initialize)(void);
+ int NCZ_codec_initialize)(NCproplist* env);
````
+##### Arguments
+
+1. env — (in) property list of key+value pairs.
+
Return Value: a netcdf-c error code.
#### NCZ\_codec\_finalize
@@ -1082,8 +1086,12 @@ If the client code does not invoke *nc\_finalize* then memory checkers may compl
##### Signature
````
- int NCZ_codec_finalize)(void);
+ int NCZ_codec_finalize)(NCproplist* env);
````
+##### Arguments
+
+1. env — (in) property list of key+value pairs.
+
Return Value: a netcdf-c error code.
### Multi-Codec API
@@ -1106,7 +1114,26 @@ The list of returned items are used to try to provide defaults
for any HDF5 filters that have no corresponding Codec.
This is for internal use only.
-## Appendix F. Standard Filters {#filters_appendixf}
+## Appendix F. Default HDF5 Filter Codecs {#filters_appendixf}
+
+It is recognized that it will be a while (if ever) until
+HDF5 filters also specify the necessary codec information.
+In order to provide some support for filters that do not have
+corresponding codec support, a "_hdf5raw_" codec manager is provided.
+
+This hdf5raw codec manager encodes the parameters of the HDF5 filter
+into one of these two codec forms:
+
+* Zarr Version 2
+ ````{"id": "_hdf5raw_", "hdf5id": "<hdf5-id>, "nparams": <uint>, "0": <uint>...,"<N>": <uint>}````
+* Zarr Version 3
+ ````{"name": "_hdf5raw_", "configuration": {"hdf5id": <uint>, "nparams": <uint>, "0": <uint>...,"<N>": <uint>}}````
+
+There are couple things to note about hdf5raw:
+1. this cannot be used if a modify_parameters function is required.
+2. this representation will not be usable by other Zarr implementations, unless of course they choose to implement it.
+
+## Appendix G. Standard Filters {#filters_appendixg}
Support for a select set of standard filters is built into the NetCDF API.
Generally, they are accessed using the following generic API, where XXXX is
@@ -1136,10 +1163,10 @@ Consider the zstandard compressor, which is one of the supported standard filter
When installing the netcdf library, the following other libraries must be installed.
1. *libzstd.so* | *zstd.dll* | *libzstd.dylib* -- The actual zstandard compressor library; typically installed by using your platform specific package manager.
-2. The HDF5 wrapper for *libzstd.so* -- There are several options for obtaining this (see [Appendix G](#filters_appendixg).)
+2. The HDF5 wrapper for *libzstd.so* -- There are several options for obtaining this (see [Appendix H](#filters_appendixh).)
3. (Optional) The Zarr wrapper for *libzstd.so* -- you need this if you intend to read/write Zarr datasets that were compressed using zstandard; again see [Appendix G](#filters_appendixg).
-## Appendix G. Finding Filter Implementations {#filters_appendixg}
+## Appendix H. Finding Filter Implementations {#filters_appendixh}
A major problem for filter users is finding an implementation of an HDF5 filter wrapper and (optionally)
its corresponding NCZarr wrapper. There are several ways to do this.
@@ -1160,7 +1187,7 @@ You can install this library to get access to these supported filters.
It does not currently include the required NCZarr Codec API,
so they are only usable with netcdf-4. This will change in the future.
-## Appendix H. Auto-Install of Filter Wrappers {#filters_appendixh}
+## Appendix I. Auto-Install of Filter Wrappers {#filters_appendixi}
As part of the overall build process, a number of filter wrappers are built as shared libraries in the "plugins" directory.
These wrappers can be installed as part of the overall netcdf-c installation process.
@@ -1185,7 +1212,7 @@ provided by the *lib__nczh5filters.so* shared library. Note also that
if you disable HDF5 support, but leave NCZarr support enabled,
then all of the above filters should continue to work.
-## Appendix I. A Warning on Backward Compatibility {#filters_appendixi}
+## Appendix J. A Warning on Backward Compatibility {#filters_appendixj}
The API defined in this document should accurately reflect the
current state of filters in the netCDF-c library. Be aware that
@@ -1212,4 +1239,4 @@ For additional information, see [Appendix B](#filters_appendixb).
*Author*: Dennis Heimbigner
*Email*: dennis.heimbigner@gmail.com
*Initial Version*: 1/10/2018
-*Last Revised*: 5/18/2022
+*Last Revised*: 10/18/2023
diff --git a/docs/internal.md b/docs/internal.md
index 02ec903fc3..f3ce837571 100644
--- a/docs/internal.md
+++ b/docs/internal.md
@@ -13,6 +13,7 @@ It covers the following issues.
* [Inferring File Types](#intern_infer)
* [Adding a Standard Filter](#intern_filters)
* [Test Interference](#intern_isolation)
+* [Managing NCZarr Tests](#intern_nczarr_tests)
# 1. Including C++ Code in the netcdf-c Library {#intern_cpp}
@@ -246,7 +247,7 @@ use this information to speed up the handling of fixed size types.
# 3. Inferring File Types {#intern_infer}
-As described in the companion document -- docs/dispatch.md --
+As described in the companion document -- docs/dispatchers.md --
when nc\_create() or nc\_open() is called, it must figure out what
kind of file is being created or opened. Once it has figured out
the file kind, the appropriate "dispatch table" can be used
@@ -655,31 +656,34 @@ It soon became apparent that there were resources shared between tests and that
execution sometimes caused interference between tests.
In order to fix the inter-test interference, several approaches were used.
-1. Renaming resources (primarily files) so that tests would create difference test files.
+1. Renaming resources (primarily files) so that tests would create different test files.
2. Telling the test system that there were explicit dependencies between tests so that they would not be run in parallel.
3. Isolating test resources by creating independent directories for each test.
## Test Isolation
-The isolation mechanism is currently used mostly in nczarr_tests.
+The isolation mechanism is currently used mostly in nczarr_test/v3_nczarr_test.
It requires that tests are all executed inside a shell script.
When the script starts, it invokes a shell function called "isolate".
-This function looks in current directory for a directory called "testset_\".
-If "testset_\ is not found then it creates it.
-This directory is then used to isolate all test output.
-
-After calling "isolate", the script enters the "testset_\"
-directory. Then each actual test creates a directory in which to
+This function looks in the current directory for a directory called "alltests_\<uid\>/\<dir\>",
+where "\<dir\>" is the name of a test directory such as "nczarr_test", or "nc_test4", etc.
+If "alltests_\<uid\>/\<dir\>" is not found then it creates it.
+This directory is then used to isolate all test output for the specified test directory.
+After calling "isolate", the script enters the "alltests_\<uid\>/\<dir\>"
+directory.
+
+Within the test directory, each actual test creates a directory in which to
store any file resources that it creates during execution.
-Suppose, for example, that the shell script is called "run_XXXX.sh".
-The isolate function creates a directory with the general name "testset_\".
-Then the run_XXX.sh script creates a directory "testset_\/testdir_XXX",
+Suppose, for example, that the shell script is called "run_XXXX.sh", and is in the "ncdump" test directory.
+The isolate function creates a directory with the general name "alltests_\<uid\>/ncdump".
+Then the run_XXX.sh script creates a directory "alltests_\<uid\>/ncdump/testdir_XXX",
enters it and runs the test.
-During cleanup, specifically "make clean", all the testset_\ directories are deleted.
+During cleanup, specifically "make clean", the directory alltests_\<uid\> is deleted, which of course
+deletes all the subsidiary test directories.
The "\" is a unique identifier created using the "date +%s" command. It returns an integer
representing the number of seconds since the start of the so-called "epoch" basically
"00:00:00 UTC, 1 January 1970". Using a date makes it easier to detect and reclaim obsolete
-testset directories.
+"alltests" directories.
## Cloud Test Isolation
@@ -693,10 +697,12 @@ interfere with local testing by individual users.
This problem is difficult to solve, but a mostly complete solution has been implemented
possible with cmake, but not (as yet) possible with automake.
-In any case, there is a shell function called s3isolate in nczarr_test/test_nczarr.sh that operates on cloud resources in a way that is similar to the isolate function.
+In any case, there is a shell function called s3isolate in
+nczarr_test/test_nczarr.sh that operates on cloud resources in a way
+that is similar to the isolate function.
The s3isolate does several things:
1. It invokes isolate to ensure local isolation.
-2. It creates a path prefix relative to the Unidata S3 bucket that has the name "testset_\", where this name
+2. It creates a path prefix relative to the Unidata S3 bucket that has the name "alltests_\<uid\>/\<dir\>", where this name
is the same as the one created by the isolate function.
3. It appends the uid to a file called s3cleanup_\.uids. This file may accumulate several uids indicating
the keys that need to be cleaned up. The pid is a separate small unique id to avoid s3cleanup interference.
@@ -704,7 +710,7 @@ The s3isolate does several things:
The test script then ensures that any cloud resources are created as extensions of the path prefix.
Cleanup of S3 resources is complex.
-In configure.ac or the top-level CMakeList.txt files, the path "netcdf-c/testset_\"
+In configure.ac or the top-level CMakeLists.txt files, the path "netcdf-c/alltests_\<uid\>"
is created and via configuration commands, is propagated to various Makefile.am
and specific script files.
@@ -717,9 +723,8 @@ In cmake, the CTestCustom.cmake mechanism is used and contains the following com
ENDIF()
````
-In automake, the "check-local" extension mechanism is used
-because it is invoked after all tests are run in the nczarr_test
-directory. So nczarr_test/Makefile.am contains the following
+In automake, the "clean-local" extension mechanism is used.
+So nczarr_test/Makefile.am contains the following
equivalent code:
````
if NETCDF_ENABLE_S3_TESTALL
@@ -763,9 +768,40 @@ This file is called "s3cleanup_\.json".
5. Use the "aws delete-objects" command to delete the keys.
6. Repeat steps 4 and 5 for each set of 500 keys.
+# 6. Managing NCZarr Tests {#intern_nczarr_tests}
+
+When testing NCZarr, it is necessary to run tests for (NC)Zarr version 2 (aka V2) and for (NC)Zarr version 3 (aka V3).
+In support of this, there are two test directories: *nczarr_test* and *v3_nczarr_test*.
+When the tests in *nczarr_test* are executed, they default to using Zarr version 2 (=> NCZarr version 2).
+Similarly, when the tests in *v3_nczarr_test* are executed, they default to using Zarr version 3 (=> NCZarr version 3).
+
+It turns out that almost all of the V2 tests can be reused for testing V3.
+So, the tests in *v3_nczarr_test* are copies of the tests in *nczarr_test*.
+It turns out that automake is not easily capable of copying those tests on the fly.
+This is principally because the automake *make distcheck* command does not allow
+modifications to the source directory, but only to the build directory. This means
+that the tests must be heavily modified to handle the two cases where scripts, programs,
+and test data are in _\$\$\{srcdir\}_ versus when they are in _\$\$\{builddir\}_.
+
+## AutoMake Testing
+For AutoMake testing, selected V2 tests are copied to the *v3_nczarr_test*
+directory using the *BUILT_SOURCES* mechanism.
+If a new test is added to the *nczarr_test* directory, then its manifestation
+in the *v3_nczarr_test* must be decided.
+
+If the test can be used directly, then the file v3_nczarr_test must be modified
+and added to the TESTFILES_NCZARR variable. If the data files
+referenced by the test can be used directly, then they should be added to the
+TESTDATA_NCZARR variable.
+
+If the test must be modified for V3 use, then the modified
+test should be git-add'ed to the v3_nczarr_test directory
+and inserted into the Makefile.am. Similarly, any V3 specific
+data files must be manually added to the Makefile.am and git-add'ed.
+
# Point of Contact {#intern_poc}
*Author*: Dennis Heimbigner
*Email*: dmh at ucar dot edu
*Initial Version*: 12/22/2021
-*Last Revised*: 9/16/2023
+*Last Revised*: 4/10/2024
diff --git a/docs/nczarr.md b/docs/nczarr.md
index e3db2016ea..e7f23f4822 100644
--- a/docs/nczarr.md
+++ b/docs/nczarr.md
@@ -10,7 +10,7 @@ The NetCDF NCZarr Implementation
Beginning with netCDF version 4.8.0, the Unidata NetCDF group has extended the netcdf-c library to support data stored using the Zarr data model and storage format [4,6]. As part of this support, netCDF adds support for accessing data stored using cloud storage (e.g. Amazon S3 [1] ).
-The goal of this project, then, is to provide maximum interoperability between the netCDF Enhanced (netcdf-4) data model and the Zarr version 2 [4] data model. This is embodied in the netcdf-c library so that it is possible to use the netcdf API to read and write Zarr formatted datasets.
+The goal of this project is to provide maximum interoperability between the netCDF Enhanced (netcdf-4) data model and the Zarr version 2 [4] or Version 3 [13] data model. This is embodied in the netcdf-c library so that it is possible to use the netcdf API to read and write Zarr formatted datasets.
In order to better support the netcdf-4 data model, the netcdf-c library implements a limited set of extensions to the *Zarr* data model.
This extended model is referred to as *NCZarr*.
@@ -31,22 +31,22 @@ Notes on terminology in this document.
# The NCZarr Data Model {#nczarr_data_model}
-NCZarr uses a data model that, by design, extends the Zarr Version 2 Specification .
+NCZarr uses a data model that, by design, extends the Zarr Version 2 Specification or Version 3 Specification.
__Note Carefully__: a legal _NCZarr_ dataset is expected to also be a legal _Zarr_ dataset.
-The inverse is true also. A legal _Zarr_ dataset is expected to also be a legal _NCZarr_ dataset, where "legal" means it conforms to the Zarr specification(s).
-In addition, certain non-Zarr features are allowed and used.
-Specifically the XArray [7] ''\_ARRAY\_DIMENSIONS'' attribute is one such.
+The inverse is true also. A legal _Zarr_ dataset is expected to also be a legal _NCZarr_ dataset, where "legal" means it conforms to the Zarr version 2 or 3 specification.
+In addition, certain extra-Zarr features are allowed and used, namely:
+1. the XArray [7] ''\_ARRAY\_DIMENSIONS'' attribute.
+2. the .zmetadata conventions where all the JSON metadata is held in a single object.
-There are two other, secondary assumption:
+There are two other, secondary assumptions:
1. The actual storage format in which the dataset is stored -- a zip file, for example -- can be read by the _Zarr_ implementation.
-2. The compressors (aka filters) used by the dataset can be encoded/decoded by the implementation. NCZarr uses HDF5-style filters, so ensuring access to such filters is somewhat complicated. See [the companion document on
+2. The compressors (aka filters) used by the dataset can be encoded/decoded by the implementation. NCZarr uses HDF5-style filters, so ensuring access to such filters is somewhat complicated. See the [companion document on
filters](./md_filters.html "filters") for details.
Briefly, the data model supported by NCZarr is netcdf-4 minus
-the user-defined types and full String type support.
-However, a restricted form of String type
+the user-defined types. However, a restricted form of String type
is supported (see Appendix D).
As with netcdf-4, chunking is supported. Filters and compression
are also [supported](./md_filters.html "filters").
@@ -108,9 +108,14 @@ See the document "quickstart_paths" for details about
using URLs.
There are, however, some details that are important.
-- Protocol: this should be _https_ or _s3_,or _file_.
- The _s3_ scheme is equivalent to "https" plus setting "mode=s3".
- Specifying "file" is mostly used for testing, but also for directory tree or zipfile format storage.
+Several URL protocols are semantically meaningful for the NCZarr implementation.
+* _http_ or _https_ -- this just signals that we have a URL; the actual storage type is inferred from the _mode_ flag or by probing the object to which the URL refers.
+* _file_ -- The _file_ scheme is equivalent to "mode=...,file,..."
+* _s3_ -- The _s3_ scheme is equivalent to "https" plus setting "mode=...,s3,..." plus using an elided host.
+* _gs3_ -- The _gs3_ scheme is equivalent to "https" plus setting "mode=...,gs3,...", and using the google-specific host.
+* _zoh_ -- The _zoh_ scheme is equivalent to "http" plus setting "mode=...,zoh,...", plus using a host that leads to a server supporting the ZoH REST API.
+
+Note that currently there is no "zip:" protocol so it must be inferred or specified by a _mode_ tag.
## Client Parameters
@@ -119,11 +124,21 @@ The fragment part of a URL is used to specify information that is interpreted to
For reading, _key=value_ pairs are provided for specifying the storage format.
- mode=nczarr|zarr
+The _zarr_ mode implies restricting the format to the pure Zarr V2 or V3 format.
+The _nczarr_ mode implies using the netcdf Zarr extensions.
+
Additional pairs are provided to specify the Zarr version.
-- mode=v2
+- mode=v2|v3
+
+Obviously, _v2_ implies using the Zarr Version 2 format; similarly for _v3_.
-Additional pairs are provided to specify the storage medium: Amazon S3 vs File tree vs Zip file.
-- mode=file|zip|s3
+Additional pairs are provided to specify the storage medium: Amazon S3 vs File vs, etc.
+- mode=file|zip|s3|gs3|zoh
+
+The modes imply use of a specific driver:
+* The _s3_ driver stores data using Amazon S3 or some equivalent.
+* The _file_ driver stores data in a directory tree.
+* The _zip_ driver stores data in a local zip file.
Note that when reading, an attempt will be made to infer the
format and Zarr version and storage medium format by probing the
@@ -131,35 +146,24 @@ file. If inferencing fails, then it is reported. In this case,
the client may need to add specific mode flags to avoid
inferencing.
-Typically one will specify three mode flags: one to indicate what format
-to use and one to specify the way the dataset is to be stored.
-For example, a common one is "mode=zarr,file"
-
-
-Obviously, when creating a file, inferring the type of file to create
-is not possible so the mode flags must be set specifically.
-This means that both the storage medium and the exact storage
-format must be specified.
-Using _mode=nczarr_ causes the URL to be interpreted as a
-reference to a dataset that is stored in NCZarr format.
-The _zarr_ mode tells the library to use NCZarr, but to restrict its operation to operate on pure Zarr.
-
-
-The modes _s3_, _file_, and _zip_ tell the library what storage medium
-driver to use.
-* The _s3_ driver stores data using Amazon S3 or some equivalent.
-* The _file_ driver stores data in a directory tree.
-* The _zip_ driver stores data in a local zip file.
+Obviously, when creating a file, inferencing is not
+possible so the mode flags must be set specifically.
+In the most general case, one will specify three mode flags: one to indicate what format
+to use, one to specify the way the dataset is to be stored,
+and one to specify the Zarr format version.
+For example, a common one is "mode=zarr,file,v2"
+If not specified, the version will be the default specified when
+the netcdf-c library was built.
As an aside, it should be the case that zipping a _file_
format directory tree will produce a file readable by the
-_zip_ storage format, and vice-versa.
+_zip_ storage format, and vice-versa. This may change depending
+on the outcome of current deliberations by the Zarr committee.
By default, the XArray convention is supported for Zarr Version 2
-and used for both NCZarr files and pure Zarr files.
-
+and used for both NCZarr files and pure Zarr files. It is not
+needed for Version 3, which has an equivalent array metadata key
+called "dimension_names".
This means that every variable in the root group whose named dimensions
are also in the root group will have an attribute called
*\_ARRAY\_DIMENSIONS* that stores those dimension names.
@@ -196,7 +200,8 @@ An important restriction is placed on the structure of the tree,
namely that keys are only defined for content-bearing objects.
Further, all the leaves of the tree are these content-bearing objects.
This means that the key for one content-bearing object should not
-be a prefix of any other key.
+be a prefix of any other key. For example and given the key "/x/y/zarr.json",
+there should not exist any other key with the same prefix, "/x/y/zarr.json/z" for example.
There several other concepts of note.
1. __Dataset__ - a dataset is the complete tree contained by the key defining
@@ -207,23 +212,23 @@ and "contains" data in the form of an arbitrary sequence of 8-bit bytes.
The zmap API defined here isolates the key-value pair mapping
code from the Zarr-based implementation of NetCDF-4.
- It wraps an internal C dispatch table manager for implementing an
+It wraps an internal C dispatch table manager for implementing an
abstract data structure implementing the zmap key/object model.
Of special note is the "search" function of the API.
__Search__: The search function has two purposes:
1. Support reading of pure zarr datasets (because they do not explicitly track their contents).
-2. Debugging to allow raw examination of the storage. See zdump for example.
+2. Debugging to allow raw examination of the storage. See _zdump_ for example.
The search function takes a prefix path which has a key syntax (see above).
-The set of legal keys is the set of keys such that the key references a content-bearing object -- e.g. /x/y/.zarray or /.zgroup.
-Essentially this is the set of keys pointing to the leaf objects of the tree of keys constituting a dataset.
-This set potentially limits the set of keys that need to be examined during search.
-
The search function returns a limited set of names, where the set of names are immediate suffixes of a given prefix path.
-That is, if _\_ is the prefix path, then search returnsnall _\_ such that _\/\_ is itself a prefix of a "legal" key.
+That is, if _\_ is the prefix path, then search returns all _\_ such that _\/\_ is itself a prefix of a "legal" key.
This can be used to implement glob style searches such as "/x/y/*" or "/x/y/**"
+The term "legal keys" is the set of keys such that the key references a content-bearing object -- e.g. /x/y/.zarray or /.zgroup.
+Essentially this is the set of keys pointing to the leaf objects of the tree of keys constituting a dataset.
+This set potentially limits the set of keys that need to be examined during search.
+
This semantics was chosen because it appears to be the minimum required to implement all other kinds of search using recursion.
It was also chosen to limit the number of names returned from the search.
Specifically
@@ -250,7 +255,7 @@ so they are not included in the zmap data structure.
__A Note on Error Codes:__
-The zmap API returns some distinguished error code:
+The zmap API returns some distinguished error codes:
1. NC_NOERR if a operation succeeded
2. NC_EEMPTY is returned when accessing a key that has no content.
3. NC_EOBJECT is returned when an object is found which should not exist
@@ -263,12 +268,11 @@ But this does not propagate outside the zmap_file implementation.
## Zmap Implementatons
-The primary zmap implementation is _s3_ (i.e. _mode=nczarr,s3_) and indicates that the Amazon S3 cloud storage -- or some related applicance -- is to be used.
-Another storage format uses a file system tree of directories and files (_mode=nczarr,file_).
-A third storage format uses a zip file (_mode=nczarr,zip_).
-The latter two are used mostly for debugging and testing.
-However, the _file_ and _zip_ formats are important because they are intended to match corresponding storage formats used by the Python Zarr implementation.
-Hence it should serve to provide interoperability between NCZarr and the Python Zarr, although this interoperability has had only limited testing.
+The primary zmap implementation is _s3_ (i.e. _mode=zarr,s3_) and indicates that the Amazon S3 cloud storage -- or some related appliance -- is to be used.
+Another storage format uses a file system tree of directories and files (_mode=zarr,file_).
+A third storage format uses a zip file (_mode=zarr,zip_).
+The _file_ and _zip_ formats are important because they are intended to match corresponding storage formats used by the Python Zarr implementation.
+Hence they should serve to provide interoperability between NCZarr and the Python Zarr, although this interoperability has had only limited testing.
Examples of the typical URL form for _file_ and _zip_ are as follows.
````
@@ -297,9 +301,9 @@ This requirement imposed some constraints on the reading of Zarr datasets using
1. Zarr allows some primitive types not recognized by NCZarr.
Over time, the set of unrecognized types is expected to diminish.
Examples of currently unsupported types are as follows:
- * "c" -- complex floating point
- * "m" -- timedelta
- * "M" -- datetime
+ * "c" -- complex floating point
+ * "m" -- timedelta
+ * "M" -- datetime
2. The Zarr dataset may reference filters and compressors unrecognized by NCZarr.
3. The Zarr dataset may store data in column-major order instead of row-major order. The effect of encountering such a dataset is to output the data in the wrong order.
@@ -316,7 +320,7 @@ A good value of _n_ is 9.
# Zip File Support {#nczarr_zip}
In order to use the _zip_ storage format, the libzip [3] library must be installed.
-Note that this is different from zlib.
+Note that this is different from zlib (aka "deflate").
## Addressing Style
@@ -326,14 +330,14 @@ The notion of "addressing style" may need some expansion. Amazon S3 accepts two
For example:
```
-https://.s2.<region>.amazonaws.com/
+https://.s2..amazonaws.com/
```
2. Path -- the path addressing style places the bucket in at the front of the path part of a URL.
For example:
```
-https://s3.<region>.amazonaws.com//
+https://s3..amazonaws.com//
```
The NCZarr code will accept either form, although internally, it is standardized on path style.
@@ -346,17 +350,17 @@ The reason for this is that the bucket name forms the initial segment in the key
The NCZarr storage format is almost identical to that of the the standard Zarr format.
The data model differs as follows.
-1. Zarr only supports anonymous dimensions -- NCZarr supports only shared (named) dimensions.
-2. Zarr attributes are untyped -- or perhaps more correctly characterized as of type string.
-3. Zarr does not explicitly support unlimited dimensions -- NCZarr does support them.
+1. Zarr only supports anonymous dimensions (plus a limited set of names via _\_ARRAY_DIMENSIONS_) -- NCZarr supports only shared (named) dimensions, but can read anonymous dimensions by assigning special names to the anonymous dimensions.
+2. Zarr attributes are untyped -- or perhaps more correctly characterized as of type string (in "JSON" format). NCZarr supports typing of attributes.
+3. Zarr might not explicitly support unlimited dimensions (the documentation is unclear) -- NCZarr does support them.
## Storage Medium
Consider both NCZarr and Zarr, and assume S3 notions of bucket and object.
-In both systems, Groups and Variables (Array in Zarr) map to S3 objects.
+In both systems, Groups and Variables (aka Arrays in Zarr) map to S3 objects.
Containment is modeled using the fact that the dataset's key is a prefix of the variable's key.
-So for example, if variable _v1_ is contained in top level group g1 -- _/g1 -- then the key for _v1_ is _/g1/v_.
-Additional meta-data information is stored in special objects whose name start with ".z".
+So for example, if variable _v1_ is contained in top level group _g1_ (i.e. _/g1_) -- then the key for _v1_ is _/g1/v1_.
+Additional meta-data information is stored in special objects whose name start with ".z" (V2) or "zarr.json" (V3).
In Zarr Version 2, the following special objects exist.
1. Information about a group is kept in a special object named _.zgroup_;
@@ -366,6 +370,7 @@ so for example the object _/g1/v1/.zarray_.
3. Group-level attributes and variable-level attributes are stored in a special object named _.zattr_;
so for example the objects _/g1/.zattr_ and _/g1/v1/.zattr_.
4. Chunk data is stored in objects named "\.\...,\" where the ni are positive integers representing the chunk index for the ith dimension.
+Note that the character '/' can substitute for the '.' character in the chunk name.
The first three contain meta-data objects in the form of a string representing a JSON-formatted dictionary.
The NCZarr format uses the same objects as Zarr, but inserts NCZarr
@@ -373,17 +378,17 @@ specific attributes in the *.zattr* object to hold NCZarr specific information
The value of each of these attributes is a JSON dictionary containing a variety
of NCZarr specific information.
-These NCZarr-specific attributes are as follows:
+These attributes are as follows:
-_\_nczarr_superblock\__ -- this is in the top level group's *.zattr* object.
+_\_nczarr_superblock\__ -- this attribute key is in the top level group's *.zattr* object.
It is in effect the "superblock" for the dataset and contains
any netcdf specific dataset level information.
It is also used to verify that a given key is the root of a dataset.
-Currently it contains keys that are ignored and exist only to ensure that
+Currently it contains one key that is ignored and exists only to ensure that
older netcdf library versions do not crash.
* "version" -- the NCZarr version defining the format of the dataset (deprecated).
-_\_nczarr_group\__ -- this key appears in every group's _.zattr_ object.
+_\_nczarr_group\__ -- this attribute key appears in every group's _.zattr_ object.
It contains any netcdf specific group information.
Specifically it contains the following keys:
* "dimensions" -- the name and size of shared dimensions defined in this group, as well an optional flag indictating if the dimension is UNLIMITED.
@@ -391,60 +396,97 @@ Specifically it contains the following keys:
* "groups" -- the name of sub-groups defined in this group.
These lists allow walking the NCZarr dataset without having to use the potentially costly search operation.
-_\_nczarr_array\__ -- this key appears in the *.zattr* object associated
+_\_nczarr_array\__ -- this attribute key appears in the *.zattr* object associated
with a _.zarray_ object.
It contains netcdf specific array information.
Specifically it contains the following keys:
-* dimension_references -- the fully qualified names of the shared dimensions referenced by the variable.
-* storage -- indicates if the variable is chunked vs contiguous in the netcdf sense. Also signals if a variable is scalar.
+* dimension_references -- the names of the shared dimensions referenced by the variable.
+* storage -- indicates if the variable is chunked vs contiguous in the netcdf sense.
-_\_nczarr_attr\__ -- this attribute appears in every _.zattr_ object.
+_\_nczarr_attr\__ -- this attribute key appears in every _.zattr_ object.
Specifically it contains the following keys:
-* types -- the types of all attributes in the _.zattr_ object.
+* types -- the types of all of the attributes in the _.zattr_ object.
## Translation {#nczarr_translation}
-With some loss of netcdf-4 information, it is possible for an nczarr library to read the pure Zarr format and for other zarr libraries to read the nczarr format.
+With some constraints, it is possible for an nczarr library to read the pure Zarr format and for other zarr libraries to read the nczarr format.
-The latter case, zarr reading nczarr, is trival because all of the nczarr metadata is stored as ordinary, String valued (but JSON syntax), attributes.
+The latter case should require no special decoding by the non-nczarr library
+because all nczarr specific extensions are encoded to appear as ordinary
+zarr attributes.
-The former case, nczarr reading zarr is possible assuming the nczarr code can simulate or infer the contents of the missing _\_nczarr\_xxx_ attributes.
+The former case -- nczarr reading zarr -- is possible if the nczarr code can simulate or infer the contents of the missing _\_nczarr\_xxx_ attributes.
As a rule this can be done as follows.
-1. _\_nczarr_group\__ -- The list of contained variables and sub-groups can be computed using the search API to list the keys "contained" in the key for a group.
-The search looks for occurrences of _.zgroup_, _.zattr_, _.zarray_ to infer the keys for the contained groups, attribute sets, and arrays (variables).
-Constructing the set of "shared dimensions" is carried out
+1. _\_nczarr_group\__ -- The list of contained variables and sub-groups can be computed using the search API to list the keys "contained" in the key for a group. Alternatively, the root group may contain a _.zmetadata_ object that can be used to determine the lists of variables and subgroups.
+
+ For V2, the search looks for occurrences of _.zmetadata_, _.zgroup_, _.zattr_, _.zarray_ to infer the keys for the contained groups, attribute sets, and arrays (variables).
+For V3, the search looks for occurrences of _zarr.json_.
+
+ Constructing the set of "shared dimensions" is carried out
by walking all the variables in the whole dataset and collecting
the set of unique integer shapes for the variables.
-For each such dimension length, a top level dimension is created
+For each such dimension length, a dimension is created in the root group
named "_Anonymous_Dimension_" where len is the integer length.
-2. _\_nczarr_array\__ -- The dimension referencess are inferred by using the shape in _.zarray_ and creating references to the simulated shared dimensions.
-netcdf specific information.
+2. _\_nczarr_array\__ -- The dimension references are inferred by using the shape
+in _.zarray_ (or _zarr.json_) and creating references to the simulated shared dimension.
3. _\_nczarr_attr\__ -- The type of each attribute is inferred by trying to parse the first attribute value string.
# Compatibility {#nczarr_compatibility}
In order to accomodate existing implementations, certain mode tags are provided to tell the NCZarr code to look for information used by specific implementations.
-## XArray
+## The "xarray" Mode
-The Xarray [7] Zarr implementation uses its own mechanism for specifying shared dimensions.
+The xarray implementation (see the [XArray Zarr Encoding Specification](http://xarray.pydata.org/en/latest/internals.html#zarr-encoding-specification)) uses its own mechanism for specifying an approximation to shared dimensions.
It uses a special attribute named ''_ARRAY_DIMENSIONS''.
The value of this attribute is a list of dimension names (strings).
An example might be ````["time", "lon", "lat"]````.
-It is almost equivalent to the ````_nczarr_array "dimension_references" list````, except that the latter uses fully qualified names so the referenced dimensions can be anywhere in the dataset. The Xarray dimension list differs from the netcdf-4 shared dimensions in two ways.
-1. Specifying Xarray in a non-root group has no meaning in the current Xarray specification.
-2. A given name can be associated with different lengths, even within a single array. This is considered an error in NCZarr.
+It is essentially equivalent to the ````_nczarr_array "dimension_references" list````, except that the latter uses fully qualified names so the referenced dimensions can be anywhere in the dataset.
-The Xarray ''_ARRAY_DIMENSIONS'' attribute is supported for both NCZarr and pure Zarr.
+The xarray ''_ARRAY_DIMENSIONS'' attribute is supported for both NCZarr and pure Zarr.
If possible, this attribute will be read/written by default,
but can be suppressed if the mode value "noxarray" is specified.
If detected, then these dimension names are used to define shared dimensions.
-The following conditions will cause ''_ARRAY_DIMENSIONS'' to not be written.
+Any of the following conditions will cause ''_ARRAY_DIMENSIONS'' not to be written.
* The variable is not in the root group,
* Any dimension referenced by the variable is not in the root group.
+* ''_ARRAY_DIMENSIONS'' assigns conflicting sizes to a dimension name.
Note that this attribute is not needed for Zarr Version 3, and is ignored.
+## The ".zmetadata" Mode
+The NCZarr implementation of Version 2 also supports the ".zmetadata" convention.
+This convention adds an extra, root-level object called ".zmetadata".
+This object is a JSON dictionary with this form:
+````
+{"metadata":
+ {
+ "<key1>": <contents1>,
+ "<key2>": <contents2>,
+ ...
+ "<keyn>": <contentsn>
+ },
+"zarr_consolidated_format":1
+}
+````
+Each <key> refers to a content-bearing object and the <contents> is the JSON content of that object.
+An example might look as follows:
+````
+{
+ "metadata":
+ {
+ ".zgroup": {"zarr_format": 2},
+ ".zattr": {"globalfloat": 1},
+ "v/.zarray": {"zarr_format": 2, "shape": [1], "dtype": ".amazonaws.com/datasetbucket/rootkey\#mode=nczarr&awsprofile=unidata"
````
+
+# References {#nczarr_bib}
+
+[1] [Amazon Simple Storage Service Documentation](https://docs.aws.amazon.com/s3/index.html)
+[2] [Amazon Simple Storage Service Library](https://github.com/aws/aws-sdk-cpp)
+[3] [The LibZip Library](https://libzip.org/)
+[4] [NetCDF ZARR Data Model Specification](https://www.unidata.ucar.edu/blogs/developer/en/entry/netcdf-zarr-data-model-specification)
+[5] [Python Documentation: 8.3.
+collections — High-performance container datatypes](https://docs.python.org/2/library/collections.html)
+[6] [Zarr Version 2 Specification](https://zarr.readthedocs.io/en/stable/spec/v2.html)
+[7] [XArray Zarr Encoding Specification](http://xarray.pydata.org/en/latest/internals.html#zarr-encoding-specification)
+[8] [Dynamic Filter Loading](https://support.hdfgroup.org/HDF5/doc/Advanced/DynamicallyLoadedFilters/HDF5DynamicallyLoadedFilters.pdf)
+[9] [Officially Registered Custom HDF5 Filters](https://portal.hdfgroup.org/display/support/Registered+Filter+Plugins)
+[10] [C-Blosc Compressor Implementation](https://github.com/Blosc/c-blosc)
+[11] [Conda-forge packages / aws-sdk-cpp](https://anaconda.org/conda-forge/aws-sdk-cpp)
+[12] [GDAL Zarr](https://gdal.org/drivers/raster/zarr.html)
+[13] [Zarr Version 3 Specification](https://zarr-specs.readthedocs.io/en/latest/specs.html)
+
# Appendix A. Building NCZarr Support {#nczarr_build}
Currently the following build cases are known to work.
@@ -501,6 +561,7 @@ The relevant ./configure options are as follows.
The relevant CMake flags are as follows.
1. *-DNETCDF_ENABLE_NCZARR=off* -- equivalent to the Automake *--disable-nczarr* option.
+
## Testing NCZarr S3 Support {#nczarr_testing_S3_support}
The relevant tests for S3 support are in the _nczarr_test_ directory.
@@ -519,6 +580,10 @@ also test S3 support with this option.
````
--with-s3-testing=yes
````
+Otherwise, read-only testing is possible by specifying
+````
+--with-s3-testing=public
+````
### NetCDF CMake Build
@@ -551,7 +616,7 @@ This affects the depth to which groups can be nested because the key encodes the
# Appendix C. JSON Attribute Convention. {#nczarr_json}
-The Zarr V2 specification is somewhat vague on what is a legal
+The Zarr V2 (and V3) specification is somewhat vague on what is a legal
value for an attribute. The examples all show one of two cases:
1. A simple JSON scalar atomic values (e.g. int, float, char, etc), or
2. A JSON array of such values.
@@ -623,7 +688,7 @@ and then store it as the equivalent netcdf vector.
* If the dtype is not defined, then infer the dtype based on the first JSON value in the array,
and then store it as the equivalent netcdf vector.
-3. The attribute is any other JSON structure.
+3. All other JSON-expressions.
* Un-parse the expression to an equivalent sequence of characters, and then store it as of type NC_CHAR.
## Notes
@@ -678,22 +743,23 @@ For writing variables and NCZarr attributes, the type mapping is as follows:
Admittedly, this encoding is a bit of a hack.
So when reading data with a pure zarr implementaion
-the above types should always appear as strings,
+attributes with the above types should always appear as strings,
and the type that signals NC_CHAR (in NCZarr)
would be handled by Zarr as a string of length 1.
-
-
-# References {#nczarr_bib}
-
-[1] [Amazon Simple Storage Service Documentation](https://docs.aws.amazon.com/s3/index.html)
-[2] [Amazon Simple Storage Service Library](https://github.com/aws/aws-sdk-cpp)
-[3] [The LibZip Library](https://libzip.org/)
-[4] [NetCDF ZARR Data Model Specification](https://www.unidata.ucar.edu/blogs/developer/en/entry/netcdf-zarr-data-model-specification)
-[5] [Python Documentation: 8.3.
-collections — High-performance dataset datatypes](https://docs.python.org/2/library/collections.html)
-[6] [Zarr Version 2 Specification](https://zarr.readthedocs.io/en/stable/spec/v2.html)
-[7] [XArray Zarr Encoding Specification](http://xarray.pydata.org/en/latest/internals.html#zarr-encoding-specification)
-[8] [Dynamic Filter Loading](https://support.hdfgroup.org/HDF5/doc/Advanced/DynamicallyLoadedFilters/HDF5DynamicallyLoadedFilters.pdf)
-[9] [Officially Registered Custom HDF5 Filters](https://portal.hdfgroup.org/display/support/Registered+Filter+Plugins)
-[10] [C-Blosc Compressor Implementation](https://github.com/Blosc/c-blosc)
-[11] [Conda-forge packages / aws-sdk-cpp](https://anaconda.org/conda-forge/aws-sdk-cpp)
-[12] [GDAL Zarr](https://gdal.org/drivers/raster/zarr.html)
-
# Change Log {#nczarr_changelog}
[Note: minor text changes are not included.]
@@ -866,6 +911,9 @@ intended to be a detailed chronology. Rather, it provides highlights
that will be of interest to NCZarr users. In order to see exact changes,
It is necessary to use the 'git diff' command.
+## 01/16/2025
+1. Document the addition of .zmetadata support.
+
## 03/31/2024
1. Document the change to V2 to using attributes to hold NCZarr metadata.
@@ -891,4 +939,4 @@ include arbitrary JSON expressions; see Appendix D for more details.
__Author__: Dennis Heimbigner
__Email__: dmh at ucar dot edu
__Initial Version__: 4/10/2020
-__Last Revised__: 4/02/2024
+__Last Revised__: 1/16/2025
diff --git a/docs/quickstart_paths.md b/docs/quickstart_paths.md
index 4481f756b0..4a7e8cc6e5 100644
--- a/docs/quickstart_paths.md
+++ b/docs/quickstart_paths.md
@@ -62,20 +62,7 @@ The query and fragment are optional.
### Examples of URL Paths for NetCDF-C
* https://thredds.ucar.edu/catalog
-## Addendum A. Amazon S3 Specific URLS {#nc_paths_s3_urls}
-A URL path is required for accessing datasets on the Amazon S3 storage cloud.
-Unfortunately S3 URLs are complicated.
-It has the following features:
-* Protocol: _https_ or _s3_. The _s3_ scheme is equivalent to "https" plus setting various tags in the query and/or fragment part of the URL.
-* Host: Amazon S3 defines three forms: _Virtual_, _Path_, and _S3_
- + _Virtual_: the host includes the bucket name as in __bucket.s3.<region>.amazonaws.com__ or __bucket.s3.amazonaws.com__
- + _Path_: the host does not include the bucket name, but rather the bucket name is the first segment of the path. For example __s3.<region>.amazonaws.com/bucket__ or __s3.amazonaws.com/bucket__
- + _S3_: the protocol is "s3:" and if the host is a single name, then it is interpreted as the bucket. The region is determined using an algorithm defined in the nczarr documentation.
- + _Other_: It is possible to use other non-Amazon cloud storage, but that is cloud library dependent.
-* Query: currently not used.
-* Fragment: the fragment is of the form _key=value&key=value&..._. Depending on the key, the _value_ part may be left out and some default value will be used. The exact set of possible keys is defined in the nczarr documentation.
-
-## Addendum B. Known Fragment Keys {#nc_paths_frag_keys}
+## Addendum A. Known Fragment Keys {#nc_paths_frag_keys}
The fragment part of a URL is used to pass information deep into
the netcdf-c library to control its actions.
@@ -84,8 +71,8 @@ This appendix list known keys, although it may be somewhat out-of-date.
The current set of keys used in the netcdf-c library is as follows.
* _mode_ -- A special key that is used to provide single values for controlling the netcdf-c library. It consists of a comma separated sequence of values
primarily used to control the file format.
-The *mode* key supports the following values
- - _dap2_ -- Specifies that the URL accesses a resource using the DAP2 protocol
+The *mode* key currently supports the following values
+ - _dap2_ -- Specifies that the URL accesses a resource using the DAP2 protocol (default if no mode is specified)
- _dap4_ -- Specifies that the URL accesses a resource using the DAP4 protocol
- _netcdf-3_ -- Specifies that a file is a netcdf-classic file
- _classic_ -- Alias for _netcdf-3_
@@ -101,6 +88,7 @@ The *mode* key supports the following values
- _file_ --Specifies that the file is an NCZarr/Zarr file stored as a file tree
- _zip_ --Specifies that the file is an NCZarr/Zarr file stored as a zip file
- _bytes_ -- Specifies that the file is remote and is to be read using byte-range support
+ - _zoh_ --Specifies that the file is remote and supports the [GWDG ZoH](https://pad.gwdg.de/DtHGRP38Sw2YQDAAjPuP2Q) (Zarr-over-HTTP) protocol
in NCZarr format
* _dap2_ -- equivalent to "mode=dap2"
* _dap4_ -- equivalent to "mode=dap4"
@@ -108,3 +96,51 @@ The *mode* key supports the following values
* _log_ -- turn on logging for the duration of the data request
* _show=fetch_ -- log curl fetch commands
+## Addendum B. Amazon S3 Specific URLS {#nc_paths_s3_urls}
+A URL path is required for accessing datasets on the Amazon S3 storage cloud.
+Unfortunately S3 URLs are complicated.
+They can have the following forms:
+* _Virtual_: the protocol is "http:" or "https:", the mode specifies "s3", and the host starts with the bucket name; e.g. __bucket.s3.<region>.amazonaws.com__ or __bucket.s3.amazonaws.com__
+* _Path_: the protocol is "http:" or "https:", the mode specifies "s3", and the host does not include the bucket name, but rather the bucket name is the first segment of the path. For example __s3.<region>.amazonaws.com/bucket__ or __s3.amazonaws.com/bucket__
+* _Protocol_: the protocol is "s3:" and if the host is a single name, then it is interpreted as the bucket. The region is determined using an algorithm defined in the nczarr documentation.
+
+For all of the above URL forms, there are two additional pieces.
+* Query: currently not used.
+* Fragment: the fragment is of the form _key=value&key=value&..._. Depending on the key, the _value_ part may be left out and some default value will be used. The exact set of possible keys is defined in the nczarr documentation.
+
+## Addendum C. Google Storage Specific URLS {#nc_paths_google_urls}
+Google provides an interface to its storage that is compatible with the Amazon S3 REST API.
+A URL path is required for accessing datasets on the Google storage cloud.
+Note that the Google host is always "storage.googleapis.com" and has no concept of region.
+It has the following forms.
+* _Path_: the bucket name is the first segment of the path.
+For example __storage.googleapis.com/bucket__.
+* _Protocol_: the protocol is "gs3:" and if the host is a single name, then it is interpreted as the bucket. The _gs3_ scheme is equivalent to "https" plus setting various tags in the query and/or fragment part of the URL.
+For example __gs3://bucket/__.
+
+For all of the above URL forms, there are two additional pieces.
+* Query: currently not used.
+* Fragment: the fragment is of the form _key=value&key=value&..._. Depending on the key, the _value_ part may be left out and some default value will be used. The exact set of possible keys is defined in the nczarr documentation.
+
+## Addendum D. Zarr-Over-HTTP (ZoH) Protocol Specific URLS {#nc_paths_zoh_urls}
+The [GWDG ZoH](https://pad.gwdg.de/DtHGRP38Sw2YQDAAjPuP2Q) (Zarr-over-HTTP) protocol provides an interface to any server supporting the ZoH REST API.
+The URLs for this API are very similar to the S3 or Google URLs.
+Note the _virtual_ URL format is not currently supported.
+A ZoH URL has one of the following forms.
+* _Path_: the protocol is "http:" or "https:", the host is any standard host (including an optional port number), and the bucket name is the first segment of the path.
+For example __http://zoh.gwdg.de/<bucket>/<key>__.
+* _Protocol_: the protocol is "zoh:" and a complete host must be specified.
+The URL path part is the key to be interpreted by the ZoH server
+as it wishes.
+
+For all of the above URL forms, there are two additional pieces.
+* Query: currently not used.
+* Fragment: the fragment is of the form _key=value&key=value&..._. Depending on the key, the _value_ part may be left out and some default value will be used. The exact set of possible keys is defined in the nczarr documentation.
+
+## Point of Contact {#nc_paths_poc}
+
+__Author__: Dennis Heimbigner
+__Email__: dmh at ucar dot edu
+__Initial Version__: 4/10/2020
+__Last Revised__: 1/16/2025
+
diff --git a/include/CMakeLists.txt b/include/CMakeLists.txt
index d58d7f7aa7..96d2d7cbc1 100644
--- a/include/CMakeLists.txt
+++ b/include/CMakeLists.txt
@@ -4,6 +4,8 @@
# University Corporation for Atmospheric Research/Unidata.
# See netcdf-c/COPYRIGHT file for more info.
+
+
#####
# Installation of various netCDF headers.
#####
@@ -56,6 +58,10 @@ ADD_EXTRA_DIST("${CUR_EXTRA_DIST}")
# One time read ncextern.h
file(READ ncexternl.h NCEXTH0)
+#####
+# Built Sources
+#####
+
# Built source: netcdf_json.h
file(READ ncjson.h JSONH0)
STRING(REPLACE "NCJSON_H" "NETCDF_JSON_H" JSONH1 "${JSONH0}")
diff --git a/include/Makefile.am b/include/Makefile.am
index f47bdf4dd2..07e663779d 100644
--- a/include/Makefile.am
+++ b/include/Makefile.am
@@ -10,7 +10,7 @@
include_HEADERS = netcdf.h netcdf_meta.h netcdf_mem.h netcdf_aux.h \
netcdf_filter.h netcdf_filter_build.h netcdf_filter_hdf5_build.h \
-netcdf_dispatch.h
+netcdf_dispatch.h netcdf_vutils.h
include_HEADERS += netcdf_json.h netcdf_proplist.h
@@ -48,21 +48,26 @@ BUILT_SOURCES = netcdf_json.h netcdf_proplist.h
# marked with a macro (OPTSTATIC) that marks the entry point as
# static inside netcdf_json.h. This is an ugly hack to avoid
# having to reference libnetcdf in the nczarr code wrappers.
-# Note also that we incorporate the core of ncexternl.h into the netcdf_json.h file.
+# Note that the file is built in builddir in case the build
+# is out of source.
# Give the recipe for building netcdf_json.h
netcdf_json.h: ${top_srcdir}/libdispatch/ncjson.c ${top_srcdir}/include/ncjson.h ${top_srcdir}/include/ncexternl.h
rm -fr ${builddir}/netcdf_json.h
- cat ${srcdir}/ncjson.h | sed -e 's/NCJSON_H/NETCDF_JSON_H/' >> ${builddir}/netcdf_json.h
- echo '#ifdef NETCDF_JSON_H' >> ${builddir}/netcdf_json.h
- sed -e '/ncjson.h/d' < ${srcdir}/../libdispatch/ncjson.c >> ${builddir}/netcdf_json.h
+ cat ${srcdir}/ncjson.h \
+ | sed -e '/!NCJSON_H/d' \
+ | sed -e 's/NCJSON_H/NETCDF_JSON_H/' \
+ >> ${builddir}/netcdf_json.h
+ cat ${srcdir}/../libdispatch/ncjson.c | sed -e '/ncjson.h/d' >> ${builddir}/netcdf_json.h
echo '#endif /*NETCDF_JSON_H*/' >> ${builddir}/netcdf_json.h
# netcdf_proplist is analogous to netcdf_json but, of course, using libdispatch/ncproplist.c and include/ncproplist.h
# Give the recipe for building netcdf_proplist.h. Basically same as for netcdf_json.h
netcdf_proplist.h: ${top_srcdir}/libdispatch/ncproplist.c ${top_srcdir}/include/ncproplist.h ${top_srcdir}/include/ncexternl.h
rm -fr ${builddir}/netcdf_proplist.h
- cat ${srcdir}/ncproplist.h | sed -e 's/NCPROPLIST_H/NETCDF_PROPLIST_H/' >> ${builddir}/netcdf_proplist.h
- echo '#ifdef NETCDF_PROPLIST_H' >> ${builddir}/netcdf_proplist.h
- sed -e '/ncproplist.h/d' < ${srcdir}/../libdispatch/ncproplist.c >> ${builddir}/netcdf_proplist.h
+ cat ${srcdir}/ncproplist.h \
+ | sed -e '/!NCPROPLIST_H/d' \
+ | sed -e 's/NCPROPLIST_H/NETCDF_PROPLIST_H/' \
+ >> ${builddir}/netcdf_proplist.h
+ cat ${srcdir}/../libdispatch/ncproplist.c | sed -e '/ncproplist.h/d' >> ${builddir}/netcdf_proplist.h
echo '#endif /*NETCDF_PROPLIST_H*/' >> ${builddir}/netcdf_proplist.h
diff --git a/include/nc4internal.h b/include/nc4internal.h
index 21157e9f9a..7e64dd3ef3 100644
--- a/include/nc4internal.h
+++ b/include/nc4internal.h
@@ -43,7 +43,7 @@
/* typedef enum {GET, PUT} NC_PG_T; */
/** These are the different objects that can be in our hash-lists. */
-typedef enum {NCNAT, NCVAR, NCDIM, NCATT, NCTYP, NCFLD, NCGRP, NCFIL} NC_SORT;
+typedef enum {NCNAT, NCVAR, NCDIM, NCATT, NCTYP, NCFLD, NCGRP, NCFILE} NC_SORT;
/** The netCDF V2 error code. */
#define NC_V2_ERR (-1)
@@ -100,6 +100,8 @@ typedef enum {NCNAT, NCVAR, NCDIM, NCATT, NCTYP, NCFLD, NCGRP, NCFIL} NC_SORT;
# define VIRTUALFLAG 8
/** Per-variable attribute, as opposed to global */
# define VARFLAG 16
+ /** If written via NCZarr, then is a complex json attribute */
+# define COMPLEXJSON 32
/** Boolean type, to make the code easier to read. */
typedef enum {NC_FALSE = 0, NC_TRUE = 1} nc_bool_t;
@@ -460,24 +462,39 @@ extern int nc_get_alignment(int* thresholdp, int* alignmentp);
/**************************************************/
/* Begin to collect global state info in one place (more to do) */
+#ifdef WATCH
+extern NClist* pluginpaths;
+extern NClist* zpluginpaths;
+#define PLUGINPATHS pluginpaths
+#define ZPLUGINPATHS zpluginpaths
+#else
+#define PLUGINPATHS gs->pluginpaths
+#define ZPLUGINPATHS gs->zarr.pluginpaths
+#endif
+
typedef struct NCglobalstate {
int initialized;
char* tempdir; /* track a usable temp dir */
char* home; /* track $HOME */
char* cwd; /* track getcwd */
struct NCRCinfo* rcinfo; /* Currently only one rc file per session */
+#ifndef WATCH
NClist* pluginpaths; /* Global Plugin State */
+#endif
struct GlobalZarr { /* Zarr specific parameters */
char dimension_separator;
int default_zarrformat;
+#ifndef WATCH
NClist* pluginpaths; /* NCZarr mirror of plugin paths */
- NClist* codec_defaults;
- NClist* default_libs;
- /* All possible HDF5 filter plugins */
- /* Consider onverting to linked list or hash table or
+#endif
+ NClist* codec_defaults; /* NClist */
+ NClist* default_libs; /* NClist */
+ /* All possible HDF5 filter plugins (except hdf5raw) */
+ /* Consider converting to linked list or hash table or
equivalent since very sparse */
struct NCZ_Plugin** loaded_plugins; //[H5Z_FILTER_MAX+1];
size_t loaded_plugins_max; /* plugin filter id index. 0
#endif
+#ifdef HAVE_STRING_H
+#include
+#endif
+#ifdef HAVE_UNISTD_H
+#include
+#endif
+#ifdef __APPLE__ /* GCC strikes again */
+#ifndef uint
+typedef unsigned int uint;
+#endif
+#ifndef ushort
+typedef unsigned short ushort;
+#endif
+#endif /*__APPLE__*/
+
+#include
+#include
+
+#ifdef _WIN32
+#include
+#include
+#endif
/*
This is included in bottom
@@ -49,7 +71,7 @@ typedef int mode_t;
#define F_OK 00
#endif
-#endif
+#endif /*_WIN32*/
/*Warning: Cygwin with -ansi does not define these functions
in its headers.*/
@@ -134,6 +156,9 @@ unsigned long long int strtoull(const char*, char**, int);
#endif /*_WIN32*/
#ifndef nulldup
+#ifndef _WIN32
+#pragma GCC diagnostic ignored "-Wnonnull"
+#endif
#define nulldup(s) ((s)==NULL?NULL:strdup(s))
#endif
@@ -170,7 +195,6 @@ typedef unsigned long long uint64;
typedef unsigned long long uint64_t;
#endif
-#ifndef _WIN32
#ifndef HAVE_UINTPTR_T
#ifndef uintptr_t
#if SIZEOF_VOIDP == 8
@@ -180,7 +204,6 @@ typedef unsigned long long uint64_t;
#endif
#endif
#endif
-#endif
#ifndef HAVE_SIZE64_T
typedef unsigned long long size64_t;
diff --git a/include/ncjson.h b/include/ncjson.h
index 1de7ca0cbf..7d2f58e660 100644
--- a/include/ncjson.h
+++ b/include/ncjson.h
@@ -3,7 +3,7 @@
*/
#ifndef NCJSON_H
-#define NCJSON_H
+#define NCJSON_H 1
#ifndef OPTEXPORT
#ifdef NETCDF_JSON_H
@@ -11,13 +11,18 @@
#else /*!NETCDF_JSON_H*/
#ifdef _WIN32
#define OPTEXPORT __declspec(dllexport)
-#else
+#else /*!WIN32*/
#define OPTEXPORT extern
-#endif
+#endif /*WIN32*/
#endif /*NETCDF_JSON_H*/
#endif /*OPTEXPORT*/
/**************************************************/
+
+/* Return codes */
+#define NCJ_OK 0 /* must equal NC_NOERR in netcdf.h */
+#define NCJ_ERR (-1) /* must equal NC_ERROR in netcdf.h */
+
/* Json object sorts (note use of term sort rather than e.g. type or discriminant) */
#define NCJ_UNDEF 0
#define NCJ_STRING 1
@@ -30,6 +35,10 @@
#define NCJ_NSORTS 8
+/* Dump/text/unparse flags */
+#define NCJFLAG_NONE 0
+#define NCJFLAG_INDENTED 1
+
/* Define a struct to store primitive values as unquoted
strings. The sort will provide more info. Do not bother with
a union since the amount of saved space is minimal.
@@ -39,8 +48,9 @@ typedef struct NCjson {
int sort; /* of this object */
char* string; /* sort != DICT|ARRAY */
struct NCjlist {
- size_t len;
- struct NCjson** contents;
+ size_t alloc;
+ size_t len;
+ struct NCjson** contents;
} list; /* sort == DICT|ARRAY */
} NCjson;
@@ -48,16 +58,15 @@ typedef struct NCjson {
don't use union so we can know when to reclaim sval
*/
struct NCJconst {int bval; long long ival; double dval; char* sval;};
-#define NCJconst_empty {0,0,0.0,NULL}
/**************************************************/
/* Extended API */
-/* Return 0 if ok else -1 */
+/* Return NCJ_OK if ok else NCJ_ERR */
#if defined(__cplusplus)
extern "C" {
-#endif
+#endif /*__cplusplus*/
/* Parse a string to NCjson*/
OPTEXPORT int NCJparse(const char* text, unsigned flags, NCjson** jsonp);
@@ -78,26 +87,37 @@ OPTEXPORT int NCJnewstring(int sort, const char* value, NCjson** jsonp);
OPTEXPORT int NCJnewstringn(int sort, size_t len, const char* value, NCjson** jsonp);
/* Get dict key value by name */
-OPTEXPORT int NCJdictget(const NCjson* dict, const char* key, const NCjson** valuep);
+OPTEXPORT int NCJdictget(const NCjson* dict, const char* key, NCjson** valuep);
+
+/* Functional version of NCJdictget */
+OPTEXPORT NCjson* NCJdictlookup(const NCjson* dict, const char* key);
/* Convert one json sort to value of another type; don't use union so we can know when to reclaim sval */
OPTEXPORT int NCJcvt(const NCjson* value, int outsort, struct NCJconst* output);
-/* Insert an atomic value to an array or dict object. */
+/* Append an atomic value to an array or dict object. */
OPTEXPORT int NCJaddstring(NCjson* json, int sort, const char* s);
/* Append value to an array or dict object. */
OPTEXPORT int NCJappend(NCjson* object, NCjson* value);
-/* Insert key-value pair into a dict object. key will be copied */
-OPTEXPORT int NCJinsert(NCjson* object, const char* key, NCjson* value);
+/* Append string value to an array or dict object. */
+OPTEXPORT int NCJappendstring(NCjson* object, int sort, const char* s);
-/* Insert key-value pair as strings into a dict object.
- key and value will be copied */
+/* Append int value to an array or dict object. */
+OPTEXPORT int NCJappendint(NCjson* object, long long n);
+
+/* Insert (string)key-(NCjson*)value pair into a dict object. key will be copied; jvalue will not */
+OPTEXPORT int NCJinsert(NCjson* object, const char* key, NCjson* jvalue);
+
+/* Insert key-value pair into a dict object. key and value will be copied */
OPTEXPORT int NCJinsertstring(NCjson* object, const char* key, const char* value);
-/* Insert key-value pair where value is an int */
-OPTEXPORT int NCJinsertint(NCjson* object, const char* key, long long ivalue);
+/* Overwrite key-value pair in a dict object. Act like NCJinsert if key not found */
+OPTEXPORT int NCJoverwrite(NCjson* object, const char* key, NCjson* value);
+
+/* Insert key-value pair into a dict object. key and value will be copied */
+OPTEXPORT int NCJinsertint(NCjson* object, const char* key, long long n);
/* Unparser to convert NCjson object to text in buffer */
OPTEXPORT int NCJunparse(const NCjson* json, unsigned flags, char** textp);
@@ -106,37 +126,57 @@ OPTEXPORT int NCJunparse(const NCjson* json, unsigned flags, char** textp);
OPTEXPORT int NCJclone(const NCjson* json, NCjson** clonep);
#ifndef NETCDF_JSON_H
+
/* dump NCjson* object to output file */
OPTEXPORT void NCJdump(const NCjson* json, unsigned flags, FILE*);
+
/* convert NCjson* object to output string */
-OPTEXPORT const char* NCJtotext(const NCjson* json);
+OPTEXPORT const char* NCJtotext(const NCjson* json, unsigned flags);
+
+/* Sort a dictionary by key */
+OPTEXPORT void NCJdictsort(NCjson* jdict);
+
#endif /*NETCDF_JSON_H*/
#if defined(__cplusplus)
}
-#endif
+#endif /*__cplusplus*/
/* Getters */
#define NCJsort(x) ((x)->sort)
#define NCJstring(x) ((x)->string)
-#define NCJlength(x) ((x)==NULL ? 0 : (x)->list.len)
-#define NCJdictlength(x) ((x)==NULL ? 0 : (x)->list.len/2)
+#define NCJarraylength(x) ((x)==NULL ? 0 : (x)->list.len)
+#define NCJdictlength(x) ((x)==NULL ? 0 : ((x)->list.len) / 2)
#define NCJcontents(x) ((x)->list.contents)
#define NCJith(x,i) ((x)->list.contents[i])
-#define NCJdictith(x,i) ((x)->list.contents[2*i])
+#define NCJdictkey(x,i) ((x)->list.contents[(i)*2])
+#define NCJdictvalue(x,i) ((x)->list.contents[((i)*2)+1])
/* Setters */
#define NCJsetsort(x,s) (x)->sort=(s)
#define NCJsetstring(x,y) (x)->string=(y)
#define NCJsetcontents(x,c) (x)->list.contents=(c)
-#define NCJsetlength(x,l) (x)->list.len=(l)
+#define NCJsetarraylength(x,l) (x)->list.len=(l)
+#define NCJsetdictlength(x,l) (x)->list.len=((l)*2)
/* Misc */
#define NCJisatomic(j) ((j)->sort != NCJ_ARRAY && (j)->sort != NCJ_DICT && (j)->sort != NCJ_NULL && (j)->sort != NCJ_UNDEF)
/**************************************************/
+/* Error detection helper */
+#undef NCJDEBUG
+#ifdef NCJDEBUG
+static int
+NCJBREAKPOINT(int err)
+{
+ (void)NCJBREAKPOINT;
+ return err;
+}
+#else
+#define NCJBREAKPOINT(err) (err)
+#endif /*NCJDEBUG*/
+#define NCJcheck(expr) do{if((expr) < 0) {stat = NCJBREAKPOINT(NCJ_ERR); goto done;}}while(0)
-#endif /*NCJSON_H*/
-
-
+/**************************************************/
+#endif /*!NCJSON_H*/ /* Leave the ! as a tag for sed */
diff --git a/include/nclog.h b/include/nclog.h
index aea5fb52e1..e848ee3219 100644
--- a/include/nclog.h
+++ b/include/nclog.h
@@ -11,7 +11,7 @@
#include
#include "ncexternl.h"
-#undef NCCATCH
+#define NCCATCH
#define NCENVLOGGING "NCLOGGING"
#define NCENVTRACING "NCTRACING"
@@ -25,7 +25,7 @@
#define NCLOGDEBUG (4) /* Everything */
/* Support ptr valued arguments that are used to store results */
-#define PTRVAL(t,p,d) ((t)((p) == NULL ? (d) : *(p)))
+#define PTRVAL(t,p,d) (((p) == NULL ? (t)(d) : (t)*(p)))
#if defined(_CPLUSPLUS_) || defined(__CPLUSPLUS__)
extern "C" {
diff --git a/include/ncproplist.h b/include/ncproplist.h
index 9561789a6c..044f6ca255 100644
--- a/include/ncproplist.h
+++ b/include/ncproplist.h
@@ -22,12 +22,16 @@
/**************************************************/
/*
This is used to store a property list mapping a small number of
-fixed-sized key strings to an arbitrary uintptr_t value. The
-uintptr_t type is used to ensure that the value can be a pointer or a
-small string upto sizeof(uintptr_t) - 1 (for trailing nul). The big
-problem is reclaiming the value if it a pointer. The fact that the
-number of keys is small makes it feasible to use linear search.
-This is currently only used for plugins, but may be extended to other uses.
+keys to objects. The uintptr_t type is used to ensure that the value can be a pointer or a
+small string up to sizeof(uintptr_t) - 1 (for trailing nul) or an integer constant.
+
+There are two operations that may be defined for the property:
+1. reclaiming the value when proplist is free'd and property value points to allocated data of arbitrary complexity.
+2. copying the value (for cloning) if it points to allocated data of arbitrary complexity.
+
+The fact that the number of keys is small makes it feasible to use
+linear search. This is currently only used for plugins, but may be
+extended to other uses.
*/
/*! Proplist-related structs.
@@ -38,23 +42,40 @@ This is currently only used for plugins, but may be extended to other uses.
1. It is critical that |uintptr_t| == |void*|
*/
-#define NCPROPSMAXKEY 31 /* characters assert (NCPROPSMAXKEY+1)/8 == 0*/
+#define NCPROPSMAXKEY 31 /* characters; assert (NCPROPSMAXKEY+1)%8 == 0*/
-/* Returns 0 => error; 1 => success */
-typedef int (*NCPreclaimfcn)(uintptr_t userdata, const char* key, void* value, uintptr_t size);
+/* Opaque forward */
+struct NCPpair;
-/* The property list proper is a sequence of these objects */
-typedef struct NCProperty {
+/* This function performs all of the following operations on a complex type */
+typedef enum NCPtypeop {NCP_RECLAIM=1,NCP_COPY=2} NCPtypeop;
+
+/* There are three possible types for a property value */
+typedef enum NCPtype {
+ NCP_CONST=0, /* Value is a simple uintptr_t constant */
+ NCP_BYTES=2, /* Value points to a counted sequence of bytes; If a string,
+ then it includes the nul term character */
+ NCP_COMPLEX=3 /* Value points to an arbitrarily complex structure */
+} NCPtype;
+
+/* (Returns < 0 => error) (>= 0 => success) */
+typedef int (*NCPtypefcn)(NCPtypeop op, struct NCPpair* input, struct NCPpair* output);
+
+/* Expose this prefix of NCProperty; used in clone and lookup */
+/* Hold just the key+value pair */
+typedef struct NCPpair {
char key[NCPROPSMAXKEY+1]; /* copy of the key string; +1 for trailing nul */
- uintptr_t flags;
-# define NCPF_SIMPLE (1<<0) /* non-reclaimable */
-# define NCPF_BYTES (1<<1) /* reclaimable bytes */
-# define NCPF_COMPLEX (1<<2) /* extended case */
+ NCPtype sort;
uintptr_t value;
uintptr_t size; /* size = |value| as ptr to memory, if string, then include trailing nul */
- uintptr_t userdata; /* extra data for following functions */
- NCPreclaimfcn reclaim;
-} NCProperty;
+} NCPpair;
+
+/* The property list proper is a sequence of these objects */
+typedef struct NCPproperty {
+ NCPpair pair; /* Allowed by C language standard */
+ uintptr_t userdata; /* extra data for the type function */
+ NCPtypefcn typefcn; /* Process type operations */
+} NCPproperty;
/*
The property list object.
@@ -62,7 +83,7 @@ The property list object.
typedef struct NCproplist {
size_t alloc; /* allocated space to hold properties */
size_t count; /* # of defined properties */
- NCProperty* properties;
+ NCPproperty* properties;
} NCproplist;
/**************************************************/
@@ -72,19 +93,24 @@ typedef struct NCproplist {
extern "C" {
#endif
+/* All int valued functions return < 0 if error; >= 0 otherwise */
+
+
/* Create, free, etc. */
OPTEXPORT NCproplist* ncproplistnew(void);
OPTEXPORT int ncproplistfree(NCproplist*);
-/* Locate a proplist entry */
-OPTEXPORT int ncproplistadd(NCproplist* plist,const char* key, uintptr_t value); /* use when reclaim not needed */
-
/* Insert properties */
OPTEXPORT int ncproplistadd(NCproplist* plist,const char* key, uintptr_t value); /* use when reclaim not needed */
OPTEXPORT int ncproplistaddstring(NCproplist* plist, const char* key, const char* str); /* use when value is simple string (char*) */
-OPTEXPORT int ncproplistaddbytes(NCproplist* plist, const char* key, void* value, uintptr_t size); /* use when value is simple ptr and reclaim is simple free function */
-OPTEXPORT int ncproplistaddx(NCproplist* plist, const char* key, void* value, uintptr_t size, uintptr_t userdata, NCPreclaimfcn); /* fully extended case */
+/* Insert an instance of type NCP_BYTES */
+OPTEXPORT int ncproplistaddbytes(NCproplist* plist, const char* key, void* value, uintptr_t size);
+
+/* Add instance of a complex type */
+OPTEXPORT int ncproplistaddx(NCproplist* plist, const char* key, void* value, uintptr_t size, uintptr_t userdata, NCPtypefcn typefcn);
+
+/* clone; keys are copies and values are copied using the NCPtypefcn */
OPTEXPORT int ncproplistclone(const NCproplist* src, NCproplist* clone);
/*
@@ -105,4 +131,4 @@ OPTEXPORT int ncproplistith(const NCproplist*, size_t i, char* const * keyp, uin
}
#endif
-#endif /*NCPROPLIST_H*/
+#endif /*!NCPROPLIST_H*/ /* WARNING: Do not remove the !; used in building netcdf_proplist.h */
diff --git a/include/ncrc.h b/include/ncrc.h
index 5705f723ac..a49b06d065 100644
--- a/include/ncrc.h
+++ b/include/ncrc.h
@@ -46,6 +46,7 @@ typedef struct NCRCinfo {
/* Opaque structures */
struct NCS3INFO;
+enum NCS3SVC;
#if defined(__cplusplus)
extern "C" {
@@ -83,6 +84,9 @@ EXTERNL int NC_addmodetag(NCURI* uri, const char* tag);
EXTERNL int NC_split_delim(const char* path, char delim, NClist* segments);
EXTERNL int NC_join(struct NClist* segments, char** pathp);
EXTERNL int NC_joinwith(NClist* segments, const char* sep, const char* prefix, const char* suffix, char** pathp);
+EXTERNL void NC_sortenvv(size_t n, char** envv);
+EXTERNL void NC_sortlist(NClist* l);
+EXTERNL void NC_freeenvv(size_t n, char** envv);
#if defined(__cplusplus)
}
diff --git a/include/ncs3sdk.h b/include/ncs3sdk.h
index adc7e456be..f81f670efc 100644
--- a/include/ncs3sdk.h
+++ b/include/ncs3sdk.h
@@ -14,10 +14,16 @@
/* Track the server type, if known */
typedef enum NCS3SVC {NCS3UNK=0, /* unknown */
- NCS3=1, /* s3.amazon.aws */
- NCS3GS=2 /* storage.googleapis.com */
+ NCS3=1, /* s3.amazon.aws */
+ NCS3GS=2, /* storage.googleapis.com */
+#ifdef NETCDF_ENABLE_ZOH
+ NCS3ZOH=4, /* ZoH Server */
+#endif
} NCS3SVC;
+/* Opaque Handles */
+struct NClist;
+
typedef struct NCS3INFO {
char* host; /* non-null if other*/
char* region; /* region */
@@ -55,9 +61,10 @@ EXTERNL int NC_s3sdkbucketdelete(void* s3client, NCS3INFO* info, char** errmsgp)
EXTERNL int NC_s3sdkinfo(void* client0, const char* bucket, const char* pathkey, unsigned long long* lenp, char** errmsgp);
EXTERNL int NC_s3sdkread(void* client0, const char* bucket, const char* pathkey, unsigned long long start, unsigned long long count, void* content, char** errmsgp);
EXTERNL int NC_s3sdkwriteobject(void* client0, const char* bucket, const char* pathkey, unsigned long long count, const void* content, char** errmsgp);
-EXTERNL int NC_s3sdkclose(void* s3client0, NCS3INFO* info, int deleteit, char** errmsgp);
-EXTERNL int NC_s3sdkgetkeys(void* s3client0, const char* bucket, const char* prefix, size_t* nkeysp, char*** keysp, char** errmsgp);
-EXTERNL int NC_s3sdksearch(void* s3client0, const char* bucket, const char* prefixkey0, size_t* nkeysp, char*** keysp, char** errmsgp);
+EXTERNL int NC_s3sdkclose(void* s3client0, char** errmsgp);
+EXTERNL int NC_s3sdktruncate(void* s3client0, const char* bucket, const char* prefix, char** errmsgp);
+EXTERNL int NC_s3sdklist(void* s3client0, const char* bucket, const char* prefix, size_t* nkeysp, char*** keysp, char** errmsgp);
+EXTERNL int NC_s3sdklistall(void* s3client0, const char* bucket, const char* prefixkey0, size_t* nkeysp, char*** keysp, char** errmsgp);
EXTERNL int NC_s3sdkdeletekey(void* client0, const char* bucket, const char* pathkey, char** errmsgp);
/* From ds3util.c */
diff --git a/include/netcdf.h b/include/netcdf.h
index 934bdd998d..61d1bcabde 100644
--- a/include/netcdf.h
+++ b/include/netcdf.h
@@ -342,14 +342,18 @@ there. */
#define NC_SZIP_NN 32 /**< SZIP NN option mask. */
#define NC_SZIP_EC 4 /**< SZIP EC option mask. */
+/* If this is extended, then you need to modify nc4internal.c */
#define NC_NOQUANTIZE 0 /**< No quantization in use. */
#define NC_QUANTIZE_BITGROOM 1 /**< Use BitGroom quantization. */
#define NC_QUANTIZE_GRANULARBR 2 /**< Use Granular BitRound quantization. */
#define NC_QUANTIZE_BITROUND 3 /**< Use BitRound quantization. */
+#define NC_QUANTIZE_MAX NC_QUANTIZE_BITROUND
/**@{*/
/** When quantization is used for a variable, an attribute of the
- * appropriate name is added. */
+ * appropriate name is added.
+ * If this set is extended, then propagate to NC_quantize_atts in nc4internal.c
+ */
#define NC_QUANTIZE_BITGROOM_ATT_NAME "_QuantizeBitGroomNumberOfSignificantDigits"
#define NC_QUANTIZE_GRANULARBR_ATT_NAME "_QuantizeGranularBitRoundNumberOfSignificantDigits"
#define NC_QUANTIZE_BITROUND_ATT_NAME "_QuantizeBitRoundNumberOfSignificantBits"
@@ -376,7 +380,12 @@ there. */
#define NC_ISSYSERR(err) ((err) > 0)
#define NC_NOERR 0 /**< No Error */
+#if 0
+/* This is unused, so re-purpose it to generic error */
#define NC2_ERR (-1) /**< Returned for all errors in the v2 API. */
+#else
+#define NC_ERROR (-1) /**< Returned for generic errors */
+#endif
/** Not a netcdf id.
@@ -531,8 +540,10 @@ by the desired type. */
#define NC_EOBJECT (-140) /**< Some object exists when it should not */
#define NC_ENOOBJECT (-141) /**< Some object not found */
#define NC_EPLUGIN (-142) /**< Unclassified failure in accessing a dynamically loaded plugin> */
+#define NC_ENOTZARR (-143) /**< Malformed (NC)Zarr file */
+#define NC_EZARRMETA (-144) /**< Malformed (NC)Zarr file consolidated metadata */
-#define NC4_LAST_ERROR (-142) /**< @internal All netCDF errors > this. */
+#define NC4_LAST_ERROR (-144) /**< @internal All netCDF errors > this. */
/*
* Don't forget to update docs/all-error-codes.md if adding new error codes here!
diff --git a/include/netcdf_filter.h b/include/netcdf_filter.h
index 70feb719ad..b5202f5d43 100644
--- a/include/netcdf_filter.h
+++ b/include/netcdf_filter.h
@@ -65,6 +65,10 @@ BLOSC_BITSHUFFLE=2 /* bit-wise shuffle */
enum BLOSC_SUBCOMPRESSORS {BLOSC_LZ=0, BLOSC_LZ4=1, BLOSC_LZ4HC=2, BLOSC_SNAPPY=3, BLOSC_ZLIB=4, BLOSC_ZSTD=5};
#endif
+/* Codecs for hdf5 filters that do not have a codec */
+#define H5Z_FILTER_RAW ((unsigned int)((int)-1)) /* Fake filter id */
+#define H5Z_CODEC_RAW "_hdf5raw_"
+
#if defined(__cplusplus)
extern "C" {
#endif
diff --git a/include/netcdf_filter_build.h b/include/netcdf_filter_build.h
index 11c80f8095..c0b55508b3 100644
--- a/include/netcdf_filter_build.h
+++ b/include/netcdf_filter_build.h
@@ -22,6 +22,16 @@
#include "netcdf_filter_hdf5_build.h"
+/* Avoid including netcdf_json.h and ncjson.h */
+#ifndef NCJSON_H
+#include "netcdf_json.h"
+#endif /*NCJSON_H*/
+
+/* Ditto */
+#ifndef NCPROPLIST_H
+#include "netcdf_proplist.h"
+#endif
+
/**************************************************/
/* Build To a NumCodecs-style C-API for Filters */
@@ -84,19 +94,21 @@ The function pointers defined in NCZ_codec_t manipulate HDF5 parameters and NumC
* Initialize use of the filter. This is invoked when a filter is loaded.
-void (*NCZ_codec_initialize)(void);
+void (*NCZ_codec_initialize)(struct NCproplist*);
* Finalize use of the filter. Since HDF5 does not provide this functionality, the codec may need to do it.
See H5Zblosc.c for an example. This function is invoked when a filter is unloaded.
-void (*NCZ_codec_finalize)(void);
+void (*NCZ_codec_finalize)(struct NCproplist*);
* Convert a JSON representation to an HDF5 representation. Invoked when a NumCodec JSON Codec is extracted
from Zarr metadata.
-int (*NCZ_codec_to_hdf5)(const char* codec, int* nparamsp, unsigned** paramsp);
+int (*NCZ_codec_to_hdf5)(struct NCproplist* env, const char* codec, unsigned int* idp, size_t* nparamsp, unsigned** paramsp);
+@param env -- (in) extra environmental information
@param codec -- (in) ptr to JSON string representing the codec.
+@param idp -- the hdf5 filter id number;
@param nparamsp -- (out) store the length of the converted HDF5 unsigned vector
@param paramsp -- (out) store a pointer to the converted HDF5 unsigned vector;
caller frees. Note the double indirection.
@@ -105,8 +117,10 @@ int (*NCZ_codec_to_hdf5)(const char* codec, int* nparamsp, unsigned** paramsp);
* Convert an HDF5 vector of visible parameters to a JSON representation.
-int (*NCZ_hdf5_to_codec)(size_t nparams, const unsigned* params, char** codecp);
+int (*NCZ_hdf5_to_codec)(struct NCproplist* env, unsigned id, size_t nparams, const unsigned* params, char** codecp);
+@param env -- (in) extra environmental information
+@param id -- the hdf5 filter id number;
@param nparams -- (in) the length of the HDF5 unsigned vector
@param params -- (in) pointer to the HDF5 unsigned vector.
@param codecp -- (out) store the string representation of the codec; caller must free.
@@ -115,10 +129,10 @@ int (*NCZ_hdf5_to_codec)(size_t nparams, const unsigned* params, char** codecp);
* Convert a set of visible parameters to a set of working parameters using extra environmental information.
Also allows for changes to the visible parameters. Invoked before filter is actually used.
-int (*NCZ_modify_parameters)(int ncid, int varid, size_t* vnparamsp, unsigned** vparamsp, size_t* wnparamsp, unsigned** wparamsp);
+int (*NCZ_modify_parameters)(const struct NCproplist* env, unsigned* idp, size_t* vnparamsp, unsigned** vparamsp, size_t* wnparamsp, unsigned** wparamsp);
-@param ncid -- (in) ncid of the variable's group
-@param varid -- (in) varid of the variable
+@param env -- (in) properties, including file ncid and the variable varid
+@param idp -- (in/out) the hdf5 filter id number;
@params vnparamsp -- (in/out) number of visible parameters
@params vparamsp -- (in/out) vector of visible parameters
@params wnparamsp -- (out) number of working parameters
@@ -127,8 +141,10 @@ int (*NCZ_modify_parameters)(int ncid, int varid, size_t* vnparamsp, unsigned**
* Convert an HDF5 vector of visible parameters to a JSON representation.
-int (*NCZ_hdf5_to_codec)(size_t nparams, const unsigned* params, char** codecp);
+int (*NCZ_hdf5_to_codec)(const struct NCproplist* env, unsigned id, size_t nparams, const unsigned* params, char** codecp);
+@param env -- (in) extra environmental information
+@param id -- (in) the hdf5 filter id number;
@param nparams -- (in) the length of the HDF5 unsigned vector
@param params -- (in) pointer to the HDF5 unsigned vector.
@param codecp -- (out) store the string representation of the codec; caller must free.
@@ -136,6 +152,22 @@ int (*NCZ_hdf5_to_codec)(size_t nparams, const unsigned* params, char** codecp);
*/
+/* Opaque */
+struct NCproplist;
+struct NCjson;
+
+/* Test if JSON dict is in raw format.
+@param jraw to test
+@return NCJ_OK if in raw format; NCJ_ERR/NC_ERROR otherwise.
+*/
+#ifndef NCraw_test
+#define NC_RAWTAG "hdf5raw"
+#define NC_RAWVERSION "1"
+#define NCraw_test(jraw) (jraw == NULL || NCJsort(jraw) != NCJ_DICT \
+ ? NCJ_ERR \
+ : (strcmp(NCJstring(NCJdictlookup(jraw,NC_RAWTAG)),NC_RAWVERSION)!=0 ? NCJ_ERR : NCJ_OK))
+#endif /*NCraw_test*/
+
/*
The struct that provides the necessary filter info.
The combination of version + sort uniquely determines
@@ -146,12 +178,12 @@ typedef struct NCZ_codec_t {
int sort; /* Format of remainder of the struct;
Currently always NCZ_CODEC_HDF5 */
const char* codecid; /* The name/id of the codec */
- unsigned int hdf5id; /* corresponding hdf5 id */
- void (*NCZ_codec_initialize)(void);
- void (*NCZ_codec_finalize)(void);
- int (*NCZ_codec_to_hdf5)(const char* codec, size_t* nparamsp, unsigned** paramsp);
- int (*NCZ_hdf5_to_codec)(size_t nparams, const unsigned* params, char** codecp);
- int (*NCZ_modify_parameters)(int ncid, int varid, size_t* vnparamsp, unsigned** vparamsp, size_t* wnparamsp, unsigned** wparamsp);
+ unsigned hdf5id; /* corresponding hdf5 id */
+ void (*NCZ_codec_initialize)(const struct NCproplist* env);
+ void (*NCZ_codec_finalize)(const struct NCproplist* env);
+ int (*NCZ_codec_to_hdf5)(const struct NCproplist* env, const char* codec, unsigned* idp, size_t* nparamsp, unsigned** paramsp);
+ int (*NCZ_hdf5_to_codec)(const struct NCproplist* env, unsigned id, size_t nparams, const unsigned* params, char** codecp);
+ int (*NCZ_modify_parameters)(const struct NCproplist* env, unsigned* idp, size_t* vnparamsp, unsigned** vparamsp, size_t* wnparamsp, unsigned** wparamsp);
} NCZ_codec_t;
#ifndef NC_UNUSED
diff --git a/include/netcdf_filter_hdf5_build.h b/include/netcdf_filter_hdf5_build.h
index 63d2857eef..34de979ad0 100644
--- a/include/netcdf_filter_hdf5_build.h
+++ b/include/netcdf_filter_hdf5_build.h
@@ -39,16 +39,14 @@
#include
/* Older versions of the hdf library may define H5PL_type_t here */
#include
-
#else /*!USE_HDF5*/ /* Provide replacement definitions */
-
/* WARNING: In order make NCZARR independent of HDF5,
while still using HDF5-style filters, some HDF5
declarations need to be duplicated here with
different names. Watch out for changes in
the underlying HDF5 declarations.
- See the file H5Zpublic.h for more detailed descriptions.
+ See the file H5Zpublic.h or H5Zdevelop.h for more detailed descriptions.
Note that these declarations are always enabled because
HDF5-style filters may have been created with these definitions
@@ -61,8 +59,9 @@
/* H5Z_FILTER_RESERVED => H5Z_FILTER_RESERVED */
#define H5Z_FILTER_RESERVED 256 /*filter ids below this value are reserved for library use */
-/* H5Z_FILTER_MAX => H5Z_FILTER_MAX */
+#ifndef H5Z_FILTER_MAX
#define H5Z_FILTER_MAX 65535 /*maximum filter id */
+#endif
/* Only a limited set of definition and invocation flags are allowed */
#define H5Z_FLAG_MANDATORY 0x0000 /*filter is mandatory */
diff --git a/libdispatch/ncutil.h b/include/netcdf_vutils.h
similarity index 89%
rename from libdispatch/ncutil.h
rename to include/netcdf_vutils.h
index 44ab508b36..0324bade45 100644
--- a/libdispatch/ncutil.h
+++ b/include/netcdf_vutils.h
@@ -1,8 +1,8 @@
-/* Copyright 2018, UCAR/Unidata and OPeNDAP, Inc.
+/* Copyright 2018, UCAR/Unidata
See the COPYRIGHT file for more information. */
-#ifndef UTILS_H
-#define UTILS_H 1
+#ifndef NCVUTILS_H
+#define NCVUTILS_H 1
/* Define a header-only simple version of a dynamically expandable list and byte buffer */
/* To be used in code that should be independent of libnetcdf */
@@ -15,8 +15,8 @@ typedef struct VList {
typedef struct VString {
int nonextendible; /* 1 => fail if an attempt is made to extend this string*/
- unsigned int alloc;
- unsigned int length;
+ unsigned alloc;
+ unsigned length;
char* content;
} VString;
@@ -55,7 +55,7 @@ static void
vlistexpand(VList* l)
{
void** newcontent = NULL;
- size_t newsz;
+ unsigned newsz;
if(l == NULL) return;
newsz = (l->length * 2) + 1; /* basically double allocated space */
@@ -132,11 +132,11 @@ static void
vsexpand(VString* vs)
{
char* newcontent = NULL;
- size_t newsz;
+ unsigned newsz;
if(vs == NULL) return;
assert(vs->nonextendible == 0);
- newsz = (vs->alloc + VSTRALLOC); /* basically double allocated space */
+ newsz = (vs->alloc + VSTRALLOC); /* increase allocated space */
if(vs->alloc >= newsz) return; /* space already allocated */
newcontent=(char*)calloc(1,newsz+1);/* always room for nul term */
assert(newcontent != NULL);
@@ -154,7 +154,7 @@ vsappendn(VString* vs, const char* elem, unsigned n)
{
size_t need;
assert(vs != NULL && elem != NULL);
- if(n == 0) {n = strlen(elem);}
+ if(n == 0) {n = (unsigned)strlen(elem);}
need = vs->length + n;
if(vs->nonextendible) {
/* Space must already be available */
@@ -166,7 +166,7 @@ vsappendn(VString* vs, const char* elem, unsigned n)
memcpy(&vs->content[vs->length],elem,n);
vs->length += n;
if(!vs->nonextendible)
- vs->content[vs->length] = '\0';
+ vs->content[vs->length] = '\0'; /* guarantee nul term */
}
static void
@@ -196,7 +196,12 @@ static char*
vsextract(VString* vs)
{
char* x = NULL;
- if(vs == NULL || vs->content == NULL) return NULL;
+ if(vs == NULL) return NULL;
+ if(vs->content == NULL) {
+ /* guarantee content existence and nul terminated */
+ if((vs->content = calloc(1,sizeof(char)))==NULL) return NULL;
+ vs->length = 0;
+ }
x = vs->content;
vs->content = NULL;
vs->length = 0;
@@ -229,14 +234,14 @@ util_initialize(void)
/* Following are always "in-lined"*/
#define vlistcontents(l) ((l)==NULL?NULL:(l)->content)
-#define vlistlength(l) ((l)==NULL?0:(int)(l)->length)
+#define vlistlength(l) ((l)==NULL?0:(l)->length)
#define vlistclear(l) vlistsetlength(l,0)
#define vlistsetlength(l,len) do{if((l)!=NULL) (l)->length=len;} while(0)
#define vscontents(vs) ((vs)==NULL?NULL:(vs)->content)
-#define vslength(vs) ((vs)==NULL?0:(int)(vs)->length)
+#define vslength(vs) ((vs)==NULL?0:(vs)->length)
#define vscat(vs,s) vsappendn(vs,s,0)
#define vsclear(vs) vssetlength(vs,0)
#define vssetlength(vs,len) do{if((vs)!=NULL) (vs)->length=len;} while(0)
-#endif /*UTILS_H*/
+#endif /*NCVUTILS_H*/
diff --git a/libdap2/ncd2dispatch.c b/libdap2/ncd2dispatch.c
index 87b4c2eb82..23544d667d 100644
--- a/libdap2/ncd2dispatch.c
+++ b/libdap2/ncd2dispatch.c
@@ -806,7 +806,7 @@ fprintf(stderr,"\n");
NCattribute* att = (NCattribute*)nclistget(var->attributes,j);
char* val = NULL;
/* Check for _FillValue/Variable mismatch */
- if(strcmp(att->name,"_FillValue")==0) {
+ if(strcmp(att->name,NC_FillValue)==0) {
/* Special case var is byte, fillvalue is int16 and
unsignedattr == 0;
This exception is needed because DAP2 byte type
diff --git a/libdap4/d4http.c b/libdap4/d4http.c
index eb144c93ba..2006cd3da7 100644
--- a/libdap4/d4http.c
+++ b/libdap4/d4http.c
@@ -110,8 +110,11 @@ WriteMemoryCallback(void *ptr, size_t size, size_t nmemb, void *data)
nclog(NCLOGWARN,"WriteMemoryCallback: zero sized chunk");
/* Optimize for reading potentially large dods datasets */
while(!ncbytesavail(buf,realsize)) {
- /* double the size of the packet */
- ncbytessetalloc(buf,2*ncbytesalloc(buf));
+ /* double the size of the packet (unless the buf is empty) */
+ if(ncbytesalloc(buf) == 0)
+ ncbytessetalloc(buf,1024);
+ else
+ ncbytessetalloc(buf,2*ncbytesalloc(buf));
}
ncbytesappendn(buf, ptr, realsize);
#ifdef PROGRESS
diff --git a/libdap4/d4meta.c b/libdap4/d4meta.c
index bd04310d34..d11243045f 100644
--- a/libdap4/d4meta.c
+++ b/libdap4/d4meta.c
@@ -739,7 +739,7 @@ compileAttrValues(NCD4meta* builder, NCD4node* attr, void** memoryp, NClist* blo
memset((void*)&converter,0,sizeof(converter));
/* Deal with _FillValue */
- if(container->sort == NCD4_VAR && strcmp(attr->name,"_FillValue")==0) {
+ if(container->sort == NCD4_VAR && strcmp(attr->name,NC_FillValue)==0) {
/* Verify or fix or ignore or fail on type mismatch */
if(container->basetype != basetype) {/* _FillValue/Variable type mismatch */
int compatible = isfilltypecompatible(container->basetype, basetype);
diff --git a/libdap4/d4parser.c b/libdap4/d4parser.c
index a557ce1177..9250d5316f 100644
--- a/libdap4/d4parser.c
+++ b/libdap4/d4parser.c
@@ -65,10 +65,11 @@ static const struct KEYWORDINFO {
};
typedef struct KEYWORDINFO KEYWORDINFO;
-static const struct ATOMICTYPEINFO {
+/* Warning do not make const because sort will modify */
+static struct ATOMICTYPEINFO {
char* name; nc_type type; size_t size;
} atomictypeinfo[] = {
-/* Keep in sorted order for binary search */
+/* Will be sorted on first use */
/* Use lower case for canonical comparison, but keep proper name here */
{"Byte",NC_BYTE,sizeof(char)},
{"Char",NC_CHAR,sizeof(char)},
@@ -85,8 +86,9 @@ static const struct ATOMICTYPEINFO {
{"UInt64",NC_UINT64,sizeof(unsigned long long)},
{"UInt8",NC_UBYTE,sizeof(unsigned char)},
{"Url",NC_STRING,sizeof(char*)},
-{NULL,NC_NAT,0}
};
+#define NCD4_NATOMICTYPES (sizeof(atomictypeinfo)/sizeof(struct ATOMICTYPEINFO))
+static int atomictypessorted = 0;
/***************************************************/
@@ -116,7 +118,7 @@ static NCD4node* getOpaque(NCD4parser*, ncxml_t varxml, NCD4node* group);
static int getValueStrings(NCD4parser*, NCD4node*, ncxml_t xattr, NClist*);
static int isReserved(const char* name);
static const KEYWORDINFO* keyword(const char* name);
-static NCD4node* lookupAtomicType(NClist*, const char* name);
+static NCD4node* lookupAtomicType(NClist*,const char* name);
static NCD4node* lookFor(NClist* elems, const char* name, NCD4sort sort);
static NCD4node* lookupFQN(NCD4parser*, const char* sfqn, NCD4sort);
static int lookupFQNList(NCD4parser*, NClist* fqn, NCD4sort sort, NCD4node** result);
@@ -764,6 +766,7 @@ parseMaps(NCD4parser* parser, NCD4node* var, ncxml_t xml)
int ret = NC_NOERR;
ncxml_t x;
+ NC_UNUSED(parser);
for(x=ncxml_child(xml, "Map");x!= NULL;x=ncxml_next(x,"Map")) {
char* fqn;
fqn = ncxml_attr(x,"name");
@@ -937,6 +940,8 @@ static int
getValueStrings(NCD4parser* parser, NCD4node* type, ncxml_t xattr, NClist* svalues)
{
char* s;
+ NC_UNUSED(parser);
+ NC_UNUSED(type);
/* See first if we have a "value" xml attribute */
s = ncxml_attr(xattr,"value");
if(s != NULL)
@@ -1249,7 +1254,7 @@ defineBytestringType(NCD4parser* parser)
if(ret != NC_NOERR) goto done;
SETNAME(bstring,"_bytestring");
bstring->opaque.size = 0;
- bstring->basetype = lookupAtomicType(parser,"UInt8");
+ bstring->basetype = lookupAtomicType(parser->meta->atomictypes,"UInt8");
PUSH(parser->metadata->root->types,bstring);
parser->metadata->_bytestring = bstring;
} else
@@ -1259,16 +1264,25 @@ defineBytestringType(NCD4parser* parser)
}
#endif
+static int atisort(const void* a, const void* b)
+{
+ return strcasecmp(((struct ATOMICTYPEINFO*)a)->name,((struct ATOMICTYPEINFO*)b)->name);
+}
+
static int
defineAtomicTypes(NCD4meta* meta, NClist* list)
{
int ret = NC_NOERR;
NCD4node* node;
- const struct ATOMICTYPEINFO* ati;
+ size_t i;
- if(list == NULL)
- return THROW(NC_EINTERNAL);
- for(ati=atomictypeinfo;ati->name;ati++) {
+ if(list == NULL) return THROW(NC_EINTERNAL);
+ if(!atomictypessorted) {
+ qsort((void*)atomictypeinfo, NCD4_NATOMICTYPES,sizeof(struct ATOMICTYPEINFO),atisort);
+ atomictypessorted = 1;
+ }
+ for(i=0;itype,&node))) goto done;
SETNAME(node,ati->name);
PUSH(list,node);
@@ -1277,29 +1291,26 @@ defineAtomicTypes(NCD4meta* meta, NClist* list)
return THROW(ret);
}
+static int
+aticmp(const void* a, const void* b)
+{
+ const char* name = (const char*)a;
+ NCD4node** nodebp = (NCD4node**)b;
+ return strcasecmp(name,(*nodebp)->name);
+}
+
/* Binary search the set of set of atomictypes */
static NCD4node*
lookupAtomicType(NClist* atomictypes, const char* name)
{
- size_t n = nclistlength(atomictypes);
- if (n == 0) return NULL;
- size_t L = 0;
- size_t R = n - 1;
- NCD4node* p;
-
- for(;;) {
- if(L > R) break;
- size_t m = (L + R) / 2;
- p = (NCD4node*)nclistget(atomictypes,m);
- int cmp = strcasecmp(p->name,name);
- if(cmp == 0)
- return p;
- if(cmp < 0)
- L = (m + 1);
- else /*cmp > 0*/
- R = (m - 1);
- }
- return NULL;
+ void* match = NULL;
+ size_t ntypes = 0;
+ NCD4node** types = NULL;
+ assert(atomictypessorted && nclistlength(atomictypes) > 0);
+ ntypes = nclistlength(atomictypes);
+ types = (NCD4node**)atomictypes->content;
+ match = bsearch((void*)name,(void*)types,ntypes,sizeof(NCD4node*),aticmp);
+ return (match==NULL?NULL:*(NCD4node**)match);
}
/**************************************************/
@@ -1650,6 +1661,7 @@ parseForwards(NCD4parser* parser, NCD4node* root)
int ret = NC_NOERR;
size_t i,j;
+ NC_UNUSED(root);
/* process all vars */
for(i=0;ivars);i++) {
NCD4node* var = (NCD4node*)nclistget(parser->vars,i);
diff --git a/libdispatch/CMakeLists.txt b/libdispatch/CMakeLists.txt
index 0f5d66d085..68251ba185 100644
--- a/libdispatch/CMakeLists.txt
+++ b/libdispatch/CMakeLists.txt
@@ -8,10 +8,9 @@ add_library(dispatch OBJECT)
target_sources(dispatch
PRIVATE
- dcopy.c dfile.c ddim.c datt.c dattinq.c dattput.c dattget.c derror.c dvar.c dvarget.c dvarput.c dvarinq.c ddispatch.c nclog.c dstring.c dutf8.c dinternal.c doffsets.c ncuri.c nclist.c ncbytes.c nchashmap.c nctime.c nc.c nclistmgr.c utf8proc.h utf8proc.c dpathmgr.c dutil.c drc.c dauth.c dreadonly.c dnotnc4.c dnotnc3.c dinfermodel.c
- daux.c dinstance.c dinstance_intern.c
- dcrc32.c dcrc32.h dcrc64.c ncexhash.c ncxcache.c ncjson.c ds3util.c dparallel.c dmissing.c
- ncproplist.c
+dcopy.c dfile.c ddim.c datt.c dattinq.c dattput.c dattget.c derror.c dvar.c dvarget.c dvarput.c dvarinq.c ddispatch.c nclog.c dstring.c dutf8.c dinternal.c doffsets.c ncuri.c nclist.c ncbytes.c nchashmap.c nctime.c nc.c nclistmgr.c utf8proc.h utf8proc.c dpathmgr.c dutil.c drc.c dauth.c dreadonly.c dnotnc4.c dnotnc3.c dinfermodel.c
+daux.c dinstance.c dinstance_intern.c
+dcrc32.c dcrc32.h dcrc64.c ncexhash.c ncxcache.c ncjson.c ds3util.c dparallel.c dmissing.c ncproplist.c
)
if (NETCDF_ENABLE_DLL)
diff --git a/libdispatch/Makefile.am b/libdispatch/Makefile.am
index 8d35269ea3..9001ed4c25 100644
--- a/libdispatch/Makefile.am
+++ b/libdispatch/Makefile.am
@@ -51,7 +51,7 @@ endif # NETCDF_ENABLE_BYTERANGE
if NETCDF_ENABLE_S3
if NETCDF_ENABLE_S3_INTERNAL
# Renamed to avoid conflicts with the HDF5 files
-libdispatch_la_SOURCES += ncs3sdk_h5.c nch5s3comms.c nch5s3comms.h ncutil.h nccurl_setup.h \
+libdispatch_la_SOURCES += ncs3sdk_h5.c nch5s3comms.c nch5s3comms.h nccurl_setup.h \
nccurl_sha256.c nccurl_sha256.h nccurl_hmac.c nccurl_hmac.h
AM_CPPFLAGS += -I$(top_srcdir)/libncxml
libdispatch_la_CPPFLAGS += ${AM_CPPFLAGS}
diff --git a/libdispatch/daux.c b/libdispatch/daux.c
index 71e2a7c524..c79be9cca7 100644
--- a/libdispatch/daux.c
+++ b/libdispatch/daux.c
@@ -952,6 +952,8 @@ This function is just a wrapper around nc_dump__data.
@return error code
*/
+EXTERNL int nc_dump_data(int ncid, nc_type xtype, void* memory, size_t count, char** bufp);
+
EXTERNL int
ncaux_dump_data(int ncid, int xtype, void* memory, size_t count, char** bufp)
{
diff --git a/libdispatch/dcopy.c b/libdispatch/dcopy.c
index 82acc353ad..33ac830e04 100644
--- a/libdispatch/dcopy.c
+++ b/libdispatch/dcopy.c
@@ -12,15 +12,12 @@
#include "nc_logging.h"
#include "nclist.h"
+/* Forward */
static int NC_find_equal_type(int ncid1, nc_type xtype1, int ncid2, nc_type *xtype2);
-
#ifdef USE_NETCDF4
-
static int searchgroup(int ncid1, int tid1, int grp, int* tid2);
static int searchgrouptree(int ncid1, int tid1, int grp, int* tid2);
-
-#endif /*USE_NETCDF4*/
-
+#endif
#ifdef USE_NETCDF4
/**
@@ -59,8 +56,8 @@ NC_compare_nc_types(int ncid1, int typeid1, int ncid2, int typeid2, int *equalp)
}
else
{
- size_t i;
int ret, equal1;
+ size_t i;
char name1[NC_MAX_NAME];
char name2[NC_MAX_NAME];
size_t size1, size2;
@@ -212,44 +209,6 @@ NC_rec_find_nc_type(int ncid1, nc_type tid1, int ncid2, nc_type* tid2)
#endif /* USE_NETCDF4 */
-/**
- * @internal Given a type in one file, find its equal (if any) in
- * another file. It sounds so simple, but it's a real pain!
- *
- * @param ncid1 File ID.
- * @param xtype1 Type ID.
- * @param ncid2 File ID.
- * @param xtype2 Pointer that gets type ID of equal type.
- *
- * @return ::NC_NOERR No error.
- * @author Ed Hartnett
-*/
-static int
-NC_find_equal_type(int ncid1, nc_type xtype1, int ncid2, nc_type *xtype2)
-{
- int ret = NC_NOERR;
-
- /* Check input */
- if(xtype1 <= NC_NAT)
- return NC_EINVAL;
-
- /* Handle atomic types. */
- if (xtype1 <= NC_MAX_ATOMIC_TYPE)
- {
- if(xtype2)
- *xtype2 = xtype1;
- return NC_NOERR;
- }
-
-#ifdef USE_NETCDF4
- /* Recursively search group ncid2 and its children
- to find a type that is equal (using compare_type)
- to xtype1. */
- ret = NC_rec_find_nc_type(ncid1, xtype1 , ncid2, xtype2);
-#endif /* USE_NETCDF4 */
- return ret;
-}
-
/**
* This will copy a variable that is an array of primitive type and
* its attributes from one file to another, assuming dimensions in the
@@ -737,5 +696,45 @@ searchgrouptree(int ncid1, int tid1, int grp, int* tid2)
return ret;
}
-#endif /* USE_NETCDF4 */
+#endif
+
+/**
+ * @internal Given a type in one file, find its equal (if any) in
+ * another file. It sounds so simple, but it's a real pain!
+ *
+ * @param ncid1 File ID.
+ * @param xtype1 Type ID.
+ * @param ncid2 File ID.
+ * @param xtype2 Pointer that gets type ID of equal type.
+ *
+ * @return ::NC_NOERR No error.
+ * @return ::NC_EBADTYPE
+ * @author Ed Hartnett
+*/
+static int
+NC_find_equal_type(int ncid1, nc_type xtype1, int ncid2, nc_type *xtype2)
+{
+ int ret = NC_NOERR;
+
+ /* Check input */
+ if(xtype1 <= NC_NAT)
+ return NC_EINVAL;
+
+ /* Handle atomic types. */
+ if (xtype1 <= NC_MAX_ATOMIC_TYPE)
+ {
+ if(xtype2)
+ *xtype2 = xtype1;
+ return NC_NOERR;
+ }
+#ifdef USE_NETCDF4
+ /* Recursively search group ncid2 and its children
+ to find a type that is equal (using compare_type)
+ to xtype1. */
+ ret = NC_rec_find_nc_type(ncid1, xtype1 , ncid2, xtype2);
+#else
+ ret = NC_EBADTYPE;
+#endif
+ return ret;
+}
diff --git a/libdispatch/ddispatch.c b/libdispatch/ddispatch.c
index 4ccc65d8d2..79b893f44b 100644
--- a/libdispatch/ddispatch.c
+++ b/libdispatch/ddispatch.c
@@ -4,25 +4,27 @@ See LICENSE.txt for license information.
*/
#include "config.h"
-#include "ncdispatch.h"
-#include "ncuri.h"
-#include "nclog.h"
-#include "ncbytes.h"
-#include "ncrc.h"
-#include "ncoffsets.h"
-#include "ncpathmgr.h"
-#include "ncxml.h"
-#include "nc4internal.h"
/* Required for getcwd, other functions. */
#ifdef HAVE_UNISTD_H
#include
#endif
-
/* Required for getcwd, other functions. */
#ifdef _WIN32
#include
#endif
+#include
+
+#include "netcdf.h"
+#include "ncdispatch.h"
+#include "ncuri.h"
+#include "nclog.h"
+#include "ncbytes.h"
+#include "ncrc.h"
+#include "ncoffsets.h"
+#include "ncpathmgr.h"
+#include "ncxml.h"
+#include "nc4internal.h"
#if defined(NETCDF_ENABLE_BYTERANGE) || defined(NETCDF_ENABLE_DAP) || defined(NETCDF_ENABLE_DAP4)
#include
@@ -32,18 +34,52 @@ See LICENSE.txt for license information.
#include "ncs3sdk.h"
#endif
+/**************************************************/
+/* Global State constants and state */
+
#define MAXPATH 1024
+/* The singleton global state object */
+static NCglobalstate* nc_globalstate = NULL;
+
/* Define vectors of zeros and ones for use with various nc_get_varX functions */
/* Note, this form of initialization fails under Cygwin */
size_t NC_coord_zero[NC_MAX_VAR_DIMS] = {0};
size_t NC_coord_one[NC_MAX_VAR_DIMS] = {1};
ptrdiff_t NC_stride_one[NC_MAX_VAR_DIMS] = {1};
-/*
-static nc_type longtype = (sizeof(long) == sizeof(int)?NC_INT:NC_INT64);
-static nc_type ulongtype = (sizeof(unsigned long) == sizeof(unsigned int)?NC_UINT:NC_UINT64);
-*/
+/**************************************************/
+/* Atomic type constants */
+
+/* The sizes of types may vary from platform to platform, but within
+ * netCDF files, type sizes are fixed. */
+#define NC_CHAR_LEN sizeof(char) /**< @internal Size of char. */
+#define NC_STRING_LEN sizeof(char *) /**< @internal Size of char *. */
+#define NC_BYTE_LEN 1 /**< @internal Size of byte. */
+#define NC_SHORT_LEN 2 /**< @internal Size of short. */
+#define NC_INT_LEN 4 /**< @internal Size of int. */
+#define NC_FLOAT_LEN 4 /**< @internal Size of float. */
+#define NC_DOUBLE_LEN 8 /**< @internal Size of double. */
+#define NC_INT64_LEN 8 /**< @internal Size of int64. */
+
+/** @internal Names of atomic types. */
+const char* nc4_atomic_name[NUM_ATOMIC_TYPES] = {"none", "byte", "char",
+ "short", "int", "float",
+ "double", "ubyte",
+ "ushort", "uint",
+ "int64", "uint64", "string"};
+static const size_t nc4_atomic_size[NUM_ATOMIC_TYPES] = {0, NC_BYTE_LEN, NC_CHAR_LEN, NC_SHORT_LEN,
+ NC_INT_LEN, NC_FLOAT_LEN, NC_DOUBLE_LEN,
+ NC_BYTE_LEN, NC_SHORT_LEN, NC_INT_LEN, NC_INT64_LEN,
+ NC_INT64_LEN, NC_STRING_LEN};
+
+/**************************************************/
+/* Forward */
+static int NC_createglobalstate(void);
+
+/**************************************************/
+/** \defgroup dispatch_initialize functions. */
+/** \{ */
/* Allow dispatch to do general initialization and finalization */
int
@@ -141,16 +177,9 @@ NCDISPATCH_finalize(void)
NC_freeglobalstate(); /* should be one of the last things done */
return status;
}
+/** \} */
/**************************************************/
-/* Global State constants and state */
-
-/* The singleton global state object */
-static NCglobalstate* nc_globalstate = NULL;
-
-/* Forward */
-static int NC_createglobalstate(void);
-
/** \defgroup global_state Global state functions. */
/** \{
@@ -225,34 +254,13 @@ NC_freeglobalstate(void)
/** \} */
/**************************************************/
-/** \defgroup atomic_types Atomic Type functions */
+/** \defgroup atomic_types Atomic Type functions, where
+ atomic does not include NC_STRING. */
/** \{
\ingroup atomic_types
*/
-/* The sizes of types may vary from platform to platform, but within
- * netCDF files, type sizes are fixed. */
-#define NC_CHAR_LEN sizeof(char) /**< @internal Size of char. */
-#define NC_STRING_LEN sizeof(char *) /**< @internal Size of char *. */
-#define NC_BYTE_LEN 1 /**< @internal Size of byte. */
-#define NC_SHORT_LEN 2 /**< @internal Size of short. */
-#define NC_INT_LEN 4 /**< @internal Size of int. */
-#define NC_FLOAT_LEN 4 /**< @internal Size of float. */
-#define NC_DOUBLE_LEN 8 /**< @internal Size of double. */
-#define NC_INT64_LEN 8 /**< @internal Size of int64. */
-
-/** @internal Names of atomic types. */
-const char* nc4_atomic_name[NUM_ATOMIC_TYPES] = {"none", "byte", "char",
- "short", "int", "float",
- "double", "ubyte",
- "ushort", "uint",
- "int64", "uint64", "string"};
-static const size_t nc4_atomic_size[NUM_ATOMIC_TYPES] = {0, NC_BYTE_LEN, NC_CHAR_LEN, NC_SHORT_LEN,
- NC_INT_LEN, NC_FLOAT_LEN, NC_DOUBLE_LEN,
- NC_BYTE_LEN, NC_SHORT_LEN, NC_INT_LEN, NC_INT64_LEN,
- NC_INT64_LEN, NC_STRING_LEN};
-
/**
* @internal Get the name and size of an atomic type. For strings, 1 is
* returned.
diff --git a/libdispatch/derror.c b/libdispatch/derror.c
index f1fd8bf5c4..e5ae6d121f 100644
--- a/libdispatch/derror.c
+++ b/libdispatch/derror.c
@@ -100,6 +100,8 @@ const char *nc_strerror(int ncerr1)
{
case NC_NOERR:
return "No error";
+ case NC_ERROR:
+ return "Non-specific error";
case NC_EBADID:
return "NetCDF: Not a valid ID";
case NC_ENFILE:
@@ -283,7 +285,11 @@ const char *nc_strerror(int ncerr1)
return "NetCDF: Some object not found";
case NC_EPLUGIN:
return "NetCDF: Unclassified failure in accessing a dynamically loaded plugin";
- default:
+ case NC_ENOTZARR:
+ return "Malformed (NC)Zarr file";
+ case NC_EZARRMETA:
+ return "Malformed (NC)Zarr file consolidated metadata";
+ default:
#ifdef USE_PNETCDF
/* The behavior of ncmpi_strerror here is to return
NULL, not a string. This causes problems in (at least)
diff --git a/libdispatch/dfilter.c b/libdispatch/dfilter.c
index 206515a850..e0e25609be 100644
--- a/libdispatch/dfilter.c
+++ b/libdispatch/dfilter.c
@@ -8,11 +8,8 @@
*/
#include "config.h"
-#include
-#include
-#include
-#ifdef _MSC_VER
-#include
+#ifdef USE_HDF5
+#include "hdf5internal.h"
#endif
#include "netcdf.h"
@@ -21,10 +18,6 @@
#include "nc4internal.h"
#include "nclog.h"
-#ifdef USE_HDF5
-#include "hdf5internal.h"
-#endif
-
#ifdef NETCDF_ENABLE_NCZARR
#include "zdispatch.h"
#endif
diff --git a/libdispatch/dhttp.c b/libdispatch/dhttp.c
index 45f93828f6..7c96800a5c 100644
--- a/libdispatch/dhttp.c
+++ b/libdispatch/dhttp.c
@@ -161,7 +161,7 @@ nc_http_close(NC_HTTP_STATE* state)
#ifdef NETCDF_ENABLE_S3
case HTTPS3: {
if(state->s3.s3client)
- NC_s3sdkclose(state->s3.s3client, state->s3.info, 0, NULL);
+ NC_s3sdkclose(state->s3.s3client, NULL);
NC_s3clear(state->s3.info);
nullfree(state->s3.info);
state->s3.s3client = NULL;
diff --git a/libdispatch/dinfermodel.c b/libdispatch/dinfermodel.c
index adb3f13779..a0cbe241c9 100644
--- a/libdispatch/dinfermodel.c
+++ b/libdispatch/dinfermodel.c
@@ -127,7 +127,7 @@ static struct FORMATMODES {
{"udf1",NC_FORMATX_UDF1,0},
{"nczarr",NC_FORMATX_NCZARR,NC_FORMAT_NETCDF4},
{"zarr",NC_FORMATX_NCZARR,NC_FORMAT_NETCDF4},
-{"bytes",NC_FORMATX_NC4,NC_FORMAT_NETCDF4}, /* temporary until 3 vs 4 is determined */
+{"bytes",NC_FORMATX_NC4,NC_FORMAT_NETCDF4}, /* temporary until netcdf-3 vs netcdf-4 is determined */
{NULL,0},
};
@@ -137,15 +137,13 @@ static const struct MACRODEF {
char* defkey;
char* defvalues[4];
} macrodefs[] = {
-{"zarr","mode",{"nczarr","zarr",NULL}},
+{"zarr","mode",{"zarr",NULL}},
+{"nczarr","mode",{"nczarr",NULL}},
{"dap2","mode",{"dap2",NULL}},
{"dap4","mode",{"dap4",NULL}},
-{"s3","mode",{"s3","nczarr",NULL}},
+{"s3","mode",{"s3",NULL}},
+{"gs3","mode",{"gs3",NULL}}, /* Google S3 API */
{"bytes","mode",{"bytes",NULL}},
-{"xarray","mode",{"zarr", NULL}},
-{"noxarray","mode",{"nczarr", "noxarray", NULL}},
-{"zarr","mode",{"nczarr","zarr", NULL}},
-{"gs3","mode",{"gs3","nczarr",NULL}}, /* Google S3 API */
{NULL,NULL,{NULL}}
};
@@ -162,10 +160,8 @@ static const struct MODEINFER {
char* key;
char* inference;
} modeinferences[] = {
-{"zarr","nczarr"},
{"xarray","zarr"},
{"noxarray","nczarr"},
-{"noxarray","zarr"},
{NULL,NULL}
};
@@ -174,6 +170,7 @@ static const struct MODEINFER modenegations[] = {
{"bytes","nczarr"}, /* bytes negates (nc)zarr */
{"bytes","zarr"},
{"noxarray","xarray"},
+{"nozmetadata","zmetadata"},
{NULL,NULL}
};
@@ -413,6 +410,7 @@ envvlist2string(NClist* envv, const char* delim)
NCbytes* buf = NULL;
char* result = NULL;
+ NC_UNUSED(delim);
if(envv == NULL || nclistlength(envv) == 0) return NULL;
buf = ncbytesnew();
for(i=0;i= NC_FIRSTUSERTYPEID) {
stat = nc_inq_user_type(ncid,typeid,name,size,basetypep,nfieldsp,classp);
} else
-#endif
+#endif /*USE_NETCDF4*/
if(typeid > NC_NAT && typeid <= NC_MAX_ATOMIC_TYPE) {
if(basetypep) *basetypep = NC_NAT;
if(nfieldsp) *nfieldsp = 0;
diff --git a/libdispatch/dinstance_intern.c b/libdispatch/dinstance_intern.c
index f1dac8eda6..6a4477e23e 100644
--- a/libdispatch/dinstance_intern.c
+++ b/libdispatch/dinstance_intern.c
@@ -20,6 +20,7 @@ Currently two operations are defined:
#include "nc4dispatch.h"
#include "ncoffsets.h"
#include "ncbytes.h"
+#include "nclog.h"
#undef REPORT
#undef DEBUG
@@ -76,8 +77,11 @@ NC_reclaim_data(NC* nc, nc_type xtype, void* memory, size_t count)
NC_TYPE_INFO_T* utype = NULL;
assert(nc != NULL);
- assert((memory == NULL && count == 0) || (memory != NULL || count > 0));
+ /* If memory is NULL, ignore count */
+ assert(memory == NULL || (memory != NULL && count > 0));
+ if(memory == NULL) goto done;
+
/* Process atomic types */
/* Optimize: Vector of fixed size atomic types (always the case for netcdf-3)*/
@@ -88,7 +92,7 @@ NC_reclaim_data(NC* nc, nc_type xtype, void* memory, size_t count)
if(xtype == NC_STRING) {
char** ss = (char**)memory;
for(i=0;ilen > 0 && vlen->p != NULL) {
char** slist = (char**)vlen->p; /* vlen instance is a vector of string pointers */
- for(i=0;ilen;i++) {if(slist[i] != NULL) free(slist[i]);}
+ for(i=0;i<(int)vlen->len;i++) {if(slist[i] != NULL) {free(slist[i]);slist[i] = NULL;}}
}
goto out;
}
@@ -167,12 +172,12 @@ reclaim_datar(NC_FILE_INFO_T* file, NC_TYPE_INFO_T* utype, Position instance)
if((stat = NC_type_alignment_internal(file,basetypeid,basetype,&alignment))) goto done;;
vinstance.memory = (char*)vlen->p; /* use char* so we can do pointer arithmetic */
vinstance.memory = (void*)NC_read_align((uintptr_t)vinstance.memory,alignment);
- for(i=0;ilen;i++) {
+ for(i=0;i<(int)vlen->len;i++) {
if((stat=reclaim_datar(file,basetype,vinstance))) goto done; /* reclaim one basetype instance */
vinstance.memory += basetype->size; /* move to next base instance */
}
out:
- if(vlen->len > 0 && vlen->p != NULL) {free(vlen->p);}
+ if(vlen->len > 0 && vlen->p != NULL) {free(vlen->p); vlen->p = NULL;}
goto done;
} else if(utype->nc_type_class == NC_COMPOUND) {
Position finstance; /* mark the fields's instance */
@@ -198,7 +203,7 @@ reclaim_datar(NC_FILE_INFO_T* file, NC_TYPE_INFO_T* utype, Position instance)
if(field->nc_typeid == NC_STRING) {
char** strvec = (char**)finstance.memory;
for(i=0;ip = (void*)dststrvec;
- for(i=0;ilen;i++) {
+ for(i=0;i<(int)srcvlens->len;i++) {
if((dststrvec[i] = strdup(srcstrvec[i]))==NULL) {stat = NC_ENOMEM; goto done;}
}
goto done;
@@ -406,7 +411,7 @@ copy_datar(NC_FILE_INFO_T* file, NC_TYPE_INFO_T* utype, Position src, Position d
dstvlens->p = vdst.memory; /* don't lose it */
vsrc.memory = (void*)NC_read_align((uintptr_t)vsrc.memory,alignment);
vdst.memory = (void*)NC_read_align((uintptr_t)vdst.memory,alignment);
- for(i=0;ilen;i++) {
+ for(i=0;i<(int)srcvlens->len;i++) {
if((stat=copy_datar(file,basetype,vsrc,vdst))) goto done;
vsrc.memory += basetype->size;
vdst.memory += basetype->size;
@@ -441,7 +446,7 @@ copy_datar(NC_FILE_INFO_T* file, NC_TYPE_INFO_T* utype, Position src, Position d
if(field->nc_typeid == NC_STRING) {
char** srcstrvec = (char**)src.memory;
char** dststrvec = (char**)dst.memory;
- for(i=0;isize;
fdst.memory += basetype->size;
@@ -465,7 +470,7 @@ copy_datar(NC_FILE_INFO_T* file, NC_TYPE_INFO_T* utype, Position src, Position d
} else {stat = NC_EBADTYPE; goto done;}
done:
- return stat;
+ return NCTHROW(stat);
}
#endif
@@ -531,7 +536,7 @@ NC_type_alignment_internal(NC_FILE_INFO_T* file, nc_type xtype, NC_TYPE_INFO_T*
Why was this here?
if(stat == NC_NOERR && align == 0) stat = NC_EINVAL;
#endif
- return stat;
+ return NCTHROW(stat);
}
#endif
@@ -546,11 +551,15 @@ NC_reclaim_data_all(NC* nc, nc_type xtypeid, void* memory, size_t count)
int stat = NC_NOERR;
assert(nc != NULL);
+ /* If memory is NULL, ignore count */
+ assert(memory == NULL || (memory != NULL && count > 0));
+ if(memory == NULL) goto done;
stat = NC_reclaim_data(nc,xtypeid,memory,count);
if(stat == NC_NOERR && memory != NULL)
- free(memory);
- return stat;
+ {free(memory); memory = NULL;}
+done:
+ return NCTHROW(stat);
}
/* Alternate entry point: includes recovering the top-level memory */
@@ -597,7 +606,7 @@ NC_copy_data_all(NC* nc, nc_type xtype, const void* memory, size_t count, void**
#endif
if(copyp) {*copyp = copy; copy = NULL;}
done:
- return stat;
+ return NCTHROW(stat);
}
/* Alternate entry point: includes recovering the top-level memory */
diff --git a/libdispatch/dmissing.c b/libdispatch/dmissing.c
index 0c9be82745..7a1cc3249b 100644
--- a/libdispatch/dmissing.c
+++ b/libdispatch/dmissing.c
@@ -1,5 +1,5 @@
/*
- * Copyright 2018, University Corporation for Atmospheric Research
+ * Copyright 2018, University Corporation for Atmospheric Research
* See netcdf/COPYRIGHT file for copying and redistribution conditions.
*/
@@ -70,24 +70,24 @@ strdup(const char* s)
size_t
strlcpy(char *dst, const char* src, size_t dsize)
{
- const char *osrc = src;
- size_t nleft = dsize;
-
- /* Copy as many bytes as will fit. */
- if (nleft != 0) {
- while (--nleft != 0) {
- if ((*dst++ = *src++) == '\0')
- break;
- }
- }
- /* Not enough room in dst, add NUL and traverse rest of src. */
- if (nleft == 0) {
- if (dsize != 0)
- *dst = '\0'; /* NUL-terminate dst */
- while (*src++)
- ;
- }
- return(src - osrc - 1); /* count does not include NUL */
+ const char *osrc = src;
+ size_t nleft = dsize;
+
+ /* Copy as many bytes as will fit. */
+ if (nleft != 0) {
+ while (--nleft != 0) {
+ if ((*dst++ = *src++) == '\0')
+ break;
+ }
+ }
+ /* Not enough room in dst, add NUL and traverse rest of src. */
+ if (nleft == 0) {
+ if (dsize != 0)
+ *dst = '\0'; /* NUL-terminate dst */
+ while (*src++)
+ ;
+ }
+ return(src - osrc - 1); /* count does not include NUL */
}
#endif
@@ -119,29 +119,33 @@ strlcpy(char *dst, const char* src, size_t dsize)
size_t
nc_strlcat(char* dst, const char* src, size_t dsize)
{
- const char *odst = dst;
- const char *osrc = src;
- size_t n = dsize;
- size_t dlen;
-
- /* Find the end of dst and adjust bytes left but don't go past end. */
- while (n-- != 0 && *dst != '\0')
- dst++;
- dlen = dst - odst;
- n = dsize - dlen;
-
- if (n-- == 0)
- return(dlen + strlen(src));
- while (*src != '\0') {
- if (n != 0) {
- *dst++ = *src;
- n--;
- }
- src++;
- }
- *dst = '\0';
-
- return(dlen + (src - osrc)); /* count does not include NUL */
+ const char *odst = dst;
+ const char *osrc = src;
+ size_t n = dsize;
+ size_t dlen,slen;
+
+ /* Find the end of dst and adjust bytes left but don't go past end. */
+ while (n-- != 0 && *dst != '\0')
+ dst++;
+ dlen = dst - odst;
+ n = dsize - dlen;
+
+ slen = (src==NULL?0:strlen(src));
+ if (n-- == 0)
+ return(dlen + slen);
+ if(src != NULL) {
+ while (*src != '\0') {
+ if (n != 0) {
+ *dst++ = *src;
+ n--;
+ }
+ src++;
+ }
+ *dst = '\0';
+ }
+ if(src != NULL)
+ return(dlen + (src - osrc)); /* count does not include NUL */
+ return dlen;
}
#endif /*!HAVE_STRLCAT*/
@@ -151,11 +155,11 @@ nc_strlcat(char* dst, const char* src, size_t dsize)
Not currently used
/* Define an version of strcasestr renamed to avoid any system definition */
/* See https://android.googlesource.com/platform/bionic/+/a27d2baa/libc/string/strcasestr.c */
-/* $OpenBSD: strcasestr.c,v 1.3 2006/03/31 05:34:55 deraadt Exp $ */
-/* $NetBSD: strcasestr.c,v 1.2 2005/02/09 21:35:47 kleink Exp $ */
+/* $OpenBSD: strcasestr.c,v 1.3 2006/03/31 05:34:55 deraadt Exp $ */
+/* $NetBSD: strcasestr.c,v 1.2 2005/02/09 21:35:47 kleink Exp $ */
/*-
* Copyright (c) 1990, 1993
- * The Regents of the University of California. All rights reserved.
+ * The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Chris Torek.
diff --git a/libdispatch/dplugins.c b/libdispatch/dplugins.c
index 4347d8227b..f01360e67e 100644
--- a/libdispatch/dplugins.c
+++ b/libdispatch/dplugins.c
@@ -110,6 +110,7 @@ nc_plugin_path_initialize(void)
assert(dst != NULL);
for(i=0;irchome);
rcfreeentries(info->entries);
NC_s3freeprofilelist(info->s3profiles);
-
}
static void
diff --git a/libdispatch/ds3util.c b/libdispatch/ds3util.c
index 2b81f342bc..9401ace692 100644
--- a/libdispatch/ds3util.c
+++ b/libdispatch/ds3util.c
@@ -25,6 +25,7 @@
#include "nc4internal.h"
#include "ncuri.h"
#include "nclist.h"
+#include "ncbytes.h"
#include "ncrc.h"
#include "nclog.h"
#include "ncs3sdk.h"
@@ -34,9 +35,6 @@
/* Alternate .aws directory location */
#define NC_TEST_AWS_DIR "NC_TEST_AWS_DIR"
-#define AWSHOST ".amazonaws.com"
-#define GOOGLEHOST "storage.googleapis.com"
-
enum URLFORMAT {UF_NONE=0, UF_VIRTUAL=1, UF_PATH=2, UF_S3=3, UF_OTHER=4};
/* Read these files in order and later overriding earlier */
@@ -47,6 +45,7 @@ static const char* awsconfigfiles[] = {".aws/config",".aws/credentials",NULL};
/* Forward */
static int endswith(const char* s, const char* suffix);
+static void freeprofile(struct AWSprofile* profile);
static void freeentry(struct AWSentry* e);
static int awsparse(const char* text, NClist* profiles);
@@ -78,9 +77,9 @@ NC_s3sdkenvironment(void)
/*
Rebuild an S3 url into a canonical path-style url.
If region is not in the host, then use specified region
-if provided, otherwise leave blank and let the S3 server deal with it.
-@param url (in) the current url
-@param s3 (in/out) NCS3INFO structure
+if provided, otherwise us-east-1.
+@param url (in) the current url
+@param s3 (in/out) NCS3INFO structure
@param pathurlp (out) the resulting pathified url string
*/
@@ -309,7 +308,13 @@ NC_s3urlprocess(NCURI* url, NCS3INFO* s3, NCURI** newurlp)
/* Rebuild the URL to path format and get a usable region and optional bucket*/
if((stat = NC_s3urlrebuild(url,s3,&url2))) goto done;
- s3->host = strdup(url2->host);
+ if(url2->port){
+ char hostport[8192];
+ snprintf(hostport,sizeof(hostport),"%s:%s",url2->host,url2->port);
+ s3->host = strdup(hostport);
+ }else{
+ s3->host = strdup(url2->host);
+ }
/* construct the rootkey minus the leading bucket */
pathsegments = nclistnew();
if((stat = NC_split_delim(url2->path,'/',pathsegments))) goto done;
@@ -358,19 +363,29 @@ NC_s3clear(NCS3INFO* s3)
}
/*
-Check if a url has indicators that signal an S3 or Google S3 url.
+Check if a url has indicators that signal an S3 or Google S3 url or ZoH S3 url.
+The rules are as follows:
+1. If the protocol is "s3" or "gs3" or "zoh", then return (true,s3|gs3|zoh).
+2. If the mode contains "s3" or "gs3" or "zoh", then return (true,s3|gs3|zoh).
+3. Check the host name:
+3.1 If the host ends with ".amazonaws.com", then return (true,s3).
+3.1 If the host is "storage.googleapis.com", then return (true,gs3).
+4. Otherwise return (false,unknown).
*/
int
-NC_iss3(NCURI* uri, enum NCS3SVC* svcp)
+NC_iss3(NCURI* uri, NCS3SVC* svcp)
{
int iss3 = 0;
NCS3SVC svc = NCS3UNK;
if(uri == NULL) goto done; /* not a uri */
- /* is the protocol "s3" or "gs3" ? */
+ /* is the protocol "s3" or "gs3" or "zoh" ? */
if(strcasecmp(uri->protocol,"s3")==0) {iss3 = 1; svc = NCS3; goto done;}
if(strcasecmp(uri->protocol,"gs3")==0) {iss3 = 1; svc = NCS3GS; goto done;}
+#ifdef NETCDF_ENABLE_ZOH
+ if(strcasecmp(uri->protocol,"zoh")==0) {iss3 = 1; svc = NCS3ZOH; goto done;}
+#endif
/* Is "s3" or "gs3" in the mode list? */
if(NC_testmode(uri,"s3")) {iss3 = 1; svc = NCS3; goto done;}
if(NC_testmode(uri,"gs3")) {iss3 = 1; svc = NCS3GS; goto done;}
@@ -384,18 +399,71 @@ NC_iss3(NCURI* uri, enum NCS3SVC* svcp)
return iss3;
}
-const char*
-NC_s3dumps3info(NCS3INFO* info)
+/**************************************************/
+/**
+The .aws/config and .aws/credentials files
+are in INI format (https://en.wikipedia.org/wiki/INI_file).
+This format is not well defined, so the grammar used
+here is restrictive. Here, the term "profile" is the same
+as the INI term "section".
+
+The grammar used is as follows:
+
+Grammar:
+
+inifile: profilelist ;
+profilelist: profile | profilelist profile ;
+profile: '[' profilename ']' EOL entries ;
+entries: empty | entries entry ;
+entry: WORD = WORD EOL ;
+profilename: WORD ;
+Lexical:
+WORD sequence of printable characters - [ \[\]=]+
+EOL '\n' | ';'
+
+Note:
+1. The semicolon at beginning of a line signals a comment.
+2. # comments are not allowed
+3. Duplicate profiles or keys are ignored.
+4. Escape characters are not supported.
+*/
+
+#define AWS_EOF (-1)
+#define AWS_ERR (0)
+#define AWS_WORD (0x10001)
+#define AWS_EOL (0x10002)
+
+typedef struct AWSparser {
+ char* text;
+ char* pos;
+ size_t yylen; /* |yytext| */
+ NCbytes* yytext;
+ int token; /* last token found */
+ int pushback; /* allow 1-token pushback */
+} AWSparser;
+
+#ifdef LEXDEBUG
+static const char*
+tokenname(int token)
{
- static char text[8192];
- snprintf(text,sizeof(text),"host=%s region=%s bucket=%s rootkey=%s profile=%s",
- (info->host?info->host:"null"),
- (info->region?info->region:"null"),
- (info->bucket?info->bucket:"null"),
- (info->rootkey?info->rootkey:"null"),
- (info->profile?info->profile:"null"));
- return text;
+ static char num[32];
+ switch(token) {
+ case AWS_EOF: return "EOF";
+ case AWS_ERR: return "ERR";
+ case AWS_WORD: return "WORD";
+ default: snprintf(num,sizeof(num),"%d",token); return num;
+ }
+ return "UNKNOWN";
}
+#endif
+
+/*
+@param text of the aws credentials file
+@param profiles list of form struct AWSprofile (see ncauth.h)
+*/
+
+#define LBR '['
+#define RBR ']'
static void
freeprofile(struct AWSprofile* profile)
@@ -426,6 +494,19 @@ NC_s3freeprofilelist(NClist* profiles)
}
}
+const char*
+NC_s3dumps3info(NCS3INFO* info)
+{
+ static char text[8192];
+ snprintf(text,sizeof(text),"host=%s region=%s bucket=%s rootkey=%s profile=%s",
+ (info->host?info->host:"null"),
+ (info->region?info->region:"null"),
+ (info->bucket?info->bucket:"null"),
+ (info->rootkey?info->rootkey:"null"),
+ (info->profile?info->profile:"null"));
+ return text;
+}
+
/* Find, load, and parse the aws config &/or credentials file */
int
NC_aws_load_credentials(NCglobalstate* gstate)
@@ -707,15 +788,6 @@ tokenname(int token)
}
#endif
-typedef struct AWSparser {
- char* text;
- char* pos;
- size_t yylen; /* |yytext| */
- NCbytes* yytext;
- int token; /* last token found */
- int pushback; /* allow 1-token pushback */
-} AWSparser;
-
static int
awslex(AWSparser* parser)
{
diff --git a/libdispatch/dtype.c b/libdispatch/dtype.c
index 3de208c1e0..6871b796f8 100644
--- a/libdispatch/dtype.c
+++ b/libdispatch/dtype.c
@@ -38,7 +38,6 @@ type). Read attributes of the new type with nc_get_att (see
/** \{ */
-
/**
\ingroup user_types
Learn if two types are equal.
diff --git a/libdispatch/dutil.c b/libdispatch/dutil.c
index f6f0eecc34..20c1380bec 100644
--- a/libdispatch/dutil.c
+++ b/libdispatch/dutil.c
@@ -539,3 +539,47 @@ NC_joinwith(NClist* segments, const char* sep, const char* prefix, const char* s
ncbytesfree(buf);
return stat;
}
+
+static int
+lexical_compare(const void* arg1, const void* arg2)
+{
+ char* s1 = *((char**)arg1);
+ char* s2 = *((char**)arg2);
+ size_t slen1 = nulllen(s1);
+ size_t slen2 = nulllen(s2);
+ if(slen1 != slen2) return (slen1 - slen2);
+ return strcmp(s1,s2);
+}
+
+/**
+Sort a vector of strings.
+@param n Number of strings to sort
+@param env vector of strings to sort
+*/
+void
+NC_sortenvv(size_t n, char** envv)
+{
+ if(n <= 1) return;
+ qsort(envv, (int)n, sizeof(char*), lexical_compare);
+}
+
+/**
+Sort a nclist of strings.
+@param l NClist of strings
+*/
+void
+NC_sortlist(NClist* l)
+{
+ if(l == NULL || nclistlength(l) == 0) return;
+ NC_sortenvv(nclistlength(l),(char**)nclistcontents(l));
+}
+
+/* Free up a vector of strings */
+void
+NC_freeenvv(size_t nkeys, char** keys)
+{
+ size_t i;
+ for(i=0;ifalse; !0=>true)*/
- NCjson* list;
-} NCjson;
-
-#define NCJ_LBRACKET '['
-#define NCJ_RBRACKET ']'
-#define NCJ_LBRACE '{'
-#define NCJ_RBRACE '}'
-#define NCJ_COLON ':'
-#define NCJ_COMMA ','
-#define NCJ_QUOTE '"'
-#define NCJ_TRUE "true"
-#define NCJ_FALSE "false"
-
-#define NCJ_WORD "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_-$"
-
-/*//////////////////////////////////////////////////*/
-
-typedef struct NCJparser {
- char* text;
- char* pos;
- char* yytext;
- int errno;
- struct {
- char* yytext;
- int token;
- } pushback;
-} NCJparser;
-
-static int
-NCjsonparse(char* text, NCjson** treep)
-{
- int status = NCJ_OK;
- size_t len;
- NCJparser parser = NULL;
- NCjson* tree = NULL;
- if(text == NULL) {status = NCJ_EINVAL; goto done;}
- parser = calloc(1,sizeof(NCJparser));
- if(parser == NULL) {status = NCJ_ENOMEM; goto done;}
- len = strlen(text);
- parser->text = (char*)malloc(len+1+1);
- if(parser->text == NULL) {status = NCJ_ENOMEM; goto done;}
- strcpy(parser->text,text);
- parser->text[len] = '\0';
- parser->text[len+1] = '\0';
- tree = NCJparseR(parser);
-done:
- if(parser != NULL) {
- nullfree(parser->text);
- nullfree(parser->yytext);
- free(parser);
- }
- if(status != NCJ_OK) {
- if(tree != NULL) NCjsonfree(tree);
- } else
- if(treep) *treep = tree;
- return status;
-}
-
-static int
-NCJyytext(NCJparser* parser, char* start, ptrdiff_t pdlen)
-{
- size_t len = (size_t)pdlen;
- if(parser->yytext == NULL)
- parser->yytext = (char*)malloc(len+1);
- else
- parser->yytext = (char*) realloc(parser->yytext,len+1);
- if(parser->yytext == NULL) return NCJ_ENOMEM;
- memcpy(parser->yytext,start,len);
- parser->yytext[len] = NCJ_NUL;
- return NCJ_OK;
-}
-
-static void
-NCJpushback(NCJparser* parser, int token)
-{
- parser->pushback.token = token;
- parser->pushback.yytext = strdup(parser->yytext);
-}
-
-static int
-NCJlex(NCJparser* parser)
-{
- int c;
- int token = NCJ_NUL;
- char* start;
- char* next;
-
- if(parser->pushback.token != NCJ_NOTOKEN) {
- token = parser->pushback.token;
- NCJyytext(parser,parser->pushback.yytext,strlen(parser->pushback.yytext));
- nullfree(parser->pushback.yytext);
- parser->pushback.yytext = NULL;
- parser->pushback.token = NCJ_NOTOKEN;
- return token;
- }
-
- c = *parser->pos;
- if(c == NCJ_NUL) {
- token = NCJ_NUL;
- } else if(strchr(NCJ_WORD, c) != NULL) {
- size_t len;
- start = parser->pos;
- next = start + 1;
- for(;;) {
- c = *parser->pos++;
- if(strchr(NCJ_WHITESPACE,c) != NULL || c == NCJ_NUL) break;
- last++;
- }
- if(!NCJyytext(parser,start,(next - start))) goto done;
- token = NCJ_WORD;
- } else if(c == NCJ_QUOTE) {
- parser->pos++;
- start = parser->pos;
- next = start+1;
- for(;;) {
- c = *parser->pos++;
- if(c == NCJ_QUOTE || c == NCJ_NUL) break;
- last++;
- }
- if(c == NCJ_NUL) {
- parser->errno = NCJ_ESTRING;
- token = NCJ_ERR;
- goto done;
- }
- if(!NCJyytext(parser,start,(next - start))) goto done;
- token = NCJ_STRING;
- } else { /* single char token */
- token = *parser->pos++;
- }
-done:
- if(parser->errno) token = NCJ_ERR;
- return token;
-}
-
-/* Simple recursive descent */
-
-static int
-NCJparseR(NCJparser* parser, NCjson** listp)
-{
- int token = NCJ_ERR;
- NCjson* list = NULL;
- if((token = NCJlex(parser)) == NCJ_ERR) goto done;
- switch (token) {
- case NCJ_NUL;
- break;
- case NCJ_WORD:
- NCJappend(NCJparseAtomic(parser,token),listp);
- break;
- case NCJ_LBRACE:
- NCJappend(NCJparseMap(parser,locallist),listp);
- break;
- case NCJ_LBRACKET:
- NCJappend(NCJparseArray(parser,NULL),)
- case NCJ_STRING:
- return NCJparseAtomic(parser,token);
- default:
- parser->errno = NCJ_EBADTOKEN;
- }
- return NULL;
-}
-
-static NCjson*
-NCJparseAtomic(NCJparser* parser, int kind)
-{
- /* assert (kind == NCJ_WORD || kind = NCJ_QUOTE) */
- NCjson* node;
- if((node = NCJmakenode(parser)) == NULL)
- {parser->errno = NCJ_ENOMEM; goto done;}
- if(kind == NCJ_STRING)
- node->sort = NCJ_WORD;
- node->word = strdup(parser->yytext);
- } else {
- /* Try to convert to number or boolean; last resort is word */
- size_t count = (last - start) + 1;
- int nread = 0;
- int ncvt = sscan(parser->yytext,
- "%L",&node->num,&nread);
- if(ncvt == 1 && nread == count) {
- node->sort = NCJ_NUMBER;
- } else if(strcasecmp(parser->yytext,NCJ_TRUE)==0) {
- node->sort = NCJ_BOOLEAN;
- node->num = 1;
- } else if(strcasecmp(parser->yytext,NCJ_FALSE)==0) {
- node->sort = NCJ_BOOLEAN;
- node->num = 0;
- } else {
- node->word = strdup(parser->yytext);
- node->sort = NCJ_WORD;
- }
- }
-done:
- return node;
-}
-
-static NCjson*
-NCJparseArray(NCJparser* parser)
-{
- NCjson* head = NULL;
- NCjson* last = NULL;
- int token = NCJ_ERR;
-#if 0
- if((node = NCJmakenode(parser)) == NULL) goto done;
-#endif
- loop:
- for(;;) {
- if((token = NCJlex(parser)) == NCJ_ERR) goto done;
- switch (token) {
- case NCJ_NUL;
- break;
- case NCJ_RBRACKET:
- break loop;
- default:
- NCJpushback(parser,token);
- NCjson* o = NCJparseR(parser);
- tokens.nextToken();
- if(tokens.ttype == NCJ_EOF) break;
- else if(tokens.ttype == RBRACKET) tokens.pushBack();
- else if(tokens.ttype != COMMA)
- throw new IOException("Missing comma in list");
- array.add(o);
- }
- }
- return array;
-}
-
-static NCjson parseMap(StreamTokenizer tokens)
-{
- assert (tokens.ttype == LBRACE);
- Map map = new LinkedHashMap<>(); /* Keep insertion order */
- loop:
- for(; ; ) {
- int token = tokens.nextToken();
- switch (token) {
- case NCJ_NCJ_EOL:
- break; /* ignore */
- case NCJ_NCJ_EOF:
- throw new IOException("Unexpected eof");
- case NCJ_RBRACE:
- break loop;
- default:
- tokens.pushBack();
- NCjson name = parseR(tokens);
- if(tokens.ttype == NCJ_EOF) break;
- if(name instanceof char*
- || name instanceof Long
- || name instanceof Boolean) {
- /*ok*/
- } else
- throw new IOException("Unexpected map name type: " + name);
- if(tokens.nextToken() != COLON)
- throw new IOException("Expected ':'; found: " + tokens.ttype);
- NCjson o = parseR(tokens);
- tokens.nextToken();
- if(tokens.ttype == NCJ_EOF) break;
- else if(tokens.ttype == RBRACE) tokens.pushBack();
- else if(tokens.ttype != COMMA)
- throw new IOException("Missing comma in list");
- map.put(name.tochar*(), o);
- }
- }
- return map;
-}
-}
-
-static char* tochar*(Object o) {return tochar*(o,"");}
-
-static char* tochar*(Object o, char* demark)
-{
-char*Builder buf = new char*Builder();
-tochar*R(o, buf, demark, 0);
-return buf.tochar*();
-}
-
-static static void tochar*R(Object o, char*Builder buf, char* demark, int indent)
-{
-boolean first = true;
-if(o instanceof List) {
- List