diff --git a/src/rest_vol.c b/src/rest_vol.c index 630c3a95..b30e4986 100644 --- a/src/rest_vol.c +++ b/src/rest_vol.c @@ -34,6 +34,8 @@ #define BACKOFF_SCALE_FACTOR 1.5 #define BACKOFF_MAX_BEFORE_FAIL 3000000000 /* 30,000,000,000 ns -> 30 sec */ +/* Number of unique characters which need to be escaped before being sent as JSON */ +#define NUM_JSON_ESCAPE_CHARS 7 /* * The VOL connector identification number. */ @@ -2233,6 +2235,7 @@ RV_find_object_by_path(RV_object_t *parent_obj, const char *obj_path, H5I_type_t if (CURLE_OK != curl_easy_setopt(curl, CURLOPT_HTTPGET, 1)) FUNC_GOTO_ERROR(H5E_LINK, H5E_CANTSET, FAIL, "can't set up cURL to make HTTP GET request: %s", curl_err_buf); + if (CURLE_OK != curl_easy_setopt(curl, CURLOPT_URL, request_url)) FUNC_GOTO_ERROR(H5E_LINK, H5E_CANTSET, FAIL, "can't set cURL request URL: %s", curl_err_buf); @@ -3826,3 +3829,84 @@ RV_free_visited_link_hash_table_key(rv_hash_table_key_t value) RV_free(value); value = NULL; } /* end RV_free_visited_link_hash_table_key() */ + +/*------------------------------------------------------------------------- + * Function: RV_JSON_escape_string + * + * Purpose: Helper function to escape control characters for JSON strings. + * If 'out' is NULL, out_size will be changed to the buffer size + * needed for the escaped version of 'in'. + * If 'out' is non-NULL, it should be a buffer of out_size bytes + * that will be populated with the escaped version of 'in'. + * If the provided buffer is too small and this operation fails, + * the value of the buffer will still be modified. + * + * Return: Non-negative on success/Negative on failure + * + * Programmer: Matthew Larson + * January, 2024 + */ +herr_t +RV_JSON_escape_string(const char *in, char *out, size_t *out_size) +{ + herr_t ret_value = SUCCEED; + size_t in_size = strlen(in); + + char *out_ptr = NULL; + char escape_characters[NUM_JSON_ESCAPE_CHARS] = {'\b', '\f', '\n', '\r', '\t', '\"', '\\'}; + + if (out == NULL) { + /* Determine necessary buffer size */ + *out_size = in_size + 1; + + for (size_t i = 0; i < in_size; i++) { + char c = in[i]; + + for (size_t j = 0; j < NUM_JSON_ESCAPE_CHARS; j++) { + char ec = escape_characters[j]; + + /* Each escaped character requires additional '\' in final string */ + if (c == ec) + *out_size += 1; + } + } + } + else { + /* Escaped string is at least as long as original */ + if (*out_size < strlen(in) + 1) + FUNC_GOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "escaped buffer is smaller than original"); + + /* Populate provided buffer */ + out_ptr = out; + + for (size_t i = 0; i < in_size; i++) { + char c = in[i]; + + for (size_t j = 0; j < NUM_JSON_ESCAPE_CHARS; j++) { + char ec = escape_characters[j]; + + if (c == ec) { + if ((out_ptr - out + 1) > *out_size) + FUNC_GOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "buffer too small for encoded string"); + out_ptr[0] = '\\'; + out_ptr++; + } + } + + if ((out_ptr - out + 1) > *out_size) + FUNC_GOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "buffer too small for encoded string"); + + out_ptr[0] = c; + out_ptr++; + } + + if ((out_ptr - out + 1) > *out_size) + FUNC_GOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "buffer too small for encoded string"); + + out_ptr[0] = '\0'; + } + +done: + + return ret_value; +} diff --git a/src/rest_vol.h b/src/rest_vol.h index 656aa734..0e892e2a 100644 --- a/src/rest_vol.h +++ b/src/rest_vol.h @@ -757,6 +757,9 @@ herr_t RV_tconv_init(hid_t src_type_id, size_t *src_type_size, hid_t dst_type_id herr_t RV_convert_datatype_to_JSON(hid_t type_id, char **type_body, size_t *type_body_len, hbool_t nested, server_api_version server_version); +/* Helper function to escape control characters for JSON strings */ +herr_t RV_JSON_escape_string(const char *in, char *out, size_t *out_size); + #define SERVER_VERSION_MATCHES_OR_EXCEEDS(version, major_needed, minor_needed, patch_needed) \ (version.major > major_needed) || (version.major == major_needed && version.minor > minor_needed) || \ (version.major == major_needed && version.minor == minor_needed && version.patch >= patch_needed) diff --git a/src/rest_vol_dataset.c b/src/rest_vol_dataset.c index a309071f..6cbec8ee 100644 --- a/src/rest_vol_dataset.c +++ b/src/rest_vol_dataset.c @@ -3568,6 +3568,7 @@ RV_setup_dataset_create_request_body(void *parent_obj, const char *name, hid_t t char *creation_properties_body = NULL; char *link_body = NULL; char *path_dirname = NULL; + char *escaped_link_name = NULL; int create_request_len = 0; int link_body_len = 0; herr_t ret_value = SUCCEED; @@ -3612,11 +3613,12 @@ RV_setup_dataset_create_request_body(void *parent_obj, const char *name, hid_t t if (name) { hbool_t empty_dirname; char target_URI[URI_MAX_LENGTH]; - const char *const link_basename = H5_rest_basename(name); - const char *const link_body_format = "\"link\": {" - "\"id\": \"%s\", " - "\"name\": \"%s\"" - "}"; + const char *const link_basename = H5_rest_basename(name); + const char *const link_body_format = "\"link\": {" + "\"id\": \"%s\", " + "\"name\": \"%s\"" + "}"; + size_t escaped_name_size = 0; #ifdef RV_CONNECTOR_DEBUG printf("-> Creating JSON link for dataset\n\n"); @@ -3643,7 +3645,17 @@ RV_setup_dataset_create_request_body(void *parent_obj, const char *name, hid_t t FUNC_GOTO_ERROR(H5E_DATASET, H5E_PATH, FAIL, "can't locate target for dataset link"); } /* end if */ - link_body_nalloc = strlen(link_body_format) + strlen(link_basename) + + /* JSON-escape link name */ + if (RV_JSON_escape_string(link_basename, escaped_link_name, &escaped_name_size) < 0) + FUNC_GOTO_ERROR(H5E_DATASET, H5E_CANTENCODE, FAIL, "can't get length of JSON escaped link name"); + + if ((escaped_link_name = RV_malloc(escaped_name_size)) == NULL) + FUNC_GOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "can't allocate space for escaped link name"); + + if (RV_JSON_escape_string(link_basename, escaped_link_name, &escaped_name_size) < 0) + FUNC_GOTO_ERROR(H5E_DATASET, H5E_CANTENCODE, FAIL, "can't JSON escape link name"); + + link_body_nalloc = strlen(link_body_format) + strlen(escaped_link_name) + (empty_dirname ? strlen(pobj->URI) : strlen(target_URI)) + 1; if (NULL == (link_body = (char *)RV_malloc(link_body_nalloc))) FUNC_GOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "can't allocate space for dataset link body"); @@ -3651,7 +3663,7 @@ RV_setup_dataset_create_request_body(void *parent_obj, const char *name, hid_t t /* Form the Dataset Creation Link portion of the Dataset create request using the above format * specifier and the corresponding arguments */ if ((link_body_len = snprintf(link_body, link_body_nalloc, link_body_format, - empty_dirname ? pobj->URI : target_URI, link_basename)) < 0) + empty_dirname ? pobj->URI : target_URI, escaped_link_name)) < 0) FUNC_GOTO_ERROR(H5E_DATASET, H5E_SYSERRSTR, FAIL, "snprintf error"); if ((size_t)link_body_len >= link_body_nalloc) @@ -3721,6 +3733,8 @@ RV_setup_dataset_create_request_body(void *parent_obj, const char *name, hid_t t RV_free(shape_body); if (datatype_body) RV_free(datatype_body); + if (escaped_link_name) + RV_free(escaped_link_name); return ret_value; } /* end RV_setup_dataset_create_request_body() */ diff --git a/src/rest_vol_group.c b/src/rest_vol_group.c index 01d06007..95ec2d90 100644 --- a/src/rest_vol_group.c +++ b/src/rest_vol_group.c @@ -55,6 +55,7 @@ RV_group_create(void *obj, const H5VL_loc_params_t *loc_params, const char *name char *base64_plist_buffer = NULL; char target_URI[URI_MAX_LENGTH]; char request_url[URL_MAX_LENGTH]; + char *escaped_group_name = NULL; int create_request_body_len = 0; int url_len = 0; void *binary_plist_buffer = NULL; @@ -128,6 +129,7 @@ RV_group_create(void *obj, const H5VL_loc_params_t *loc_params, const char *name if (name) { const char *path_basename = H5_rest_basename(name); hbool_t empty_dirname; + size_t escaped_name_size = 0; #ifdef RV_CONNECTOR_DEBUG printf("-> Creating JSON link for group\n\n"); @@ -204,7 +206,17 @@ RV_group_create(void *obj, const H5VL_loc_params_t *loc_params, const char *name if (RV_base64_encode(binary_plist_buffer, plist_nalloc, &base64_plist_buffer, &base64_buf_size) < 0) FUNC_GOTO_ERROR(H5E_PLIST, H5E_CANTENCODE, NULL, "failed to base64 encode plist binary"); - create_request_nalloc = strlen(fmt_string) + strlen(path_basename) + + /* Escape group name to be sent as JSON */ + if (RV_JSON_escape_string(path_basename, escaped_group_name, &escaped_name_size) < 0) + FUNC_GOTO_ERROR(H5E_SYM, H5E_CANTENCODE, NULL, "can't get size of JSON escaped group name"); + + if ((escaped_group_name = RV_malloc(escaped_name_size)) == NULL) + FUNC_GOTO_ERROR(H5E_SYM, H5E_CANTALLOC, NULL, "can't allocate space for escaped group name"); + + if (RV_JSON_escape_string(path_basename, escaped_group_name, &escaped_name_size) < 0) + FUNC_GOTO_ERROR(H5E_SYM, H5E_CANTENCODE, NULL, "can't JSON escape group name"); + + create_request_nalloc = strlen(fmt_string) + strlen(escaped_group_name) + (empty_dirname ? strlen(parent->URI) : strlen(target_URI)) + base64_buf_size + 1; if (NULL == (create_request_body = (char *)RV_malloc(create_request_nalloc))) @@ -212,7 +224,7 @@ RV_group_create(void *obj, const H5VL_loc_params_t *loc_params, const char *name "can't allocate space for group create request body"); if ((create_request_body_len = snprintf(create_request_body, create_request_nalloc, fmt_string, - empty_dirname ? parent->URI : target_URI, path_basename, + empty_dirname ? parent->URI : target_URI, escaped_group_name, (char *)base64_plist_buffer)) < 0) FUNC_GOTO_ERROR(H5E_SYM, H5E_SYSERRSTR, NULL, "snprintf error"); @@ -323,6 +335,9 @@ RV_group_create(void *obj, const H5VL_loc_params_t *loc_params, const char *name curl_headers = NULL; } /* end if */ + if (escaped_group_name) + RV_free(escaped_group_name); + PRINT_ERROR_STACK; return ret_value;