Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve Copy Operation by taking the Source URI #482

Merged
merged 21 commits into from
Jan 13, 2025
Merged
2 changes: 1 addition & 1 deletion .github/workflows/codecov.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,4 +30,4 @@ jobs:
run: |
python3 -c "from urllib.request import urlretrieve; urlretrieve('${{ env.BUILDER_HOST }}/${{ env.BUILDER_SOURCE }}/${{ env.BUILDER_VERSION }}/builder.pyz?run=${{ env.RUN }}', 'builder')"
chmod a+x builder
./builder build -p ${{ env.PACKAGE_NAME }} --compiler=gcc-9 --cmake-extra=-DASSERT_LOCK_HELD=ON --coverage --coverage-exclude=source/s3_copy_object.c
./builder build -p ${{ env.PACKAGE_NAME }} --compiler=gcc-12 --cmake-extra=-DASSERT_LOCK_HELD=ON --coverage --coverage-exclude=source/s3_copy_object.c
4 changes: 4 additions & 0 deletions include/aws/s3/private/s3_copy_object.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
*/

#include "aws/s3/private/s3_meta_request_impl.h"
#include <aws/common/uri.h>

enum aws_s3_copy_object_request_tag {
AWS_S3_COPY_OBJECT_REQUEST_TAG_GET_OBJECT_SIZE,
Expand All @@ -25,6 +26,9 @@ struct aws_s3_copy_object {
/* Usable after the Create Multipart Upload request succeeds. */
struct aws_string *upload_id;

/* (Optional) source_uri for the copy operation. */
struct aws_uri source_uri;

/* Only meant for use in the update function, which is never called concurrently. */
struct {
uint32_t next_part_number;
Expand Down
3 changes: 2 additions & 1 deletion include/aws/s3/private/s3_request_messages.h
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,8 @@ struct aws_http_message *aws_s3_get_object_size_message_new(
AWS_S3_API
struct aws_http_message *aws_s3_get_source_object_size_message_new(
struct aws_allocator *allocator,
struct aws_http_message *base_message);
struct aws_http_message *base_message,
struct aws_uri *source_uri);

/* Add content-md5 header to the http message passed in. The MD5 will be computed from the input_buf */
AWS_S3_API
Expand Down
9 changes: 9 additions & 0 deletions include/aws/s3/s3_client.h
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,8 @@ enum aws_s3_meta_request_type {
* source will not work
* - source bucket is assumed to be in the same region as dest
* - source bucket and dest bucket must both be either directory buckets or regular buckets.
*
* Provide the `meta_request_options.copy_source_uri` to bypass these limitations.
*/
AWS_S3_META_REQUEST_TYPE_COPY_OBJECT,

Expand Down Expand Up @@ -869,6 +871,13 @@ struct aws_s3_meta_request_options {
* This is just used as an estimate, so it's okay to provide an approximate value if the exact size is unknown.
*/
const uint64_t *object_size_hint;

/*
* (Optional)
* If performing a copy operation, provide the source URI here to bypass the limitations of the copy operation.
* This will be ignored for other operations.
*/
const struct aws_byte_cursor copy_source_uri;
};

/* Result details of a meta request.
Expand Down
18 changes: 15 additions & 3 deletions source/s3_copy_object.c
Original file line number Diff line number Diff line change
Expand Up @@ -82,8 +82,7 @@ struct aws_s3_meta_request *aws_s3_meta_request_copy_object_new(
copy_object,
&s_s3_copy_object_vtable,
&copy_object->base)) {
aws_mem_release(allocator, copy_object);
return NULL;
goto on_error;
}

aws_array_list_init_dynamic(
Expand All @@ -92,10 +91,22 @@ struct aws_s3_meta_request *aws_s3_meta_request_copy_object_new(
copy_object->synced_data.content_length = UNKNOWN_CONTENT_LENGTH;
copy_object->synced_data.total_num_parts = UNKNOWN_NUM_PARTS;
copy_object->threaded_update_data.next_part_number = 1;
if (options->copy_source_uri.len != 0) {
if (aws_uri_init_parse(&copy_object->source_uri, allocator, &options->copy_source_uri)) {
AWS_LOGF_ERROR(
AWS_LS_S3_META_REQUEST,
"Unable to parse the copy_source_uri provided in the request: " PRInSTR "",
AWS_BYTE_CURSOR_PRI(options->copy_source_uri));
goto on_error;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the copy_object->synced_data.part_list was initialized above, it needs to be cleaned up here.

Or move the initialization of the list after anything can fail

We could add a simple test to pass in an invalid URI to add a bit more test coverage.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks, good catch. Fixed with a test.

}
}

AWS_LOGF_DEBUG(AWS_LS_S3_META_REQUEST, "id=%p Created new CopyObject Meta Request.", (void *)&copy_object->base);

return &copy_object->base;
on_error:
aws_mem_release(allocator, copy_object);
return NULL;
}

static void s_s3_meta_request_copy_object_destroy(struct aws_s3_meta_request *meta_request) {
Expand All @@ -105,6 +116,7 @@ static void s_s3_meta_request_copy_object_destroy(struct aws_s3_meta_request *me
struct aws_s3_copy_object *copy_object = meta_request->impl;

aws_string_destroy(copy_object->upload_id);
aws_uri_clean_up(&copy_object->source_uri);
copy_object->upload_id = NULL;

for (size_t part_index = 0; part_index < aws_array_list_length(&copy_object->synced_data.part_list); ++part_index) {
Expand Down Expand Up @@ -364,7 +376,7 @@ static struct aws_future_void *s_s3_copy_object_prepare_request(struct aws_s3_re
/* Prepares the GetObject HEAD request to get the source object size. */
case AWS_S3_COPY_OBJECT_REQUEST_TAG_GET_OBJECT_SIZE: {
message = aws_s3_get_source_object_size_message_new(
meta_request->allocator, meta_request->initial_request_message);
meta_request->allocator, meta_request->initial_request_message, &copy_object->source_uri);
break;
}

Expand Down
41 changes: 31 additions & 10 deletions source/s3_request_messages.c
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include <aws/common/byte_buf.h>
#include <aws/common/encoding.h>
#include <aws/common/string.h>
#include <aws/common/uri.h>
#include <aws/http/request_response.h>
#include <aws/io/async_stream.h>
#include <aws/io/stream.h>
Expand Down Expand Up @@ -457,8 +458,37 @@ static const struct aws_byte_cursor s_slash_char = AWS_BYTE_CUR_INIT_FROM_STRING
*/
struct aws_http_message *aws_s3_get_source_object_size_message_new(
struct aws_allocator *allocator,
struct aws_http_message *base_message) {
struct aws_http_message *base_message,
struct aws_uri *source_uri) {
struct aws_http_message *message = NULL;

message = aws_http_message_new_request(allocator);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Lot of the error handling down there assumes there was nothing to clean up and just returns null.

Need to update them since we now allocate the message at the beginning of the function.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks, updated.

if (message == NULL) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

do we need to worry about the signing region here today as well? or we are keeping the source must be in the same region as dest behavior?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good catch. For now, I have updated the docs to mention that only limitations 1 and 2 will be bypassed using URI.

goto error_cleanup;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

head_object_host_header have not been defined yet, but we will try to clean it up from this goto.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks, fixed.

}

if (aws_http_message_set_request_method(message, g_head_method)) {
goto error_cleanup;
}
if (source_uri != NULL && source_uri->self_size > 0) {
/* Parse source host header and path from the provided URI */
struct aws_byte_cursor host = *aws_uri_host_name(source_uri);
struct aws_byte_cursor path = *aws_uri_path(source_uri);
struct aws_http_header host_header = {
.name = g_host_header_name,
.value = host,
};
if (aws_http_message_add_header(message, host_header)) {
goto error_cleanup;
}

if (aws_http_message_set_request_path(message, path)) {
goto error_cleanup;
}
return message;
}

/* Parse the source host header and path from the x-amz-copy-source header and the destination URI */
struct aws_byte_buf head_object_host_header;
AWS_ZERO_STRUCT(head_object_host_header);

Expand Down Expand Up @@ -529,15 +559,6 @@ struct aws_http_message *aws_s3_get_source_object_size_message_new(
goto error_cleanup;
}

message = aws_http_message_new_request(allocator);
if (message == NULL) {
goto error_cleanup;
}

if (aws_http_message_set_request_method(message, g_head_method)) {
goto error_cleanup;
}

struct aws_http_header host_header = {
.name = g_host_header_name,
.value = aws_byte_cursor_from_buf(&head_object_host_header),
Expand Down
97 changes: 91 additions & 6 deletions tests/s3_data_plane_tests.c
Original file line number Diff line number Diff line change
Expand Up @@ -6509,6 +6509,9 @@ static int s_test_s3_copy_object_helper(
int expected_error_code,
int expected_response_status,
uint64_t expected_size) {
struct aws_s3_tester tester;
AWS_ZERO_STRUCT(tester);
ASSERT_SUCCESS(aws_s3_tester_init(allocator, &tester));

struct aws_byte_cursor source_bucket = g_test_bucket_name;
struct aws_byte_cursor destination_bucket = g_test_bucket_name;
Expand All @@ -6522,16 +6525,76 @@ static int s_test_s3_copy_object_helper(
destination_bucket.ptr,
g_test_s3_region.ptr);

return aws_test_s3_copy_object_helper(
struct aws_byte_cursor copy_source_uri;
struct aws_byte_buf encoded_path;
AWS_ZERO_STRUCT(copy_source_uri);
AWS_ZERO_STRUCT(encoded_path);

aws_byte_buf_init(&encoded_path, allocator, source_key.len);
aws_byte_buf_append_encoding_uri_path(&encoded_path, &source_key);

/* without copy_source_uri */
ASSERT_SUCCESS(aws_test_s3_copy_object_helper(
allocator,
&tester,
source_bucket,
source_key,
aws_byte_cursor_from_c_str(endpoint),
destination_key,
expected_error_code,
expected_response_status,
expected_size,
false,
copy_source_uri));

/* with path style copy_source_uri */
char source_url[1024];
snprintf(
source_url,
sizeof(source_url),
"https://s3.%s.amazonaws.com/" PRInSTR "/" PRInSTR "",
g_test_s3_region.ptr,
AWS_BYTE_CURSOR_PRI(source_bucket),
AWS_BYTE_BUF_PRI(encoded_path));
copy_source_uri = aws_byte_cursor_from_c_str(source_url);
ASSERT_SUCCESS(aws_test_s3_copy_object_helper(
allocator,
&tester,
source_bucket,
source_key,
aws_byte_cursor_from_c_str(endpoint),
destination_key,
expected_error_code,
expected_response_status,
expected_size,
false);
false,
copy_source_uri));

/* with virtual style copy_source_uri */
snprintf(
source_url,
sizeof(source_url),
"https://" PRInSTR ".s3.%s.amazonaws.com/" PRInSTR "",
AWS_BYTE_CURSOR_PRI(source_bucket),
g_test_s3_region.ptr,
AWS_BYTE_BUF_PRI(encoded_path));
copy_source_uri = aws_byte_cursor_from_c_str(source_url);
ASSERT_SUCCESS(aws_test_s3_copy_object_helper(
allocator,
&tester,
source_bucket,
source_key,
aws_byte_cursor_from_c_str(endpoint),
destination_key,
expected_error_code,
expected_response_status,
expected_size,
false,
copy_source_uri));

aws_s3_tester_clean_up(&tester);
aws_byte_buf_clean_up(&encoded_path);
return AWS_OP_SUCCESS;
}

AWS_TEST_CASE(test_s3_copy_small_object, s_test_s3_copy_small_object)
Expand Down Expand Up @@ -6599,6 +6662,9 @@ static int s_test_s3_copy_object_invalid_source_key(struct aws_allocator *alloca
AWS_TEST_CASE(test_s3_copy_source_prefixed_by_slash, s_test_s3_copy_source_prefixed_by_slash)
static int s_test_s3_copy_source_prefixed_by_slash(struct aws_allocator *allocator, void *ctx) {
(void)ctx;
struct aws_s3_tester tester;
AWS_ZERO_STRUCT(tester);
ASSERT_SUCCESS(aws_s3_tester_init(allocator, &tester));

struct aws_byte_cursor source_key = AWS_BYTE_CUR_INIT_FROM_STRING_LITERAL("pre-existing-1MB");
struct aws_byte_cursor destination_key = AWS_BYTE_CUR_INIT_FROM_STRING_LITERAL("copies/destination_1MB");
Expand Down Expand Up @@ -6627,15 +6693,23 @@ static int s_test_s3_copy_source_prefixed_by_slash(struct aws_allocator *allocat
destination_bucket.ptr,
g_test_s3_region.ptr);

return aws_test_s3_copy_object_from_x_amz_copy_source(
struct aws_byte_cursor copy_source_uri;
AWS_ZERO_STRUCT(copy_source_uri);

ASSERT_SUCCESS(aws_test_s3_copy_object_from_x_amz_copy_source(
allocator,
&tester,
x_amz_copy_source,
aws_byte_cursor_from_c_str(endpoint),
destination_key,
AWS_ERROR_SUCCESS,
AWS_HTTP_STATUS_CODE_200_OK,
MB_TO_BYTES(1),
false);
false,
copy_source_uri));

aws_s3_tester_clean_up(&tester);
return AWS_OP_SUCCESS;
}

/**
Expand All @@ -6646,6 +6720,9 @@ static int s_test_s3_copy_source_prefixed_by_slash(struct aws_allocator *allocat
AWS_TEST_CASE(test_s3_copy_source_prefixed_by_slash_multipart, s_test_s3_copy_source_prefixed_by_slash_multipart)
static int s_test_s3_copy_source_prefixed_by_slash_multipart(struct aws_allocator *allocator, void *ctx) {
(void)ctx;
struct aws_s3_tester tester;
AWS_ZERO_STRUCT(tester);
ASSERT_SUCCESS(aws_s3_tester_init(allocator, &tester));

struct aws_byte_cursor source_key = AWS_BYTE_CUR_INIT_FROM_STRING_LITERAL("pre-existing-256MB");
struct aws_byte_cursor destination_key = AWS_BYTE_CUR_INIT_FROM_STRING_LITERAL("copies/destination_256MB");
Expand Down Expand Up @@ -6674,15 +6751,23 @@ static int s_test_s3_copy_source_prefixed_by_slash_multipart(struct aws_allocato
destination_bucket.ptr,
g_test_s3_region.ptr);

return aws_test_s3_copy_object_from_x_amz_copy_source(
struct aws_byte_cursor copy_source_uri;
AWS_ZERO_STRUCT(copy_source_uri);

ASSERT_SUCCESS(aws_test_s3_copy_object_from_x_amz_copy_source(
allocator,
&tester,
x_amz_copy_source,
aws_byte_cursor_from_c_str(endpoint),
destination_key,
AWS_ERROR_SUCCESS,
AWS_HTTP_STATUS_CODE_200_OK,
MB_TO_BYTES(256),
false);
false,
copy_source_uri));

aws_s3_tester_clean_up(&tester);
return AWS_OP_SUCCESS;
}

static int s_s3_get_object_mrap_helper(struct aws_allocator *allocator, bool multipart) {
Expand Down
30 changes: 26 additions & 4 deletions tests/s3_s3express_client_test.c
Original file line number Diff line number Diff line change
Expand Up @@ -654,41 +654,63 @@ TEST_CASE(s3express_client_get_object_create_session_error) {
*/
TEST_CASE(s3express_client_copy_object) {
(void)ctx;
struct aws_s3_tester tester;
AWS_ZERO_STRUCT(tester);
ASSERT_SUCCESS(aws_s3_tester_init(allocator, &tester));

struct aws_byte_cursor source_key = AWS_BYTE_CUR_INIT_FROM_STRING_LITERAL("pre-existing-10MB");
struct aws_byte_cursor destination_key = AWS_BYTE_CUR_INIT_FROM_STRING_LITERAL("copies/destination_10MB");
struct aws_byte_cursor source_bucket =
AWS_BYTE_CUR_INIT_FROM_STRING_LITERAL("aws-c-s3-test-bucket--usw2-az1--x-s3");
return aws_test_s3_copy_object_helper(
struct aws_byte_cursor copy_source_uri;
AWS_ZERO_STRUCT(copy_source_uri);

ASSERT_SUCCESS(aws_test_s3_copy_object_helper(
allocator,
&tester,
source_bucket,
source_key,
g_test_s3express_bucket_usw2_az1_endpoint,
destination_key,
AWS_ERROR_SUCCESS,
AWS_HTTP_STATUS_CODE_200_OK,
MB_TO_BYTES(10),
true);
true,
copy_source_uri));

aws_s3_tester_clean_up(&tester);
return AWS_OP_SUCCESS;
}

/**
* Test multipart copy object within the same directory bucket.
*/
TEST_CASE(s3express_client_copy_object_multipart) {
(void)ctx;
struct aws_s3_tester tester;
AWS_ZERO_STRUCT(tester);
ASSERT_SUCCESS(aws_s3_tester_init(allocator, &tester));

struct aws_byte_cursor source_key = AWS_BYTE_CUR_INIT_FROM_STRING_LITERAL("pre-existing-2GB");
struct aws_byte_cursor destination_key = AWS_BYTE_CUR_INIT_FROM_STRING_LITERAL("copies/destination_2GB");
struct aws_byte_cursor source_bucket =
AWS_BYTE_CUR_INIT_FROM_STRING_LITERAL("aws-c-s3-test-bucket--usw2-az1--x-s3");
return aws_test_s3_copy_object_helper(
struct aws_byte_cursor copy_source_uri;
AWS_ZERO_STRUCT(copy_source_uri);

ASSERT_SUCCESS(aws_test_s3_copy_object_helper(
allocator,
&tester,
source_bucket,
source_key,
g_test_s3express_bucket_usw2_az1_endpoint,
destination_key,
AWS_ERROR_SUCCESS,
AWS_HTTP_STATUS_CODE_200_OK,
GB_TO_BYTES(2),
true);
true,
copy_source_uri));

aws_s3_tester_clean_up(&tester);
return AWS_OP_SUCCESS;
}
Loading