From a635ab54182a56a42c02b4f0f6aee2369efd0ff1 Mon Sep 17 00:00:00 2001 From: Roberto Tyley <52038+rtyley@users.noreply.github.com> Date: Fri, 8 Nov 2024 22:11:43 +0000 Subject: [PATCH] Improve API discoverability/clarity for devs with `S3ByteArrayFetching` Introducing: * **New type alias `S3ByteArrayFetching`** for `Fetching[ObjectId, Array[Byte]]` - when developers encounter a method needing `S3ByteArrayFetching`, the type alias now documents how to create that precise thing. * **New method `S3ObjectFetching.byteArraysWith(s3AsyncClient)`** as a convenience to create a `S3ByteArrayFetching` for AWS SDK v2, removing the need to know about the `Byte` transformer, or the need to map with `_.asByteArray()` This prompted by this PR (aiming to support the PR and make the changes in it easier to adopt): * https://github.com/guardian/facia-scala-client/pull/287 ## How calling code changes Given calling code that already has `"com.gu.etag-caching" %% "aws-s3-sdk-v2"` as a dependency, and has this setup: ```scala import com.gu.etagcaching.aws.sdkv2.s3.S3ObjectFetching val s3AsyncClient: software.amazon.awssdk.services.s3.S3AsyncClient = ??? // AWS SDK v2 ``` ...the code to get a `Fetching[ObjectId, Array[Byte]]` looks like this: #### Before ```scala S3ObjectFetching(s3AsyncClient, Bytes).mapResponse(_.asByteArray()) ``` #### After ```scala S3ObjectFetching.byteArraysWith(s3AsyncClient) ``` --- .../aws/sdkv2/s3/S3ObjectFetching.scala | 10 +++- .../src/test/resources/demo-files/goodbye.txt | 1 + .../src/test/resources/demo-files/hello.txt | 1 + .../aws/sdkv2/s3/S3ClientForS3Mock.scala | 2 +- .../aws/sdkv2/s3/S3ObjectFetchingTest.scala | 58 ++++++++++++++----- .../com/gu/etagcaching/aws/s3/package.scala | 23 ++++++++ .../gu/etagcaching/FreshnessPolicyTest.scala | 2 +- 7 files changed, 80 insertions(+), 17 deletions(-) create mode 100644 aws-s3/aws-sdk-v2/src/test/resources/demo-files/goodbye.txt create mode 100644 aws-s3/aws-sdk-v2/src/test/resources/demo-files/hello.txt create mode 100644 aws-s3/base/src/main/scala/com/gu/etagcaching/aws/s3/package.scala diff --git a/aws-s3/aws-sdk-v2/src/main/scala/com/gu/etagcaching/aws/sdkv2/s3/S3ObjectFetching.scala b/aws-s3/aws-sdk-v2/src/main/scala/com/gu/etagcaching/aws/sdkv2/s3/S3ObjectFetching.scala index 2b414c7..590eee8 100644 --- a/aws-s3/aws-sdk-v2/src/main/scala/com/gu/etagcaching/aws/sdkv2/s3/S3ObjectFetching.scala +++ b/aws-s3/aws-sdk-v2/src/main/scala/com/gu/etagcaching/aws/sdkv2/s3/S3ObjectFetching.scala @@ -1,8 +1,9 @@ package com.gu.etagcaching.aws.sdkv2.s3 import com.gu.etagcaching.Endo -import com.gu.etagcaching.aws.s3.ObjectId +import com.gu.etagcaching.aws.s3.{ObjectId, S3ByteArrayFetching} import com.gu.etagcaching.aws.sdkv2.s3.response.Transformer +import com.gu.etagcaching.aws.sdkv2.s3.response.Transformer.Bytes import com.gu.etagcaching.fetching.{ETaggedData, Fetching, Missing, MissingOrETagged} import software.amazon.awssdk.core.internal.util.ThrowableUtils import software.amazon.awssdk.services.s3.S3AsyncClient @@ -42,3 +43,10 @@ case class S3ObjectFetching[Response](s3Client: S3AsyncClient, transformer: Tran } } +object S3ObjectFetching { + /** + * Convenience method for creating a fetcher that just returns a simple byte array. + */ + def byteArraysWith(s3AsyncClient: S3AsyncClient): S3ByteArrayFetching = + S3ObjectFetching(s3AsyncClient, Bytes).mapResponse(_.asByteArray()) +} \ No newline at end of file diff --git a/aws-s3/aws-sdk-v2/src/test/resources/demo-files/goodbye.txt b/aws-s3/aws-sdk-v2/src/test/resources/demo-files/goodbye.txt new file mode 100644 index 0000000..b4ff055 --- /dev/null +++ b/aws-s3/aws-sdk-v2/src/test/resources/demo-files/goodbye.txt @@ -0,0 +1 @@ +Au revoir! \ No newline at end of file diff --git a/aws-s3/aws-sdk-v2/src/test/resources/demo-files/hello.txt b/aws-s3/aws-sdk-v2/src/test/resources/demo-files/hello.txt new file mode 100644 index 0000000..c57eff5 --- /dev/null +++ b/aws-s3/aws-sdk-v2/src/test/resources/demo-files/hello.txt @@ -0,0 +1 @@ +Hello World! \ No newline at end of file diff --git a/aws-s3/aws-sdk-v2/src/test/scala/com/gu/etagcaching/aws/sdkv2/s3/S3ClientForS3Mock.scala b/aws-s3/aws-sdk-v2/src/test/scala/com/gu/etagcaching/aws/sdkv2/s3/S3ClientForS3Mock.scala index 53e8fcd..65bee15 100644 --- a/aws-s3/aws-sdk-v2/src/test/scala/com/gu/etagcaching/aws/sdkv2/s3/S3ClientForS3Mock.scala +++ b/aws-s3/aws-sdk-v2/src/test/scala/com/gu/etagcaching/aws/sdkv2/s3/S3ClientForS3Mock.scala @@ -11,7 +11,7 @@ import software.amazon.awssdk.utils.AttributeMap import java.net.URI object S3ClientForS3Mock { - def createS3clientFor(s3Mock: S3MockContainer) = S3AsyncClient.builder() + def createS3clientFor(s3Mock: S3MockContainer): S3AsyncClient = S3AsyncClient.builder() .region(Region.of("us-east-1")) .credentialsProvider( StaticCredentialsProvider.create(AwsBasicCredentials.create("foo", "bar"))) diff --git a/aws-s3/aws-sdk-v2/src/test/scala/com/gu/etagcaching/aws/sdkv2/s3/S3ObjectFetchingTest.scala b/aws-s3/aws-sdk-v2/src/test/scala/com/gu/etagcaching/aws/sdkv2/s3/S3ObjectFetchingTest.scala index fe3b6cc..8b8db5f 100644 --- a/aws-s3/aws-sdk-v2/src/test/scala/com/gu/etagcaching/aws/sdkv2/s3/S3ObjectFetchingTest.scala +++ b/aws-s3/aws-sdk-v2/src/test/scala/com/gu/etagcaching/aws/sdkv2/s3/S3ObjectFetchingTest.scala @@ -3,11 +3,12 @@ package com.gu.etagcaching.aws.sdkv2.s3 import com.adobe.testing.s3mock.testcontainers.S3MockContainer import com.gu.etagcaching.ETagCache import com.gu.etagcaching.FreshnessPolicy.AlwaysWaitForRefreshedValue -import com.gu.etagcaching.aws.s3.ObjectId +import com.gu.etagcaching.aws.s3.{ObjectId, S3ByteArrayFetching} import com.gu.etagcaching.aws.sdkv2.s3.ExampleParser.parseFruit import com.gu.etagcaching.aws.sdkv2.s3.S3ClientForS3Mock.createS3clientFor +import com.gu.etagcaching.aws.sdkv2.s3.TestS3Objects.bucket import com.gu.etagcaching.aws.sdkv2.s3.response.Transformer.Bytes -import org.scalatest.{BeforeAndAfter, OptionValues} +import org.scalatest.{BeforeAndAfter, BeforeAndAfterAll, OptionValues} import org.scalatest.concurrent.{IntegrationPatience, ScalaFutures} import org.scalatest.flatspec.AnyFlatSpec import org.scalatest.matchers.should.Matchers @@ -15,21 +16,32 @@ import software.amazon.awssdk.services.s3.S3AsyncClient import software.amazon.awssdk.services.s3.model.PutObjectRequest import java.io.File +import java.util.concurrent.atomic.AtomicInteger import java.util.zip.GZIPInputStream import scala.compat.java8.FutureConverters._ import scala.concurrent.ExecutionContext.Implicits.global import scala.concurrent.duration.DurationInt -class S3ObjectFetchingTest extends AnyFlatSpec with Matchers with ScalaFutures with OptionValues with IntegrationPatience with BeforeAndAfter { - val ExampleS3Object: ObjectId = ObjectId("test-bucket", "path") - val ExampleMissingS3Object: ObjectId = ObjectId("test-bucket", "nothing-should-be-here") +object TestS3Objects { + val bucket = "test-bucket" + val atomicInteger = new AtomicInteger() + def generate(): TestS3Objects = TestS3Objects(atomicInteger.getAndIncrement()) +} +case class TestS3Objects(id: Int) { + val example: ObjectId = ObjectId(bucket, s"$id/path") + val nonExistent: ObjectId = ObjectId(bucket, s"$id/nothing-should-be-here") +} - val s3Mock: S3MockContainer = new S3MockContainer("latest").withInitialBuckets(ExampleS3Object.bucket) - before(s3Mock.start()) - after(s3Mock.stop()) +class S3ObjectFetchingTest extends AnyFlatSpec with Matchers with ScalaFutures with OptionValues with IntegrationPatience with BeforeAndAfterAll { + + val s3Mock: S3MockContainer = new S3MockContainer("latest").withInitialBuckets(TestS3Objects.bucket) + override def beforeAll(): Unit = s3Mock.start() + override def afterAll(): Unit = s3Mock.stop() lazy val s3Client: S3AsyncClient = createS3clientFor(s3Mock) // lazy val because we need the s3Mock to start first "S3ObjectFetching" should "have an example to show how an S3-backed ETagCache is set up" in { + implicit val testS3Objects: TestS3Objects = TestS3Objects.generate() + val fruitCache = new ETagCache[ObjectId, Fruit]( S3ObjectFetching(s3Client, Bytes).timing( successWith = d => println(s"Success: $d"), @@ -44,21 +56,39 @@ class S3ObjectFetchingTest extends AnyFlatSpec with Matchers with ScalaFutures w ) uploadFile("banana.xml.gz") - fruitCache.get(ExampleS3Object).futureValue.value.colour shouldBe "yellow" - fruitCache.get(ExampleS3Object).futureValue.value.colour shouldBe "yellow" + fruitCache.get(testS3Objects.example).futureValue.value.colour shouldBe "yellow" + fruitCache.get(testS3Objects.example).futureValue.value.colour shouldBe "yellow" uploadFile("kiwi.xml.gz") - fruitCache.get(ExampleS3Object).futureValue.value.colour shouldBe "green" + fruitCache.get(testS3Objects.example).futureValue.value.colour shouldBe "green" // Note that the value is correct, without us explicitly clearing the cache - ETag-checking saved us! - fruitCache.get(ExampleMissingS3Object).futureValue shouldBe None + fruitCache.get(testS3Objects.nonExistent).futureValue shouldBe None + } + + it should "support a simple way to fetch byte arrays" in { + implicit val testS3Objects: TestS3Objects = TestS3Objects.generate() + + val s3Fetching: S3ByteArrayFetching = S3ObjectFetching.byteArraysWith(s3Client) + + val cache = new ETagCache( + s3Fetching.thenParsing(bytes => new String(bytes)), + AlwaysWaitForRefreshedValue, + _.maximumSize(500).expireAfterAccess(1.hour) + ) + + uploadFile("hello.txt") + cache.get(testS3Objects.example).futureValue.value shouldBe "Hello World!" + + uploadFile("goodbye.txt") + cache.get(testS3Objects.example).futureValue.value shouldBe "Au revoir!" } - private def uploadFile(demoFile: String): Unit = { + private def uploadFile(demoFile: String)(implicit testS3Objects: TestS3Objects): Unit = { val path = new File(getClass.getClassLoader.getResource("demo-files/" + demoFile).getFile).toPath val s3response = s3Client.putObject( - PutObjectRequest.builder().bucket(ExampleS3Object.bucket).key(ExampleS3Object.key).build(), + PutObjectRequest.builder().bucket(testS3Objects.example.bucket).key(testS3Objects.example.key).build(), path ).toScala.futureValue diff --git a/aws-s3/base/src/main/scala/com/gu/etagcaching/aws/s3/package.scala b/aws-s3/base/src/main/scala/com/gu/etagcaching/aws/s3/package.scala new file mode 100644 index 0000000..20aa4d4 --- /dev/null +++ b/aws-s3/base/src/main/scala/com/gu/etagcaching/aws/s3/package.scala @@ -0,0 +1,23 @@ +package com.gu.etagcaching.aws + +import com.gu.etagcaching.fetching.Fetching + +package object s3 { + /** + * This type provides an interface to get bytes out of S3, independent of the version of the + * AWS SDK in use. When code depends on this interface, it's not directly tied to AWS SDK + * version 1 or 2, and consumers can provide an instance of the interface backed by whatever + * client they prefer (even AWS SDK v1, though that is discouraged). + * + * Assuming you're using AWS SDK v2, get an instance of `S3ByteArrayFetching` by adding + * "com.gu.etag-caching" %% "aws-s3-sdk-v2" as a dependency, then: + * + * {{{ + * import com.gu.etagcaching.aws.sdkv2.s3.S3ObjectFetching + * + * val s3AsyncClient: software.amazon.awssdk.services.s3.S3AsyncClient = ??? // AWS SDK v2 + * val s3Fetching: S3ByteArrayFetching = S3ObjectFetching.byteArraysWith(s3AsyncClient) + * }}} + */ + type S3ByteArrayFetching = Fetching[ObjectId, Array[Byte]] +} diff --git a/core/src/test/scala/com/gu/etagcaching/FreshnessPolicyTest.scala b/core/src/test/scala/com/gu/etagcaching/FreshnessPolicyTest.scala index c94371c..bbe8f14 100644 --- a/core/src/test/scala/com/gu/etagcaching/FreshnessPolicyTest.scala +++ b/core/src/test/scala/com/gu/etagcaching/FreshnessPolicyTest.scala @@ -50,7 +50,7 @@ class FreshnessPolicyTest extends AnyFlatSpec with Matchers with ScalaFutures { val demo = DemoCache(TolerateOldValueWhileRefreshing) demo.read() shouldBe 0 - failAfter(2.millis) { // should be instant, because we're _not_ waiting for the ETag-checking fetch + failAfter(5.millis) { // should be instant, because we're _not_ waiting for the ETag-checking fetch demo.read() shouldBe 0 } }