From a3c02b3d026ee9cf57efb3696b9c741f294f07cb Mon Sep 17 00:00:00 2001 From: Tony McCrae Date: Thu, 14 Jan 2021 11:55:41 +0000 Subject: [PATCH 01/18] Remove old elastic related config vals. Thrall and MediaAPI both configure Elastic themselves --- .../scala/com/gu/mediaservice/lib/config/CommonConfig.scala | 5 ----- 1 file changed, 5 deletions(-) diff --git a/common-lib/src/main/scala/com/gu/mediaservice/lib/config/CommonConfig.scala b/common-lib/src/main/scala/com/gu/mediaservice/lib/config/CommonConfig.scala index ee585a0342..e2ce3b9301 100644 --- a/common-lib/src/main/scala/com/gu/mediaservice/lib/config/CommonConfig.scala +++ b/common-lib/src/main/scala/com/gu/mediaservice/lib/config/CommonConfig.scala @@ -11,11 +11,6 @@ import scala.util.Try abstract class CommonConfig(val configuration: Configuration) extends AwsClientBuilderUtils with StrictLogging { - final val elasticsearchStack = "media-service" - - final val elasticsearchApp = "elasticsearch" - final val elasticsearch6App = "elasticsearch6" - final val stackName = "media-service" final val sessionId = UUID.randomUUID().toString From b7613b6a48e1900adf3a15986220fc49b1c636c3 Mon Sep 17 00:00:00 2001 From: snyk-bot Date: Thu, 4 Feb 2021 06:59:22 +0000 Subject: [PATCH 02/18] fix: upgrade aws-sdk from 2.824.0 to 2.827.0 Snyk has created this PR to upgrade aws-sdk from 2.824.0 to 2.827.0. See this package in npm: https://www.npmjs.com/package/aws-sdk See this project in Snyk: https://app.snyk.io/org/guardian/project/ccea8889-4b98-4a9d-9a24-fb5ac2bbdb3d?utm_source=github&utm_medium=upgrade-pr --- scripts/reindex-images/package-lock.json | 6 +++--- scripts/reindex-images/package.json | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/scripts/reindex-images/package-lock.json b/scripts/reindex-images/package-lock.json index 1659dd7a16..bfb4073fa5 100644 --- a/scripts/reindex-images/package-lock.json +++ b/scripts/reindex-images/package-lock.json @@ -5,9 +5,9 @@ "requires": true, "dependencies": { "aws-sdk": { - "version": "2.824.0", - "resolved": "https://registry.npmjs.org/aws-sdk/-/aws-sdk-2.824.0.tgz", - "integrity": "sha512-9KNRQBkIMPn+6DWb4gR+RzqTMNyGLEwOgXbE4dDehOIAflfLnv3IFwLnzrhxJnleB4guYrILIsBroJFBzjiekg==", + "version": "2.827.0", + "resolved": "https://registry.npmjs.org/aws-sdk/-/aws-sdk-2.827.0.tgz", + "integrity": "sha512-71PWS1dqJ65/SeNgDQWEgbJ6oKCuB+Ypq30TM3EyzbAHaxl69WjQRK71oJ2bjhdIHfGQJtOV0G9wg4zpge4Erg==", "requires": { "buffer": "4.9.2", "events": "1.1.1", diff --git a/scripts/reindex-images/package.json b/scripts/reindex-images/package.json index 7d3c3b2ba0..0e40aebc28 100644 --- a/scripts/reindex-images/package.json +++ b/scripts/reindex-images/package.json @@ -9,6 +9,6 @@ "author": "Jonathon Herbert", "license": "ISC", "dependencies": { - "aws-sdk": "^2.824.0" + "aws-sdk": "^2.827.0" } } From dce19a120f6e2d6265547788b71d774f93e6bdd3 Mon Sep 17 00:00:00 2001 From: Alex Ware Date: Thu, 18 Feb 2021 11:08:45 +0000 Subject: [PATCH 03/18] Remove Mona and Sam from illustrator list --- .../scala/com/gu/mediaservice/lib/config/MetadataConfig.scala | 2 -- 1 file changed, 2 deletions(-) diff --git a/common-lib/src/main/scala/com/gu/mediaservice/lib/config/MetadataConfig.scala b/common-lib/src/main/scala/com/gu/mediaservice/lib/config/MetadataConfig.scala index b3ba44679d..5848a44a2d 100644 --- a/common-lib/src/main/scala/com/gu/mediaservice/lib/config/MetadataConfig.scala +++ b/common-lib/src/main/scala/com/gu/mediaservice/lib/config/MetadataConfig.scala @@ -129,8 +129,6 @@ object MetadataConfig { ) val staffIllustrators = List( - "Mona Chalabi", - "Sam Morris", "Guardian Design" ) From 9c9ea763a959ecd87b43bc5972e4031330a003dc Mon Sep 17 00:00:00 2001 From: Justin Rowles Date: Thu, 18 Feb 2021 14:42:21 +0000 Subject: [PATCH 04/18] Make Grid HTTP services common --- .../BatchIndexLambdaHandler.scala | 3 +- .../gu/mediaservice/BatchIndexHandler.scala | 11 ++- .../com/gu/mediaservice/ImageDataMerger.scala | 74 ++------------- .../mediaservice/ImagesBatchProjection.scala | 2 +- build.sbt | 5 +- .../com/gu/mediaservice/GridClient.scala | 91 ++++++++++++++++++- 6 files changed, 106 insertions(+), 80 deletions(-) rename {admin-tools/lib => common-lib}/src/main/scala/com/gu/mediaservice/GridClient.scala (55%) diff --git a/admin-tools/lambda/src/main/scala/com/gu/mediaservice/BatchIndexLambdaHandler.scala b/admin-tools/lambda/src/main/scala/com/gu/mediaservice/BatchIndexLambdaHandler.scala index e71ca9a0cb..e822868f17 100644 --- a/admin-tools/lambda/src/main/scala/com/gu/mediaservice/BatchIndexLambdaHandler.scala +++ b/admin-tools/lambda/src/main/scala/com/gu/mediaservice/BatchIndexLambdaHandler.scala @@ -17,7 +17,8 @@ class BatchIndexLambdaHandler { threshold = sys.env.get("LATENCY_THRESHOLD").map(t => Integer.parseInt(t)), maxSize = sys.env("MAX_SIZE").toInt, startState = IndexInputCreation.get(sys.env("START_STATE").toInt), - checkerStartState = IndexInputCreation.get(sys.env("CHECKER_START_STATE").toInt) + checkerStartState = IndexInputCreation.get(sys.env("CHECKER_START_STATE").toInt), + domainRoot = sys.env("DOMAIN_ROOT"), ) private val batchIndex = new BatchIndexHandler(cfg) diff --git a/admin-tools/lib/src/main/scala/com/gu/mediaservice/BatchIndexHandler.scala b/admin-tools/lib/src/main/scala/com/gu/mediaservice/BatchIndexHandler.scala index d1b9e72a06..3c2df5bd14 100644 --- a/admin-tools/lib/src/main/scala/com/gu/mediaservice/BatchIndexHandler.scala +++ b/admin-tools/lib/src/main/scala/com/gu/mediaservice/BatchIndexHandler.scala @@ -8,16 +8,15 @@ import com.amazonaws.services.dynamodbv2.document.utils.ValueMap import com.gu.mediaservice.indexing.IndexInputCreation._ import com.gu.mediaservice.indexing.ProduceProgress import com.gu.mediaservice.lib.aws.UpdateMessage +import com.gu.mediaservice.lib.config.{ServiceHosts, Services} import com.gu.mediaservice.model.Image import com.typesafe.scalalogging.LazyLogging import net.logstash.logback.marker.Markers import play.api.libs.json.{JsObject, Json} -import scala.collection.JavaConverters._ import scala.concurrent.duration.FiniteDuration import scala.concurrent.{Await, ExecutionContext, Future} import scala.util.{Failure, Success, Try} - import scala.collection.JavaConverters._ case class BatchIndexHandlerConfig( @@ -35,7 +34,8 @@ case class BatchIndexHandlerConfig( threshold: Option[Integer], startState: ProduceProgress, checkerStartState: ProduceProgress, - maxSize: Int + maxSize: Int, + domainRoot: String ) case class SuccessResult(foundImagesCount: Int, notFoundImagesCount: Int, progressHistory: String, projectionTookInSec: Long) @@ -54,9 +54,10 @@ class BatchIndexHandler(cfg: BatchIndexHandlerConfig) extends LoggingWithMarkers private val GetIdsTimeout = new FiniteDuration(20, TimeUnit.SECONDS) private val GlobalTimeout = new FiniteDuration(MainProcessingTimeoutInSec, TimeUnit.SECONDS) private val ImagesProjectionTimeout = new FiniteDuration(ProjectionTimeoutInSec, TimeUnit.MINUTES) - private val gridClient = GridClient(maxIdleConnections, debugHttpResponse = false) + val services = new Services(domainRoot, ServiceHosts.guardianPrefixes, Set.empty) + private val gridClient = GridClient(apiKey, services, maxIdleConnections, debugHttpResponse = false) - private val ImagesBatchProjector = new ImagesBatchProjection(apiKey, projectionEndpoint, ImagesProjectionTimeout, gridClient, maxSize) + private val ImagesBatchProjector = new ImagesBatchProjection(apiKey, ImagesProjectionTimeout, gridClient, maxSize) private val InputIdsStore = new InputIdsStore(AwsHelpers.buildDynamoTableClient(dynamoTableName), batchSize) import ImagesBatchProjector._ diff --git a/admin-tools/lib/src/main/scala/com/gu/mediaservice/ImageDataMerger.scala b/admin-tools/lib/src/main/scala/com/gu/mediaservice/ImageDataMerger.scala index bdb6838881..5e10290c7e 100644 --- a/admin-tools/lib/src/main/scala/com/gu/mediaservice/ImageDataMerger.scala +++ b/admin-tools/lib/src/main/scala/com/gu/mediaservice/ImageDataMerger.scala @@ -4,8 +4,6 @@ import java.net.URL import com.gu.mediaservice.lib.config.{ServiceHosts, Services} import com.gu.mediaservice.model._ -import com.gu.mediaservice.model.leases.LeasesByMedia -import com.gu.mediaservice.model.usage.Usage import com.typesafe.scalalogging.LazyLogging import org.joda.time.DateTime import play.api.libs.json._ @@ -14,7 +12,6 @@ import scala.concurrent.duration.Duration import scala.concurrent.{Await, ExecutionContext, Future} object ImageDataMergerConfig { - private val gridClient = GridClient(maxIdleConnections = 5) def apply(apiKey: String, domainRoot: String, imageLoaderEndpointOpt: Option[String]): ImageDataMergerConfig = { val services = new Services(domainRoot, ServiceHosts.guardianPrefixes, Set.empty) @@ -22,11 +19,12 @@ object ImageDataMergerConfig { case Some(uri) => uri case None => services.loaderBaseUri } - new ImageDataMergerConfig(apiKey, services, gridClient, imageLoaderEndpoint) + new ImageDataMergerConfig(apiKey, services, imageLoaderEndpoint) } } -case class ImageDataMergerConfig(apiKey: String, services: Services, gridClient: GridClient, imageLoaderEndpoint: String) { +case class ImageDataMergerConfig(apiKey: String, services: Services, imageLoaderEndpoint: String) { + val gridClient = GridClient(apiKey, services, maxIdleConnections = 5) def isValidApiKey(): Boolean = { // Make an API key authenticated request to the leases API as a way of validating the API key. // A 200 indicates a valid key. @@ -138,7 +136,6 @@ class ImageDataMerger(config: ImageDataMergerConfig) extends LazyLogging { import config._ import services._ - def getMergedImageData(mediaId: String)(implicit ec: ExecutionContext): FullImageProjectionResult = { try { val maybeImageFuture = getMergedImageDataInternal(mediaId) @@ -151,7 +148,7 @@ class ImageDataMerger(config: ImageDataMergerConfig) extends LazyLogging { } private def getMergedImageDataInternal(mediaId: String)(implicit ec: ExecutionContext): Future[Option[Image]] = { - val maybeImage: Option[Image] = getImageLoaderProjection(mediaId) + val maybeImage: Option[Image] = gridClient.getImageLoaderProjection(mediaId, imageLoaderEndpoint) maybeImage match { case Some(img) => aggregate(img).map { aggImg => @@ -166,10 +163,10 @@ class ImageDataMerger(config: ImageDataMergerConfig) extends LazyLogging { val mediaId = image.id for { collections <- getCollectionsResponse(mediaId) - edits <- getEdits(mediaId) - leases <- getLeases(mediaId) - usages <- getUsages(mediaId) - crops <- getCrops(mediaId) + edits <- gridClient.getEdits(mediaId) + leases <- gridClient.getLeases(mediaId) + usages <- gridClient.getUsages(mediaId) + crops <- gridClient.getCrops(mediaId) } yield image.copy( collections = collections, userMetadata = edits, @@ -179,15 +176,6 @@ class ImageDataMerger(config: ImageDataMergerConfig) extends LazyLogging { ) } - private def getImageLoaderProjection(mediaId: String): Option[Image] = { - logger.info("attempt to get image projection from image-loader") - val url = new URL(s"$imageLoaderEndpoint/images/project/$mediaId") - val res = gridClient.makeGetRequestSync(url, apiKey) - validateResponse(res, url) - logger.info(s"got image projection from image-loader for $mediaId with status code $res.statusCode") - if (res.statusCode == 200) Some(res.body.as[Image]) else None - } - private def getCollectionsResponse(mediaId: String)(implicit ec: ExecutionContext): Future[List[Collection]] = { logger.info("attempt to get collections") val url = new URL(s"$collectionsBaseUri/images/$mediaId") @@ -197,49 +185,6 @@ class ImageDataMerger(config: ImageDataMergerConfig) extends LazyLogging { } } - private def getEdits(mediaId: String)(implicit ec: ExecutionContext): Future[Option[Edits]] = { - logger.info("attempt to get edits") - val url = new URL(s"$metadataBaseUri/edits/$mediaId") - gridClient.makeGetRequestAsync(url, apiKey).map { res => - validateResponse(res, url) - if (res.statusCode == 200) Some((res.body \ "data").as[Edits]) else None - } - } - - private def getCrops(mediaId: String)(implicit ec: ExecutionContext): Future[List[Crop]] = { - logger.info("attempt to get crops") - val url = new URL(s"$cropperBaseUri/crops/$mediaId") - gridClient.makeGetRequestAsync(url, apiKey).map { res => - validateResponse(res, url) - if (res.statusCode == 200) (res.body \ "data").as[List[Crop]] else Nil - } - } - - private def getLeases(mediaId: String)(implicit ec: ExecutionContext): Future[LeasesByMedia] = { - logger.info("attempt to get leases") - val url = new URL(s"$leasesBaseUri/leases/media/$mediaId") - gridClient.makeGetRequestAsync(url, apiKey).map { res => - validateResponse(res, url) - if (res.statusCode == 200) (res.body \ "data").as[LeasesByMedia] else LeasesByMedia.empty - } - } - - private def getUsages(mediaId: String)(implicit ec: ExecutionContext): Future[List[Usage]] = { - logger.info("attempt to get usages") - - def unpackUsagesFromEntityResponse(resBody: JsValue): List[JsValue] = { - (resBody \ "data").as[JsArray].value - .map(entity => (entity.as[JsObject] \ "data").as[JsValue]).toList - } - - val url = new URL(s"$usageBaseUri/usages/media/$mediaId") - gridClient.makeGetRequestAsync(url, apiKey).map { res => - validateResponse(res, url) - if (res.statusCode == 200) unpackUsagesFromEntityResponse(res.body).map(_.as[Usage]) - else Nil - } - } - private def validateResponse(res: ResponseWrapper, url: URL): Unit = { import res._ if (statusCode != 200 && statusCode != 404) { @@ -260,6 +205,3 @@ class ImageDataMerger(config: ImageDataMergerConfig) extends LazyLogging { } } -class DownstreamApiInBadStateException(message: String, downstreamMessage: String) extends IllegalStateException(message) { - def getDownstreamMessage = downstreamMessage -} diff --git a/admin-tools/lib/src/main/scala/com/gu/mediaservice/ImagesBatchProjection.scala b/admin-tools/lib/src/main/scala/com/gu/mediaservice/ImagesBatchProjection.scala index 54ba3a0916..abc3429096 100644 --- a/admin-tools/lib/src/main/scala/com/gu/mediaservice/ImagesBatchProjection.scala +++ b/admin-tools/lib/src/main/scala/com/gu/mediaservice/ImagesBatchProjection.scala @@ -7,7 +7,7 @@ import com.gu.mediaservice.model.Image import scala.concurrent.duration.Duration import scala.concurrent.{Await, ExecutionContext, Future} -class ImagesBatchProjection(apiKey: String, domainRoot: String, timeout: Duration, gridClient: GridClient, maxSize: Int) { +class ImagesBatchProjection(apiKey: String, timeout: Duration, gridClient: GridClient, maxSize: Int) { private implicit val ThrottledExecutionContext = ExecutionContext.fromExecutor(java.util.concurrent.Executors.newFixedThreadPool(5)) diff --git a/build.sbt b/build.sbt index 5227c3fd54..0baddc64c9 100644 --- a/build.sbt +++ b/build.sbt @@ -110,7 +110,8 @@ lazy val commonLib = project("common-lib").settings( "org.codehaus.janino" % "janino" % "3.0.6", "com.typesafe.play" %% "play-json-joda" % "2.6.9", "com.gu" %% "scanamo" % "1.0.0-M8", - "com.fasterxml.jackson.core" % "jackson-databind" % "2.9.10.7" + "com.fasterxml.jackson.core" % "jackson-databind" % "2.9.10.7", + "com.squareup.okhttp3" % "okhttp" % okHttpVersion ), dependencyOverrides += "org.apache.thrift" % "libthrift" % "0.9.1" ).settings(bbcCommonLibSettings) @@ -134,7 +135,6 @@ lazy val cropper = playProject("cropper", 9006) lazy val imageLoader = playProject("image-loader", 9003).settings { libraryDependencies ++= Seq( - "com.squareup.okhttp3" % "okhttp" % okHttpVersion, "org.apache.tika" % "tika-core" % "1.20", "com.drewnoakes" % "metadata-extractor" % "2.15.0" ) @@ -180,7 +180,6 @@ lazy val adminToolsLib = project("admin-tools-lib", Some("admin-tools/lib")) ExclusionRule("com.gu", "kinesis-logback-appender") ), libraryDependencies ++= Seq( - "com.squareup.okhttp3" % "okhttp" % okHttpVersion, "com.typesafe.play" %% "play-json" % "2.6.9", "com.typesafe.play" %% "play-json-joda" % "2.6.9", "com.typesafe.play" %% "play-functional" % "2.6.9", diff --git a/admin-tools/lib/src/main/scala/com/gu/mediaservice/GridClient.scala b/common-lib/src/main/scala/com/gu/mediaservice/GridClient.scala similarity index 55% rename from admin-tools/lib/src/main/scala/com/gu/mediaservice/GridClient.scala rename to common-lib/src/main/scala/com/gu/mediaservice/GridClient.scala index ef2589ab8f..8d9a4ed3a8 100644 --- a/admin-tools/lib/src/main/scala/com/gu/mediaservice/GridClient.scala +++ b/common-lib/src/main/scala/com/gu/mediaservice/GridClient.scala @@ -2,13 +2,18 @@ package com.gu.mediaservice import java.io.IOException import java.net.URL + import com.gu.mediaservice.lib.auth.provider.ApiKeyAuthentication +import com.gu.mediaservice.lib.config.Services +import com.gu.mediaservice.model.{Crop, Edits, Image} +import com.gu.mediaservice.model.leases.LeasesByMedia +import com.gu.mediaservice.model.usage.Usage import com.typesafe.scalalogging.LazyLogging +import play.api.libs.json.{JsArray, JsObject, JsValue, Json} import okhttp3._ -import play.api.libs.json.{JsValue, Json} -import scala.util.{Failure, Success, Try} import scala.concurrent.{ExecutionContext, Future, Promise} +import scala.util.{Failure, Success, Try} object ClientResponse { case class Message(errorMessage: String, downstreamErrorMessage: String) @@ -35,10 +40,11 @@ case class ClientErrorMessages(errorMessage: String, downstreamErrorMessage: Str case class ResponseWrapper(body: JsValue, statusCode: Int, bodyAsString: String) object GridClient { - def apply(maxIdleConnections: Int, debugHttpResponse: Boolean = true): GridClient = new GridClient(maxIdleConnections, debugHttpResponse) + def apply(apiKey: String, services: Services, maxIdleConnections: Int, debugHttpResponse: Boolean = true): GridClient = + new GridClient(apiKey, services, maxIdleConnections, debugHttpResponse) } -class GridClient(maxIdleConnections: Int, debugHttpResponse: Boolean) extends ApiKeyAuthentication with LazyLogging { +class GridClient(apiKey: String, services: Services, maxIdleConnections: Int, debugHttpResponse: Boolean) extends ApiKeyAuthentication with LazyLogging { import java.util.concurrent.TimeUnit @@ -136,5 +142,82 @@ class GridClient(maxIdleConnections: Int, debugHttpResponse: Boolean) extends Ap }) promise.future } + + private def validateResponse(res: ResponseWrapper, url: URL): Unit = { + import res._ + if (statusCode != 200 && statusCode != 404) { + val errorMessage = s"breaking the circuit of full image projection, downstream API: $url is in a bad state, code: $statusCode" + val downstreamErrorMessage = res.bodyAsString + + val errorJson = Json.obj( + "level" -> "ERROR", + "errorStatusCode" -> statusCode, + "message" -> Json.obj( + "errorMessage" -> errorMessage, + "downstreamErrorMessage" -> downstreamErrorMessage + ) + ) + logger.error(errorJson.toString()) + throw new DownstreamApiInBadStateException(errorMessage, downstreamErrorMessage) + } + } + + def getImageLoaderProjection(mediaId: String, imageLoaderEndpoint: String): Option[Image] = { + logger.info("attempt to get image projection from image-loader") + val url = new URL(s"${imageLoaderEndpoint}/images/project/$mediaId") + val res = makeGetRequestSync(url, apiKey) + validateResponse(res, url) + logger.info(s"got image projection from image-loader for $mediaId with status code $res.statusCode") + if (res.statusCode == 200) Some(res.body.as[Image]) else None + } + + def getLeases(mediaId: String)(implicit ec: ExecutionContext): Future[LeasesByMedia] = { + logger.info("attempt to get leases") + val url = new URL(s"${services.leasesBaseUri}/leases/media/$mediaId") + makeGetRequestAsync(url, apiKey).map { res => + validateResponse(res, url) + if (res.statusCode == 200) (res.body \ "data").as[LeasesByMedia] else LeasesByMedia.empty + } + } + + def getEdits(mediaId: String)(implicit ec: ExecutionContext): Future[Option[Edits]] = { + logger.info("attempt to get edits") + val url = new URL(s"${services.metadataBaseUri}/edits/$mediaId") + makeGetRequestAsync(url, apiKey).map { res => + validateResponse(res, url) + if (res.statusCode == 200) Some((res.body \ "data").as[Edits]) else None + } + } + + def getCrops(mediaId: String)(implicit ec: ExecutionContext): Future[List[Crop]] = { + logger.info("attempt to get crops") + val url = new URL(s"${services.cropperBaseUri}/crops/$mediaId") + makeGetRequestAsync(url, apiKey).map { res => + validateResponse(res, url) + if (res.statusCode == 200) (res.body \ "data").as[List[Crop]] else Nil + } + } + + def getUsages(mediaId: String)(implicit ec: ExecutionContext): Future[List[Usage]] = { + logger.info("attempt to get usages") + + def unpackUsagesFromEntityResponse(resBody: JsValue): List[JsValue] = { + (resBody \ "data").as[JsArray].value + .map(entity => (entity.as[JsObject] \ "data").as[JsValue]).toList + } + + val url = new URL(s"${services.usageBaseUri}/usages/media/$mediaId") + makeGetRequestAsync(url, apiKey).map { res => + validateResponse(res, url) + if (res.statusCode == 200) unpackUsagesFromEntityResponse(res.body).map(_.as[Usage]) + else Nil + } + } + + + } +class DownstreamApiInBadStateException(message: String, downstreamMessage: String) extends IllegalStateException(message) { + def getDownstreamMessage = downstreamMessage +} From 53d43cc158f7b32eb5e767bcdcad7210a65847da Mon Sep 17 00:00:00 2001 From: Simon Hildrew Date: Fri, 12 Feb 2021 16:42:29 +0000 Subject: [PATCH 05/18] Sanitise creation and reading of S3 user metadata --- .../gu/mediaservice/lib/ImageStorage.scala | 4 ++ image-loader/app/model/Projector.scala | 37 +++++++++---------- image-loader/app/model/Uploader.scala | 22 ++++++----- .../app/model/upload/UploadRequest.scala | 8 +++- .../test/scala/model/ProjectorTest.scala | 2 +- 5 files changed, 41 insertions(+), 32 deletions(-) diff --git a/common-lib/src/main/scala/com/gu/mediaservice/lib/ImageStorage.scala b/common-lib/src/main/scala/com/gu/mediaservice/lib/ImageStorage.scala index ad4ea3f1c6..8d4837f75c 100644 --- a/common-lib/src/main/scala/com/gu/mediaservice/lib/ImageStorage.scala +++ b/common-lib/src/main/scala/com/gu/mediaservice/lib/ImageStorage.scala @@ -13,6 +13,10 @@ import com.gu.mediaservice.model.MimeType object ImageStorageProps { val cacheDuration: Duration = 365 days val cacheForever: String = s"max-age=${cacheDuration.toSeconds}" + val filenameMetadataKey: String = "file_name" + val uploadTimeMetadataKey: String = "upload_time" + val uploadedByMetadataKey: String = "uploaded_by" + val identifierMetadataKeyPrefix: String = "identifier!" } trait ImageStorage { diff --git a/image-loader/app/model/Projector.scala b/image-loader/app/model/Projector.scala index 18fa1d2d48..83a42037bd 100644 --- a/image-loader/app/model/Projector.scala +++ b/image-loader/app/model/Projector.scala @@ -2,10 +2,9 @@ package model import java.io.{File, FileOutputStream} import java.util.UUID - import com.amazonaws.services.s3.AmazonS3 import com.amazonaws.services.s3.model.{ObjectMetadata, S3Object} -import com.gu.mediaservice.lib.{ImageIngestOperations, StorableOptimisedImage, StorableOriginalImage, StorableThumbImage} +import com.gu.mediaservice.lib.{ImageIngestOperations, ImageStorageProps, StorableOptimisedImage, StorableOriginalImage, StorableThumbImage} import com.gu.mediaservice.lib.aws.S3Ops import com.gu.mediaservice.lib.cleanup.ImageProcessor import com.gu.mediaservice.lib.imaging.ImageOperations @@ -35,28 +34,32 @@ case class S3FileExtractedMetadata( uploadedBy: String, uploadTime: DateTime, uploadFileName: Option[String], - picdarUrn: Option[String] + identifiers: Map[String, String] ) object S3FileExtractedMetadata { def apply(s3ObjectMetadata: ObjectMetadata): S3FileExtractedMetadata = { val lastModified = s3ObjectMetadata.getLastModified.toInstant.toString val fileUserMetadata = s3ObjectMetadata.getUserMetadata.asScala.toMap + // The values can be URL encoded in S3 metadata and it is safe to decode everything (based on the tested corpus) + .mapValues(URI.decode) - val uploadedBy = fileUserMetadata.getOrElse("uploaded_by", "re-ingester") - val uploadedTimeRaw = fileUserMetadata.getOrElse("upload_time", lastModified) + val uploadedBy = fileUserMetadata.getOrElse(ImageStorageProps.uploadedByMetadataKey, "re-ingester") + val uploadedTimeRaw = fileUserMetadata.getOrElse(ImageStorageProps.uploadTimeMetadataKey, lastModified) val uploadTime = new DateTime(uploadedTimeRaw).withZone(DateTimeZone.UTC) - val picdarUrn = fileUserMetadata.get("identifier!picdarurn") + val identifiers = fileUserMetadata.filter{ case (key, _) => + key.startsWith(ImageStorageProps.identifierMetadataKeyPrefix) + }.map{ case (key, value) => + key.stripPrefix(ImageStorageProps.identifierMetadataKeyPrefix) -> value + } - val uploadFileNameRaw = fileUserMetadata.get("file_name") - // The file name is URL encoded in S3 metadata - val uploadFileName = uploadFileNameRaw.map(URI.decode) + val uploadFileName = fileUserMetadata.get(ImageStorageProps.filenameMetadataKey) S3FileExtractedMetadata( uploadedBy = uploadedBy, uploadTime = uploadTime, uploadFileName = uploadFileName, - picdarUrn = picdarUrn, + identifiers = identifiers, ) } } @@ -96,14 +99,10 @@ class Projector(config: ImageUploadOpsCfg, def projectImage(srcFileDigest: DigestedFile, extractedS3Meta: S3FileExtractedMetadata, requestId: UUID) (implicit ec: ExecutionContext, logMarker: LogMarker): Future[Image] = { - import extractedS3Meta._ val DigestedFile(tempFile_, id_) = srcFileDigest - // TODO more identifiers_ to rehydrate - val identifiers_ = picdarUrn match { - case Some(value) => Map[String, String]("picdarURN" -> value) - case _ => Map[String, String]() - } - val uploadInfo_ = UploadInfo(filename = uploadFileName) + + val identifiers_ = extractedS3Meta.identifiers + val uploadInfo_ = UploadInfo(filename = extractedS3Meta.uploadFileName) MimeTypeDetection.guessMimeType(tempFile_) match { case util.Left(unsupported) => Future.failed(unsupported) @@ -113,8 +112,8 @@ class Projector(config: ImageUploadOpsCfg, imageId = id_, tempFile = tempFile_, mimeType = Some(mimeType), - uploadTime = uploadTime, - uploadedBy, + uploadTime = extractedS3Meta.uploadTime, + uploadedBy = extractedS3Meta.uploadedBy, identifiers = identifiers_, uploadInfo = uploadInfo_ ) diff --git a/image-loader/app/model/Uploader.scala b/image-loader/app/model/Uploader.scala index 23b89ff006..5e73aa9d1f 100644 --- a/image-loader/app/model/Uploader.scala +++ b/image-loader/app/model/Uploader.scala @@ -5,11 +5,10 @@ import java.net.URLEncoder import java.nio.charset.StandardCharsets import java.nio.file.Files import java.util.UUID - import com.gu.mediaservice.lib.argo.ArgoHelpers import com.gu.mediaservice.lib.auth.Authentication import com.gu.mediaservice.lib.auth.Authentication.Principal -import com.gu.mediaservice.lib.{BrowserViewableImage, StorableOptimisedImage, StorableOriginalImage, StorableThumbImage} +import com.gu.mediaservice.lib.{BrowserViewableImage, ImageStorageProps, StorableOptimisedImage, StorableOriginalImage, StorableThumbImage} import com.gu.mediaservice.lib.aws.{S3Object, UpdateMessage} import com.gu.mediaservice.lib.cleanup.{ImageProcessor, MetadataCleaners, SupplierProcessors} import com.gu.mediaservice.lib.config.MetadataConfig @@ -17,6 +16,7 @@ import com.gu.mediaservice.lib.formatting._ import com.gu.mediaservice.lib.imaging.ImageOperations import com.gu.mediaservice.lib.logging._ import com.gu.mediaservice.lib.metadata.{FileMetadataHelper, ImageMetadataConverter} +import com.gu.mediaservice.lib.net.URI import com.gu.mediaservice.model._ import lib.{DigestedFile, ImageLoaderConfig, Notifications} import lib.imaging.{FileMetadataReader, MimeTypeDetection} @@ -201,14 +201,13 @@ object Uploader extends GridLogging { def toMetaMap(uploadRequest: UploadRequest): Map[String, String] = { val baseMeta = Map( - "uploaded_by" -> uploadRequest.uploadedBy, - "upload_time" -> printDateTime(uploadRequest.uploadTime) - ) ++ uploadRequest.identifiersMeta + ImageStorageProps.uploadedByMetadataKey -> uploadRequest.uploadedBy, + ImageStorageProps.uploadTimeMetadataKey -> printDateTime(uploadRequest.uploadTime) + ) ++ + uploadRequest.identifiersMeta ++ + uploadRequest.uploadInfo.filename.map(ImageStorageProps.filenameMetadataKey -> _) - uploadRequest.uploadInfo.filename match { - case Some(f) => baseMeta ++ Map("file_name" -> URLEncoder.encode(f, StandardCharsets.UTF_8.name())) - case _ => baseMeta - } + baseMeta.mapValues(URI.encode) } private def toFileMetadata(f: File, imageId: String, mimeType: Option[MimeType]): Future[FileMetadata] = { @@ -297,7 +296,10 @@ class Uploader(val store: ImageLoaderStore, val DigestedFile(tempFile, id) = digestedFile // TODO: should error if the JSON parsing failed - val identifiersMap = identifiers.map(Json.parse(_).as[Map[String, String]]) getOrElse Map() + val identifiersMap = identifiers + .map(Json.parse(_).as[Map[String, String]]) + .getOrElse(Map.empty) + .mapValues(_.toLowerCase) MimeTypeDetection.guessMimeType(tempFile) match { case util.Left(unsupported) => diff --git a/image-loader/app/model/upload/UploadRequest.scala b/image-loader/app/model/upload/UploadRequest.scala index b256776a0a..e3298e7ef1 100644 --- a/image-loader/app/model/upload/UploadRequest.scala +++ b/image-loader/app/model/upload/UploadRequest.scala @@ -1,12 +1,14 @@ package model.upload +import com.gu.mediaservice.lib.ImageStorageProps + import java.io.File import java.util.UUID - import com.gu.mediaservice.model.{MimeType, UploadInfo} import net.logstash.logback.marker.{LogstashMarker, Markers} import org.joda.time.format.ISODateTimeFormat import org.joda.time.{DateTime, DateTimeZone} + import scala.collection.JavaConverters._ case class UploadRequest( @@ -20,7 +22,9 @@ case class UploadRequest( uploadInfo: UploadInfo ) { - val identifiersMeta: Map[String, String] = identifiers.map { case (k, v) => (s"identifier!$k", v) } + val identifiersMeta: Map[String, String] = identifiers.map { case (k, v) => + (s"${ImageStorageProps.identifierMetadataKeyPrefix}$k", v) + } def toLogMarker: LogstashMarker = { val fallback = "none" diff --git a/image-loader/test/scala/model/ProjectorTest.scala b/image-loader/test/scala/model/ProjectorTest.scala index ba860dc3e9..14671493ff 100644 --- a/image-loader/test/scala/model/ProjectorTest.scala +++ b/image-loader/test/scala/model/ProjectorTest.scala @@ -159,7 +159,7 @@ class ProjectorTest extends FunSuite with Matchers with ScalaFutures with Mockit uploadedBy = uploadedBy, uploadTime = uploadTime, uploadFileName = uploadFileName, - picdarUrn = None, + identifiers = Map.empty, ) implicit val requestLoggingContext = RequestLoggingContext() From 84fd653e22df2a75c9d027903ef8d31d4217dfaf Mon Sep 17 00:00:00 2001 From: Simon Hildrew Date: Fri, 12 Feb 2021 17:32:55 +0000 Subject: [PATCH 06/18] Switch to kebab case keys --- .../scala/com/gu/mediaservice/lib/ImageStorage.scala | 6 +++--- image-loader/app/model/Projector.scala | 12 ++++++++++-- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/common-lib/src/main/scala/com/gu/mediaservice/lib/ImageStorage.scala b/common-lib/src/main/scala/com/gu/mediaservice/lib/ImageStorage.scala index 8d4837f75c..6f29297dfa 100644 --- a/common-lib/src/main/scala/com/gu/mediaservice/lib/ImageStorage.scala +++ b/common-lib/src/main/scala/com/gu/mediaservice/lib/ImageStorage.scala @@ -13,9 +13,9 @@ import com.gu.mediaservice.model.MimeType object ImageStorageProps { val cacheDuration: Duration = 365 days val cacheForever: String = s"max-age=${cacheDuration.toSeconds}" - val filenameMetadataKey: String = "file_name" - val uploadTimeMetadataKey: String = "upload_time" - val uploadedByMetadataKey: String = "uploaded_by" + val filenameMetadataKey: String = "file-name" + val uploadTimeMetadataKey: String = "upload-time" + val uploadedByMetadataKey: String = "uploaded-by" val identifierMetadataKeyPrefix: String = "identifier!" } diff --git a/image-loader/app/model/Projector.scala b/image-loader/app/model/Projector.scala index 83a42037bd..6b58f83019 100644 --- a/image-loader/app/model/Projector.scala +++ b/image-loader/app/model/Projector.scala @@ -41,8 +41,16 @@ object S3FileExtractedMetadata { def apply(s3ObjectMetadata: ObjectMetadata): S3FileExtractedMetadata = { val lastModified = s3ObjectMetadata.getLastModified.toInstant.toString val fileUserMetadata = s3ObjectMetadata.getUserMetadata.asScala.toMap - // The values can be URL encoded in S3 metadata and it is safe to decode everything (based on the tested corpus) - .mapValues(URI.decode) + .map { case (key, value) => + // Fix up the contents of the metadata. + ( + // The keys used to be named with underscores instead of dashes but due to localstack being written in Python + // this didn't work locally (see https://github.com/localstack/localstack/issues/459) + key.replaceAll("_", "-"), + // The values are now all URL encoded and it is assumed safe to decode historical values too (based on the tested corpus) + URI.decode(value) + ) + } val uploadedBy = fileUserMetadata.getOrElse(ImageStorageProps.uploadedByMetadataKey, "re-ingester") val uploadedTimeRaw = fileUserMetadata.getOrElse(ImageStorageProps.uploadTimeMetadataKey, lastModified) From e1ac563df988bb9c225f8647d480a2ebd09fd3c7 Mon Sep 17 00:00:00 2001 From: Simon Hildrew Date: Tue, 16 Feb 2021 15:46:02 +0000 Subject: [PATCH 07/18] Add some simple tests for functionality --- .../test/scala/model/ProjectorTest.scala | 64 +++++++++++++++++-- 1 file changed, 59 insertions(+), 5 deletions(-) diff --git a/image-loader/test/scala/model/ProjectorTest.scala b/image-loader/test/scala/model/ProjectorTest.scala index 14671493ff..4b54b3632c 100644 --- a/image-loader/test/scala/model/ProjectorTest.scala +++ b/image-loader/test/scala/model/ProjectorTest.scala @@ -2,9 +2,9 @@ package model import java.io.File import java.net.URI -import java.util.UUID - +import java.util.{Date, UUID} import com.amazonaws.services.s3.AmazonS3 +import com.amazonaws.services.s3.model.ObjectMetadata import com.gu.mediaservice.lib.cleanup.ImageProcessor import com.gu.mediaservice.lib.imaging.ImageOperations import com.gu.mediaservice.lib.logging.RequestLoggingContext @@ -15,13 +15,14 @@ import org.joda.time.{DateTime, DateTimeZone} import org.scalatest.concurrent.ScalaFutures import org.scalatest.mockito.MockitoSugar import org.scalatest.time.{Millis, Span} -import org.scalatest.{FunSuite, Matchers} +import org.scalatest.{FreeSpec, FunSuite, Matchers} import play.api.libs.json.{JsArray, JsString} import test.lib.ResourceHelpers import scala.concurrent.ExecutionContext.Implicits.global +import scala.collection.JavaConverters._ -class ProjectorTest extends FunSuite with Matchers with ScalaFutures with MockitoSugar { +class ProjectorTest extends FreeSpec with Matchers with ScalaFutures with MockitoSugar { import ResourceHelpers.fileAt @@ -39,7 +40,7 @@ class ProjectorTest extends FunSuite with Matchers with ScalaFutures with Mockit // FIXME temporary ignored as test is not executable in CI/CD machine // because graphic lib files like srgb.icc, cmyk.icc are in root directory instead of resources // this test is passing when running on local machine - ignore("projectImage") { + "projectImage" ignore { val testFile = fileAt("resources/getty.jpg") val fileDigest = DigestedFile(testFile, "id123") @@ -171,5 +172,58 @@ class ProjectorTest extends FunSuite with Matchers with ScalaFutures with Mockit } } + "S3FileExtractedMetadata" - { + "should extract URL encoded metadata" in { + val s3Metadata = new ObjectMetadata() + s3Metadata.setLastModified(new Date(1613388118000L)) + s3Metadata.setUserMetadata(Map( + "file-name" -> "This%20photo%20was%20taken%20in%20%C5%81%C3%B3d%C5%BA.jpg", + "uploaded-by" -> "s%C3%A9b.cevey%40theguardian.co.uk", + "upload-time" -> "2021-02-01T12%3A52%3A34%2B09%3A00", + "identifier!picdarurn" -> "12*543%5E25" + ).asJava) + + val result = S3FileExtractedMetadata(s3Metadata) + result.uploadFileName shouldBe Some("This photo was taken in Łódź.jpg") + result.uploadedBy shouldBe "séb.cevey@theguardian.co.uk" + result.uploadTime.toString shouldBe "2021-02-01T03:52:34.000Z" + result.identifiers.size shouldBe 1 + result.identifiers.get("picdarurn") shouldBe Some("12*543^25") + } + + "should remap headers with underscores to dashes" in { + val s3Metadata = new ObjectMetadata() + s3Metadata.setLastModified(new Date(1613388118000L)) + s3Metadata.setUserMetadata(Map( + "file_name" -> "filename.jpg", + "uploaded_by" -> "user", + "upload_time" -> "2021-02-01T12%3A52%3A34%2B09%3A00", + "identifier!picdarurn" -> "12*543" + ).asJava) + + val result = S3FileExtractedMetadata(s3Metadata) + result.uploadFileName shouldBe Some("filename.jpg") + result.uploadedBy shouldBe "user" + result.uploadTime.toString shouldBe "2021-02-01T03:52:34.000Z" + result.identifiers.size shouldBe 1 + result.identifiers.get("picdarurn") shouldBe Some("12*543") + } + + "should correctly read in non URL encoded values" in { + // we have plenty of values in S3 that are not URL encoded + // and we must be able to read them correctly + val s3Metadata = new ObjectMetadata() + s3Metadata.setLastModified(new Date(1613388118000L)) + s3Metadata.setUserMetadata(Map( + "uploaded_by" -> "user", + "upload_time" -> "2019-12-11T01:12:10.427Z", + ).asJava) + + val result = S3FileExtractedMetadata(s3Metadata) + result.uploadedBy shouldBe "user" + result.uploadTime.toString shouldBe "2019-12-11T01:12:10.427Z" + } + } + } From 1892fcc4a4772e30fb57754478ee3ee5eb329ea0 Mon Sep 17 00:00:00 2001 From: Simon Hildrew Date: Tue, 16 Feb 2021 15:46:24 +0000 Subject: [PATCH 08/18] Tools for evaluating the corpus --- build.sbt | 9 +++ .../mediaservice/scripts/BucketMetadata.scala | 64 +++++++++++++++++++ .../scripts/DecodeComparator.scala | 39 +++++++++++ .../com/gu/mediaservice/scripts/Main.scala | 2 + 4 files changed, 114 insertions(+) create mode 100644 scripts/src/main/scala/com/gu/mediaservice/scripts/BucketMetadata.scala create mode 100644 scripts/src/main/scala/com/gu/mediaservice/scripts/DecodeComparator.scala diff --git a/build.sbt b/build.sbt index 0baddc64c9..4ae757cf0d 100644 --- a/build.sbt +++ b/build.sbt @@ -246,6 +246,15 @@ lazy val usage = playProject("usage", 9009).settings( lazy val scripts = project("scripts") .dependsOn(commonLib) + .enablePlugins(JavaAppPackaging, UniversalPlugin) + .settings( + libraryDependencies ++= Seq( + // V2 of the AWS SDK as it's easier to use for scripts and won't leak to the rest of the project from here + "software.amazon.awssdk" % "s3" % "2.15.81", + // bump jcommander explicitly as AWS SDK is pulling in a vulnerable version + "com.beust" % "jcommander" % "1.75" + ) + ) lazy val migration = project("migration") .dependsOn(commonLib). diff --git a/scripts/src/main/scala/com/gu/mediaservice/scripts/BucketMetadata.scala b/scripts/src/main/scala/com/gu/mediaservice/scripts/BucketMetadata.scala new file mode 100644 index 0000000000..232c7d342c --- /dev/null +++ b/scripts/src/main/scala/com/gu/mediaservice/scripts/BucketMetadata.scala @@ -0,0 +1,64 @@ +package com.gu.mediaservice.scripts + + +import org.joda.time.DateTime +import play.api.libs.json.Json +import software.amazon.awssdk.auth.credentials.{DefaultCredentialsProvider, ProfileCredentialsProvider} +import software.amazon.awssdk.regions.Region +import software.amazon.awssdk.services.s3.S3Client +import software.amazon.awssdk.services.s3.model.{HeadObjectRequest, ListObjectsV2Request} + +import java.io.{BufferedWriter, File, FileWriter} +import java.time.Instant +import scala.collection.JavaConverters.{asScalaIteratorConverter, iterableAsScalaIterableConverter, mapAsScalaMapConverter} + +case class ObjectMetadata(key: String, lastModified: Instant, metadata: Map[String, String]) +object ObjectMetadata { + implicit val format = Json.format[ObjectMetadata] +} + +/** + * Dump selected metadata for all objects in a bucket. + * Given a bucket and an output file this will create a JSON line per object containing the key, lastModified time and + * user metadata. + */ +object BucketMetadata { + def apply(args: List[String]): Unit = { + args match { + case bucketName :: fileName :: Nil => bucketMetadata(bucketName, new File(fileName)) + case _ => throw new IllegalArgumentException("Usage: BucketMetadata ") + } + } + + def bucketMetadata(bucketName: String, outputFile: File) = { + val fw = new FileWriter(outputFile) + System.err.println(s"Output encoding: ${fw.getEncoding}") + val stream = new BufferedWriter(fw) + + try { + val s3: S3Client = S3Client.builder + .region(Region.EU_WEST_1) + .credentialsProvider(DefaultCredentialsProvider.builder.profileName("media-service").build) + .build + + val results = s3.listObjectsV2Paginator(ListObjectsV2Request.builder.bucket(bucketName).build) + results.iterator().asScala.flatMap { results => + results.contents().asScala + }.map { s3Object => + val headObjectResponse = s3.headObject(HeadObjectRequest.builder.bucket(bucketName).key(s3Object.key).build) + s3Object -> headObjectResponse + }.map { case (s3Object, metadata) => + ObjectMetadata(s3Object.key, metadata.lastModified, metadata.metadata.asScala.toMap) + }.map { md => + Json.stringify(Json.toJson(md)) + }.zipWithIndex + .foreach { case (json, idx) => + if (idx % 1000 == 0) System.err.println(s"${DateTime.now.toString}: ${idx}") + stream.write(json) + stream.newLine() + } + } finally { + stream.close() + } + } +} diff --git a/scripts/src/main/scala/com/gu/mediaservice/scripts/DecodeComparator.scala b/scripts/src/main/scala/com/gu/mediaservice/scripts/DecodeComparator.scala new file mode 100644 index 0000000000..6d2113dcb1 --- /dev/null +++ b/scripts/src/main/scala/com/gu/mediaservice/scripts/DecodeComparator.scala @@ -0,0 +1,39 @@ +package com.gu.mediaservice.scripts + +import com.gu.mediaservice.lib.net.URI +import play.api.libs.json.{JsError, JsSuccess, Json} + +import java.io.File +import scala.io.Source + +/** + * Given a JSON lines file output from BucketMetadata this will verify that metadata is not changed when being + * passed through a URI decode function. This was to check that it is safe to deploy + * https://github.com/guardian/grid/pull/3165 + */ +object DecodeComparator { + def apply(args: List[String]): Unit = { + args match { + case fileName :: Nil => compare(new File(fileName)) + case as => throw new IllegalArgumentException("Usage: DecodeComparator ") + } + } + + def compare(file: File): Unit = { + val source = Source.fromFile(file) + try { + source.getLines().foreach { line => + Json.fromJson[ObjectMetadata](Json.parse(line)) match { + case JsError(errors) => System.err.println(s"Couldn't parse JSON $line") + case JsSuccess(metadata, _) => + metadata.metadata.toList.foreach{ case (key, value) => + val decodedValue = URI.decode(value) + if (value != decodedValue) System.out.println(s"Difference between $key '$value' and '$decodedValue'") + } + } + } + } finally { + source.close() + } + } +} diff --git a/scripts/src/main/scala/com/gu/mediaservice/scripts/Main.scala b/scripts/src/main/scala/com/gu/mediaservice/scripts/Main.scala index d324473d45..a749af86af 100644 --- a/scripts/src/main/scala/com/gu/mediaservice/scripts/Main.scala +++ b/scripts/src/main/scala/com/gu/mediaservice/scripts/Main.scala @@ -11,6 +11,8 @@ object Main extends App { case "GetSettings" :: as => GetSettings(as) case "UpdateSettings" :: as => UpdateSettings(as) case "ConvertConfig" :: as => ConvertConfig(as) + case "BucketMetadata" :: as => BucketMetadata(as) + case "DecodeComparator" :: as => DecodeComparator(as) case a :: _ => sys.error(s"Unrecognised command: $a") case Nil => sys.error("Usage: ") } From 7b826ffb4e04b710cba50f502b715c91a4f905a0 Mon Sep 17 00:00:00 2001 From: Simon Hildrew Date: Fri, 19 Feb 2021 12:12:42 +0000 Subject: [PATCH 09/18] Only put original image in bucket if absent At the moment we will overwrite subsequent uploads which means that the metadata (last modified time and user headers) are lost. This change should mean that we do not lose the original version of this data. --- .../mediaservice/lib/ImageIngestOperations.scala | 9 ++++++--- .../lib/ImageQuarantineOperations.scala | 2 +- .../com/gu/mediaservice/lib/ImageStorage.scala | 3 ++- .../com/gu/mediaservice/lib/S3ImageStorage.scala | 16 ++++++++++------ .../scala/com/gu/mediaservice/lib/aws/S3.scala | 14 ++++++++++++++ cropper/app/lib/CropStore.scala | 2 +- 6 files changed, 34 insertions(+), 12 deletions(-) diff --git a/common-lib/src/main/scala/com/gu/mediaservice/lib/ImageIngestOperations.scala b/common-lib/src/main/scala/com/gu/mediaservice/lib/ImageIngestOperations.scala index 1b070331fd..f70a00d2b3 100644 --- a/common-lib/src/main/scala/com/gu/mediaservice/lib/ImageIngestOperations.scala +++ b/common-lib/src/main/scala/com/gu/mediaservice/lib/ImageIngestOperations.scala @@ -29,15 +29,18 @@ class ImageIngestOperations(imageBucket: String, thumbnailBucket: String, config private def storeOriginalImage(storableImage: StorableOriginalImage) (implicit logMarker: LogMarker): Future[S3Object] = - storeImage(imageBucket, fileKeyFromId(storableImage.id), storableImage.file, Some(storableImage.mimeType), storableImage.meta) + storeImage(imageBucket, fileKeyFromId(storableImage.id), storableImage.file, Some(storableImage.mimeType), + storableImage.meta, overwrite = false) private def storeThumbnailImage(storableImage: StorableThumbImage) (implicit logMarker: LogMarker): Future[S3Object] = - storeImage(thumbnailBucket, fileKeyFromId(storableImage.id), storableImage.file, Some(storableImage.mimeType)) + storeImage(thumbnailBucket, fileKeyFromId(storableImage.id), storableImage.file, Some(storableImage.mimeType), + overwrite = true) private def storeOptimisedImage(storableImage: StorableOptimisedImage) (implicit logMarker: LogMarker): Future[S3Object] = - storeImage(imageBucket, optimisedPngKeyFromId(storableImage.id), storableImage.file, Some(storableImage.mimeType)) + storeImage(imageBucket, optimisedPngKeyFromId(storableImage.id), storableImage.file, Some(storableImage.mimeType), + overwrite = true) def deleteOriginal(id: String): Future[Unit] = if(isVersionedS3) deleteVersionedImage(imageBucket, fileKeyFromId(id)) else deleteImage(imageBucket, fileKeyFromId(id)) def deleteThumbnail(id: String): Future[Unit] = deleteImage(thumbnailBucket, fileKeyFromId(id)) diff --git a/common-lib/src/main/scala/com/gu/mediaservice/lib/ImageQuarantineOperations.scala b/common-lib/src/main/scala/com/gu/mediaservice/lib/ImageQuarantineOperations.scala index 73d9fe81dd..9de53d9490 100644 --- a/common-lib/src/main/scala/com/gu/mediaservice/lib/ImageQuarantineOperations.scala +++ b/common-lib/src/main/scala/com/gu/mediaservice/lib/ImageQuarantineOperations.scala @@ -14,7 +14,7 @@ class ImageQuarantineOperations(quarantineBucket: String, config: CommonConfig, def storeQuarantineImage(id: String, file: File, mimeType: Option[MimeType], meta: Map[String, String] = Map.empty) (implicit logMarker: LogMarker): Future[S3Object] = - storeImage(quarantineBucket, ImageIngestOperations.fileKeyFromId(id), file, mimeType, meta) + storeImage(quarantineBucket, ImageIngestOperations.fileKeyFromId(id), file, mimeType, meta, overwrite = true) } diff --git a/common-lib/src/main/scala/com/gu/mediaservice/lib/ImageStorage.scala b/common-lib/src/main/scala/com/gu/mediaservice/lib/ImageStorage.scala index 6f29297dfa..00c59ac18e 100644 --- a/common-lib/src/main/scala/com/gu/mediaservice/lib/ImageStorage.scala +++ b/common-lib/src/main/scala/com/gu/mediaservice/lib/ImageStorage.scala @@ -34,7 +34,8 @@ trait ImageStorage { /** Store a copy of the given file and return the URI of that copy. * The file can safely be deleted afterwards. */ - def storeImage(bucket: String, id: String, file: File, mimeType: Option[MimeType], meta: Map[String, String] = Map.empty) + def storeImage(bucket: String, id: String, file: File, mimeType: Option[MimeType], + meta: Map[String, String] = Map.empty, overwrite: Boolean) (implicit logMarker: LogMarker): Future[S3Object] def deleteImage(bucket: String, id: String): Future[Unit] diff --git a/common-lib/src/main/scala/com/gu/mediaservice/lib/S3ImageStorage.scala b/common-lib/src/main/scala/com/gu/mediaservice/lib/S3ImageStorage.scala index 0c7ed5d767..1bf9257b92 100644 --- a/common-lib/src/main/scala/com/gu/mediaservice/lib/S3ImageStorage.scala +++ b/common-lib/src/main/scala/com/gu/mediaservice/lib/S3ImageStorage.scala @@ -16,13 +16,17 @@ class S3ImageStorage(config: CommonConfig) extends S3(config) with ImageStorage private val log = LoggerFactory.getLogger(getClass) private val cacheSetting = Some(cacheForever) - def storeImage(bucket: String, id: String, file: File, mimeType: Option[MimeType], meta: Map[String, String] = Map.empty) + def storeImage(bucket: String, id: String, file: File, mimeType: Option[MimeType], + meta: Map[String, String] = Map.empty, overwrite: Boolean) (implicit logMarker: LogMarker) = { - store(bucket, id, file, mimeType, meta, cacheSetting) - .map( _ => - // TODO this is just giving back the stuff we passed in and should be factored out. - S3Ops.projectFileAsS3Object(bucket, id, file, mimeType, meta, cacheSetting) - ) + val eventualDone = if (overwrite) { + store(bucket, id, file, mimeType, meta, cacheSetting) + } else { + storeIfNotPresent(bucket, id, file, mimeType, meta, cacheSetting) + } + eventualDone.map { _ => + S3Ops.projectFileAsS3Object(bucket, id, file, mimeType, meta, cacheSetting) + } } def deleteImage(bucket: String, id: String) = Future { diff --git a/common-lib/src/main/scala/com/gu/mediaservice/lib/aws/S3.scala b/common-lib/src/main/scala/com/gu/mediaservice/lib/aws/S3.scala index fafc81872b..977a923a6d 100644 --- a/common-lib/src/main/scala/com/gu/mediaservice/lib/aws/S3.scala +++ b/common-lib/src/main/scala/com/gu/mediaservice/lib/aws/S3.scala @@ -136,6 +136,20 @@ class S3(config: CommonConfig) extends GridLogging { }(markers) } + def storeIfNotPresent(bucket: Bucket, id: Key, file: File, mimeType: Option[MimeType], meta: UserMetadata = Map.empty, cacheControl: Option[String] = None) + (implicit ex: ExecutionContext, logMarker: LogMarker): Future[Unit] = { + Future{ + client.doesObjectExist(bucket, id) + }.flatMap { alreadyExists => + if (alreadyExists) { + log.info(s"Skipping storing of S3 file $id as key is already present in bucket $bucket") + Future.successful(()) + } else { + store(bucket, id, file, mimeType, meta, cacheControl) + } + } + } + def list(bucket: Bucket, prefixDir: String) (implicit ex: ExecutionContext): Future[List[S3Object]] = Future { diff --git a/cropper/app/lib/CropStore.scala b/cropper/app/lib/CropStore.scala index 1e43eeace4..65c59b678e 100644 --- a/cropper/app/lib/CropStore.scala +++ b/cropper/app/lib/CropStore.scala @@ -35,7 +35,7 @@ class CropStore(config: CropperConfig) extends S3ImageStorage(config) { case (key, value) => key -> value }.mapValues(_.toString) - storeImage(config.imgPublishingBucket, filename, file, Some(mimeType), filteredMetadata) map { s3Object => + storeImage(config.imgPublishingBucket, filename, file, Some(mimeType), filteredMetadata, overwrite = true) map { s3Object => Asset( translateImgHost(s3Object.uri), Some(s3Object.size), From 2a50ce93f1a516425896949c5185e4c6300c8d5a Mon Sep 17 00:00:00 2001 From: Simon Hildrew Date: Mon, 22 Feb 2021 10:11:43 +0000 Subject: [PATCH 10/18] Return the object metadata rather than that supplied --- .../gu/mediaservice/lib/S3ImageStorage.scala | 10 +- .../com/gu/mediaservice/lib/aws/S3.scala | 100 ++++++++++-------- image-loader/app/model/Projector.scala | 11 +- .../test/scala/model/ImageUploadTest.scala | 2 +- 4 files changed, 64 insertions(+), 59 deletions(-) diff --git a/common-lib/src/main/scala/com/gu/mediaservice/lib/S3ImageStorage.scala b/common-lib/src/main/scala/com/gu/mediaservice/lib/S3ImageStorage.scala index 1bf9257b92..79fed45686 100644 --- a/common-lib/src/main/scala/com/gu/mediaservice/lib/S3ImageStorage.scala +++ b/common-lib/src/main/scala/com/gu/mediaservice/lib/S3ImageStorage.scala @@ -1,13 +1,12 @@ package com.gu.mediaservice.lib -import java.io.File - -import com.gu.mediaservice.lib.aws.{S3, S3Ops} +import com.gu.mediaservice.lib.aws.S3 import com.gu.mediaservice.lib.config.CommonConfig import com.gu.mediaservice.lib.logging.LogMarker import com.gu.mediaservice.model.MimeType import org.slf4j.LoggerFactory +import java.io.File import scala.collection.JavaConverters._ import scala.concurrent.Future @@ -19,14 +18,11 @@ class S3ImageStorage(config: CommonConfig) extends S3(config) with ImageStorage def storeImage(bucket: String, id: String, file: File, mimeType: Option[MimeType], meta: Map[String, String] = Map.empty, overwrite: Boolean) (implicit logMarker: LogMarker) = { - val eventualDone = if (overwrite) { + if (overwrite) { store(bucket, id, file, mimeType, meta, cacheSetting) } else { storeIfNotPresent(bucket, id, file, mimeType, meta, cacheSetting) } - eventualDone.map { _ => - S3Ops.projectFileAsS3Object(bucket, id, file, mimeType, meta, cacheSetting) - } } def deleteImage(bucket: String, id: String) = Future { diff --git a/common-lib/src/main/scala/com/gu/mediaservice/lib/aws/S3.scala b/common-lib/src/main/scala/com/gu/mediaservice/lib/aws/S3.scala index 977a923a6d..24d5d0a106 100644 --- a/common-lib/src/main/scala/com/gu/mediaservice/lib/aws/S3.scala +++ b/common-lib/src/main/scala/com/gu/mediaservice/lib/aws/S3.scala @@ -3,7 +3,6 @@ package com.gu.mediaservice.lib.aws import java.io.File import java.net.{URI, URLEncoder} import java.nio.charset.{Charset, StandardCharsets} - import com.amazonaws.{AmazonServiceException, ClientConfiguration} import com.amazonaws.services.s3.model._ import com.amazonaws.services.s3.{AmazonS3, AmazonS3ClientBuilder, model} @@ -19,8 +18,47 @@ import scala.concurrent.{ExecutionContext, Future} case class S3Object(uri: URI, size: Long, metadata: S3Metadata) +object S3Object { + def objectUrl(bucket: String, key: String): URI = { + val bucketUrl = s"$bucket.${S3Ops.s3Endpoint}" + new URI("http", bucketUrl, s"/$key", null) + } + + def apply(bucket: String, key: String, size: Long, metadata: S3Metadata): S3Object = + apply(objectUrl(bucket, key), size, metadata) + + def apply(bucket: String, key: String, file: File, mimeType: Option[MimeType], + meta: Map[String, String] = Map.empty, cacheControl: Option[String] = None): S3Object = { + S3Object( + bucket, + key, + file.length, + S3Metadata( + meta, + S3ObjectMetadata( + mimeType, + cacheControl + ) + ) + ) + } +} + case class S3Metadata(userMetadata: Map[String, String], objectMetadata: S3ObjectMetadata) +object S3Metadata { + def apply(meta: ObjectMetadata): S3Metadata = { + S3Metadata( + meta.getUserMetadata.asScala.toMap, + S3ObjectMetadata( + contentType = Option(MimeType(meta.getContentType)), + cacheControl = Option(meta.getCacheControl), + lastModified = Option(meta.getLastModified).map(new DateTime(_)) + ) + ) + } +} + case class S3ObjectMetadata(contentType: Option[MimeType], cacheControl: Option[String], lastModified: Option[DateTime] = None) class S3(config: CommonConfig) extends GridLogging { @@ -28,8 +66,6 @@ class S3(config: CommonConfig) extends GridLogging { type Key = String type UserMetadata = Map[String, String] - import S3Ops.objectUrl - lazy val client: AmazonS3 = S3Ops.buildS3Client(config) // also create a legacy client that uses v2 signatures for URL signing private lazy val legacySigningClient: AmazonS3 = S3Ops.buildS3Client(config, forceV2Sigs = true) @@ -116,7 +152,7 @@ class S3(config: CommonConfig) extends GridLogging { } def store(bucket: Bucket, id: Key, file: File, mimeType: Option[MimeType], meta: UserMetadata = Map.empty, cacheControl: Option[String] = None) - (implicit ex: ExecutionContext, logMarker: LogMarker): Future[Unit] = + (implicit ex: ExecutionContext, logMarker: LogMarker): Future[S3Object] = Future { val metadata = new ObjectMetadata mimeType.foreach(m => metadata.setContentType(m.name)) @@ -132,21 +168,24 @@ class S3(config: CommonConfig) extends GridLogging { val req = new PutObjectRequest(bucket, id, file).withMetadata(metadata) Stopwatch(s"S3 client.putObject ($req)"){ - client.putObject(req) + val res = client.putObject(req) + S3Object(bucket, id, res.getMetadata.getContentLength, S3Metadata(res.getMetadata)) }(markers) } def storeIfNotPresent(bucket: Bucket, id: Key, file: File, mimeType: Option[MimeType], meta: UserMetadata = Map.empty, cacheControl: Option[String] = None) - (implicit ex: ExecutionContext, logMarker: LogMarker): Future[Unit] = { + (implicit ex: ExecutionContext, logMarker: LogMarker): Future[S3Object] = { Future{ - client.doesObjectExist(bucket, id) - }.flatMap { alreadyExists => - if (alreadyExists) { + Some(client.getObjectMetadata(bucket, id)) + }.recover { + // translate this exception into the object not existing + case as3e:AmazonS3Exception if as3e.getStatusCode == 404 => None + }.flatMap { + case Some(objectMetadata) => log.info(s"Skipping storing of S3 file $id as key is already present in bucket $bucket") - Future.successful(()) - } else { + Future.successful(S3Object(bucket, id, objectMetadata.getContentLength, S3Metadata(objectMetadata))) + case None => store(bucket, id, file, mimeType, meta, cacheControl) - } } } @@ -158,21 +197,13 @@ class S3(config: CommonConfig) extends GridLogging { val summaries = listing.getObjectSummaries.asScala summaries.map(summary => (summary.getKey, summary)).foldLeft(List[S3Object]()) { case (memo: List[S3Object], (key: String, summary: S3ObjectSummary)) => - S3Object(objectUrl(bucket, key), summary.getSize, getMetadata(bucket, key)) :: memo + S3Object(bucket, key, summary.getSize, getMetadata(bucket, key)) :: memo } } def getMetadata(bucket: Bucket, key: Key): S3Metadata = { val meta = client.getObjectMetadata(bucket, key) - - S3Metadata( - meta.getUserMetadata.asScala.toMap, - S3ObjectMetadata( - contentType = Option(MimeType(meta.getContentType)), - cacheControl = Option(meta.getCacheControl), - lastModified = Option(meta.getLastModified).map(new DateTime(_)) - ) - ) + S3Metadata(meta) } def getUserMetadata(bucket: Bucket, key: Key): Map[Bucket, Bucket] = @@ -190,7 +221,7 @@ class S3(config: CommonConfig) extends GridLogging { object S3Ops { // TODO make this localstack friendly // TODO: Make this region aware - i.e. RegionUtils.getRegion(region).getServiceEndpoint(AmazonS3.ENDPOINT_PREFIX) - private val s3Endpoint = "s3.amazonaws.com" + val s3Endpoint = "s3.amazonaws.com" def buildS3Client(config: CommonConfig, forceV2Sigs: Boolean = false, localstackAware: Boolean = true): AmazonS3 = { @@ -212,27 +243,4 @@ object S3Ops { config.withAWSCredentials(builder, localstackAware).build() } - - def objectUrl(bucket: String, key: String): URI = { - val bucketUrl = s"$bucket.$s3Endpoint" - new URI("http", bucketUrl, s"/$key", null) - } - - def projectFileAsS3Object(url: URI, file: File, mimeType: Option[MimeType], meta: Map[String, String], cacheControl: Option[String]): S3Object = { - S3Object( - url, - file.length, - S3Metadata( - meta, - S3ObjectMetadata( - mimeType, - cacheControl - ) - ) - ) - } - - def projectFileAsS3Object(bucket: String, key: String, file: File, mimeType: Option[MimeType], meta: Map[String, String] = Map.empty, cacheControl: Option[String] = None): S3Object = { - projectFileAsS3Object(objectUrl(bucket, key), file, mimeType, meta, cacheControl) - } } diff --git a/image-loader/app/model/Projector.scala b/image-loader/app/model/Projector.scala index 6b58f83019..cf627d7931 100644 --- a/image-loader/app/model/Projector.scala +++ b/image-loader/app/model/Projector.scala @@ -3,9 +3,10 @@ package model import java.io.{File, FileOutputStream} import java.util.UUID import com.amazonaws.services.s3.AmazonS3 -import com.amazonaws.services.s3.model.{ObjectMetadata, S3Object} +import com.amazonaws.services.s3.model.{ObjectMetadata, S3Object => AwsS3Object} import com.gu.mediaservice.lib.{ImageIngestOperations, ImageStorageProps, StorableOptimisedImage, StorableOriginalImage, StorableThumbImage} import com.gu.mediaservice.lib.aws.S3Ops +import com.gu.mediaservice.lib.aws.S3Object import com.gu.mediaservice.lib.cleanup.ImageProcessor import com.gu.mediaservice.lib.imaging.ImageOperations import com.gu.mediaservice.lib.logging.LogMarker @@ -100,7 +101,7 @@ class Projector(config: ImageUploadOpsCfg, } } - private def getSrcFileDigestForProjection(s3Src: S3Object, imageId: String, tempFile: File) = { + private def getSrcFileDigestForProjection(s3Src: AwsS3Object, imageId: String, tempFile: File) = { IOUtils.copy(s3Src.getObjectContent, new FileOutputStream(tempFile)) DigestedFile(tempFile, imageId) } @@ -147,7 +148,7 @@ class ImageUploadProjectionOps(config: ImageUploadOpsCfg, private def projectOriginalFileAsS3Model(storableOriginalImage: StorableOriginalImage) (implicit ec: ExecutionContext)= Future { val key = ImageIngestOperations.fileKeyFromId(storableOriginalImage.id) - S3Ops.projectFileAsS3Object( + S3Object( config.originalFileBucket, key, storableOriginalImage.file, @@ -159,7 +160,7 @@ class ImageUploadProjectionOps(config: ImageUploadOpsCfg, private def projectThumbnailFileAsS3Model(storableThumbImage: StorableThumbImage)(implicit ec: ExecutionContext) = Future { val key = ImageIngestOperations.fileKeyFromId(storableThumbImage.id) val thumbMimeType = Some(ImageOperations.thumbMimeType) - S3Ops.projectFileAsS3Object( + S3Object( config.thumbBucket, key, storableThumbImage.file, @@ -170,7 +171,7 @@ class ImageUploadProjectionOps(config: ImageUploadOpsCfg, private def projectOptimisedPNGFileAsS3Model(storableOptimisedImage: StorableOptimisedImage)(implicit ec: ExecutionContext) = Future { val key = ImageIngestOperations.optimisedPngKeyFromId(storableOptimisedImage.id) val optimisedPngMimeType = Some(ImageOperations.thumbMimeType) // this IS what we will generate. - S3Ops.projectFileAsS3Object( + S3Object( config.originalFileBucket, key, storableOptimisedImage.file, diff --git a/image-loader/test/scala/model/ImageUploadTest.scala b/image-loader/test/scala/model/ImageUploadTest.scala index 4e6ec20e16..4c6f1cd226 100644 --- a/image-loader/test/scala/model/ImageUploadTest.scala +++ b/image-loader/test/scala/model/ImageUploadTest.scala @@ -53,7 +53,7 @@ class ImageUploadTest extends AsyncFunSuite with Matchers with MockitoSugar { def mockStore = (a: StorableImage) => Future.successful( - S3Ops.projectFileAsS3Object(new URI("http://madeupname/"), a.file, Some(a.mimeType), a.meta, None) + S3Object("madeupname", "madeupkey", a.file, Some(a.mimeType), a.meta, None) ) def storeOrProjectOriginalFile: StorableOriginalImage => Future[S3Object] = mockStore From 2d8da0fef5af658ecfb62fb4c21c92ed8893ff41 Mon Sep 17 00:00:00 2001 From: Simon Hildrew Date: Mon, 22 Feb 2021 11:39:27 +0000 Subject: [PATCH 11/18] Use S3 metadata in preference to request data --- image-loader/app/model/Projector.scala | 32 +++++++++++++++----------- image-loader/app/model/Uploader.scala | 13 ++++++++++- 2 files changed, 30 insertions(+), 15 deletions(-) diff --git a/image-loader/app/model/Projector.scala b/image-loader/app/model/Projector.scala index cf627d7931..14550d2fbe 100644 --- a/image-loader/app/model/Projector.scala +++ b/image-loader/app/model/Projector.scala @@ -40,22 +40,26 @@ case class S3FileExtractedMetadata( object S3FileExtractedMetadata { def apply(s3ObjectMetadata: ObjectMetadata): S3FileExtractedMetadata = { - val lastModified = s3ObjectMetadata.getLastModified.toInstant.toString - val fileUserMetadata = s3ObjectMetadata.getUserMetadata.asScala.toMap - .map { case (key, value) => - // Fix up the contents of the metadata. - ( - // The keys used to be named with underscores instead of dashes but due to localstack being written in Python - // this didn't work locally (see https://github.com/localstack/localstack/issues/459) - key.replaceAll("_", "-"), - // The values are now all URL encoded and it is assumed safe to decode historical values too (based on the tested corpus) - URI.decode(value) - ) - } + val lastModified = new DateTime(s3ObjectMetadata.getLastModified) + val userMetadata = s3ObjectMetadata.getUserMetadata.asScala.toMap + apply(lastModified, userMetadata) + } + + def apply(lastModified: DateTime, userMetadata: Map[String, String]): S3FileExtractedMetadata = { + val fileUserMetadata = userMetadata.map { case (key, value) => + // Fix up the contents of the metadata. + ( + // The keys used to be named with underscores instead of dashes but due to localstack being written in Python + // this didn't work locally (see https://github.com/localstack/localstack/issues/459) + key.replaceAll("_", "-"), + // The values are now all URL encoded and it is assumed safe to decode historical values too (based on the tested corpus) + URI.decode(value) + ) + } val uploadedBy = fileUserMetadata.getOrElse(ImageStorageProps.uploadedByMetadataKey, "re-ingester") - val uploadedTimeRaw = fileUserMetadata.getOrElse(ImageStorageProps.uploadTimeMetadataKey, lastModified) - val uploadTime = new DateTime(uploadedTimeRaw).withZone(DateTimeZone.UTC) + val uploadedTimeRaw = fileUserMetadata.get(ImageStorageProps.uploadTimeMetadataKey).map(new DateTime(_).withZone(DateTimeZone.UTC)) + val uploadTime = uploadedTimeRaw.getOrElse(lastModified) val identifiers = fileUserMetadata.filter{ case (key, _) => key.startsWith(ImageStorageProps.identifierMetadataKeyPrefix) }.map{ case (key, value) => diff --git a/image-loader/app/model/Uploader.scala b/image-loader/app/model/Uploader.scala index 5e73aa9d1f..4d2168f57e 100644 --- a/image-loader/app/model/Uploader.scala +++ b/image-loader/app/model/Uploader.scala @@ -146,6 +146,7 @@ object Uploader extends GridLogging { val eventualImage = for { browserViewableImage <- eventualBrowserViewableImage s3Source <- sourceStoreFuture + mergedUploadRequest = patchUploadRequestWithS3Metadata(uploadRequest, s3Source) optimisedFileMetadata <- FileMetadataReader.fromIPTCHeadersWithColorInfo(browserViewableImage) thumbViewableImage <- createThumbFuture(optimisedFileMetadata, colourModelFuture, browserViewableImage, deps) s3Thumb <- storeOrProjectThumbFile(thumbViewableImage) @@ -165,7 +166,7 @@ object Uploader extends GridLogging { val thumbAsset = Asset.fromS3Object(s3Thumb, thumbDimensions) val pngAsset = s3PngOption.map(Asset.fromS3Object(_, sourceDimensions)) - val baseImage = ImageUpload.createImage(uploadRequest, sourceAsset, thumbAsset, pngAsset, fullFileMetadata, metadata) + val baseImage = ImageUpload.createImage(mergedUploadRequest, sourceAsset, thumbAsset, pngAsset, fullFileMetadata, metadata) val processedImage = processor(baseImage) @@ -257,6 +258,16 @@ object Uploader extends GridLogging { case None => Future.failed(new Exception("This file is not an image with an identifiable mime type")) } } + + def patchUploadRequestWithS3Metadata(request: UploadRequest, s3Object: S3Object): UploadRequest = { + val metadata = S3FileExtractedMetadata(s3Object.metadata.objectMetadata.lastModified.getOrElse(new DateTime), s3Object.metadata.userMetadata) + request.copy( + uploadTime = metadata.uploadTime, + uploadedBy = metadata.uploadedBy, + uploadInfo = request.uploadInfo.copy(filename = metadata.uploadFileName), + identifiers = metadata.identifiers + ) + } } class Uploader(val store: ImageLoaderStore, From 3328ec185baa9521af1d902a292566cfffd03c47 Mon Sep 17 00:00:00 2001 From: Simon Hildrew Date: Mon, 22 Feb 2021 13:19:56 +0000 Subject: [PATCH 12/18] Fix NPE when no content-type --- common-lib/src/main/scala/com/gu/mediaservice/lib/aws/S3.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common-lib/src/main/scala/com/gu/mediaservice/lib/aws/S3.scala b/common-lib/src/main/scala/com/gu/mediaservice/lib/aws/S3.scala index 24d5d0a106..c21275b72c 100644 --- a/common-lib/src/main/scala/com/gu/mediaservice/lib/aws/S3.scala +++ b/common-lib/src/main/scala/com/gu/mediaservice/lib/aws/S3.scala @@ -51,7 +51,7 @@ object S3Metadata { S3Metadata( meta.getUserMetadata.asScala.toMap, S3ObjectMetadata( - contentType = Option(MimeType(meta.getContentType)), + contentType = Option(meta.getContentType).map(MimeType.apply), cacheControl = Option(meta.getCacheControl), lastModified = Option(meta.getLastModified).map(new DateTime(_)) ) From 094a2c2c12bbb45275c4b2a2e8d879e3277a8d0b Mon Sep 17 00:00:00 2001 From: snyk-bot Date: Tue, 23 Feb 2021 06:59:53 +0000 Subject: [PATCH 13/18] fix: upgrade aws-sdk from 2.827.0 to 2.834.0 Snyk has created this PR to upgrade aws-sdk from 2.827.0 to 2.834.0. See this package in npm: https://www.npmjs.com/package/aws-sdk See this project in Snyk: https://app.snyk.io/org/guardian/project/ccea8889-4b98-4a9d-9a24-fb5ac2bbdb3d?utm_source=github&utm_medium=upgrade-pr --- scripts/reindex-images/package-lock.json | 6 +++--- scripts/reindex-images/package.json | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/scripts/reindex-images/package-lock.json b/scripts/reindex-images/package-lock.json index bfb4073fa5..b6b2d80b2a 100644 --- a/scripts/reindex-images/package-lock.json +++ b/scripts/reindex-images/package-lock.json @@ -5,9 +5,9 @@ "requires": true, "dependencies": { "aws-sdk": { - "version": "2.827.0", - "resolved": "https://registry.npmjs.org/aws-sdk/-/aws-sdk-2.827.0.tgz", - "integrity": "sha512-71PWS1dqJ65/SeNgDQWEgbJ6oKCuB+Ypq30TM3EyzbAHaxl69WjQRK71oJ2bjhdIHfGQJtOV0G9wg4zpge4Erg==", + "version": "2.834.0", + "resolved": "https://registry.npmjs.org/aws-sdk/-/aws-sdk-2.834.0.tgz", + "integrity": "sha512-9WRULrn4qAmgXI+tEW/IG5s/6ixJGZqjPOrmJsFZQev7/WRkxAZmJAjcwd4Ifm/jsJbXx2FSwO76gOPEvu2LqA==", "requires": { "buffer": "4.9.2", "events": "1.1.1", diff --git a/scripts/reindex-images/package.json b/scripts/reindex-images/package.json index 0e40aebc28..82df9633ab 100644 --- a/scripts/reindex-images/package.json +++ b/scripts/reindex-images/package.json @@ -9,6 +9,6 @@ "author": "Jonathon Herbert", "license": "ISC", "dependencies": { - "aws-sdk": "^2.827.0" + "aws-sdk": "^2.834.0" } } From b8b464cb6034440b667a7d64114d76164cfecdd4 Mon Sep 17 00:00:00 2001 From: Justin Rowles Date: Wed, 24 Feb 2021 09:42:55 +0000 Subject: [PATCH 14/18] Fix REST lib tests --- build.sbt | 2 +- .../src/test/resources/application.conf | 0 .../src/test/resources/cmyk.jpg | Bin .../src/test/resources/grayscale-with-profile.jpg | Bin .../src/test/resources/grayscale-wo-profile.jpg | Bin .../src/test/resources/rgb-with-cmyk-profile.jpg | Bin .../src/test/resources/rgb-with-rgb-profile.jpg | Bin .../src/test/resources/rgb-wo-profile.jpg | Bin .../mediaservice/lib/auth/AuthenticationTest.scala | 2 +- 9 files changed, 2 insertions(+), 2 deletions(-) rename {common-lib => rest-lib}/src/test/resources/application.conf (100%) rename {common-lib => rest-lib}/src/test/resources/cmyk.jpg (100%) rename {common-lib => rest-lib}/src/test/resources/grayscale-with-profile.jpg (100%) rename {common-lib => rest-lib}/src/test/resources/grayscale-wo-profile.jpg (100%) rename {common-lib => rest-lib}/src/test/resources/rgb-with-cmyk-profile.jpg (100%) rename {common-lib => rest-lib}/src/test/resources/rgb-with-rgb-profile.jpg (100%) rename {common-lib => rest-lib}/src/test/resources/rgb-wo-profile.jpg (100%) diff --git a/build.sbt b/build.sbt index 4ae757cf0d..243924d6fb 100644 --- a/build.sbt +++ b/build.sbt @@ -27,7 +27,7 @@ val commonSettings = Seq( ) lazy val root = project("grid", path = Some(".")) - .aggregate(commonLib, auth, collections, cropper, imageLoader, leases, thrall, kahuna, metadataEditor, usage, mediaApi, adminToolsLambda, adminToolsScripts, adminToolsDev) + .aggregate(commonLib, restLib, auth, collections, cropper, imageLoader, leases, thrall, kahuna, metadataEditor, usage, mediaApi, adminToolsLambda, adminToolsScripts, adminToolsDev) .enablePlugins(RiffRaffArtifact) .settings( riffRaffManifestProjectName := s"media-service::grid::all", diff --git a/common-lib/src/test/resources/application.conf b/rest-lib/src/test/resources/application.conf similarity index 100% rename from common-lib/src/test/resources/application.conf rename to rest-lib/src/test/resources/application.conf diff --git a/common-lib/src/test/resources/cmyk.jpg b/rest-lib/src/test/resources/cmyk.jpg similarity index 100% rename from common-lib/src/test/resources/cmyk.jpg rename to rest-lib/src/test/resources/cmyk.jpg diff --git a/common-lib/src/test/resources/grayscale-with-profile.jpg b/rest-lib/src/test/resources/grayscale-with-profile.jpg similarity index 100% rename from common-lib/src/test/resources/grayscale-with-profile.jpg rename to rest-lib/src/test/resources/grayscale-with-profile.jpg diff --git a/common-lib/src/test/resources/grayscale-wo-profile.jpg b/rest-lib/src/test/resources/grayscale-wo-profile.jpg similarity index 100% rename from common-lib/src/test/resources/grayscale-wo-profile.jpg rename to rest-lib/src/test/resources/grayscale-wo-profile.jpg diff --git a/common-lib/src/test/resources/rgb-with-cmyk-profile.jpg b/rest-lib/src/test/resources/rgb-with-cmyk-profile.jpg similarity index 100% rename from common-lib/src/test/resources/rgb-with-cmyk-profile.jpg rename to rest-lib/src/test/resources/rgb-with-cmyk-profile.jpg diff --git a/common-lib/src/test/resources/rgb-with-rgb-profile.jpg b/rest-lib/src/test/resources/rgb-with-rgb-profile.jpg similarity index 100% rename from common-lib/src/test/resources/rgb-with-rgb-profile.jpg rename to rest-lib/src/test/resources/rgb-with-rgb-profile.jpg diff --git a/common-lib/src/test/resources/rgb-wo-profile.jpg b/rest-lib/src/test/resources/rgb-wo-profile.jpg similarity index 100% rename from common-lib/src/test/resources/rgb-wo-profile.jpg rename to rest-lib/src/test/resources/rgb-wo-profile.jpg diff --git a/rest-lib/src/test/scala/com/gu/mediaservice/lib/auth/AuthenticationTest.scala b/rest-lib/src/test/scala/com/gu/mediaservice/lib/auth/AuthenticationTest.scala index ddc0c1a305..6ac73db9fe 100644 --- a/rest-lib/src/test/scala/com/gu/mediaservice/lib/auth/AuthenticationTest.scala +++ b/rest-lib/src/test/scala/com/gu/mediaservice/lib/auth/AuthenticationTest.scala @@ -5,7 +5,7 @@ import akka.stream.ActorMaterializer import com.gu.mediaservice.lib.auth.Authentication.{MachinePrincipal, OnBehalfOfPrincipal, UserPrincipal} import com.gu.mediaservice.lib.auth.provider.AuthenticationProvider.RedirectUri import com.gu.mediaservice.lib.auth.provider._ -import com.gu.mediaservice.lib.config.{CommonConfig, TestProvider} +import com.gu.mediaservice.lib.config.CommonConfig import org.scalatest.{AsyncFreeSpec, BeforeAndAfterAll, EitherValues, Matchers} import org.scalatestplus.play.PlaySpec import play.api.{Configuration, Environment} From bd605010ee0faa8383ab946936a235cc2613337c Mon Sep 17 00:00:00 2001 From: Adriel Ulanovsky Date: Wed, 24 Feb 2021 08:47:41 -0300 Subject: [PATCH 15/18] feature/bbc-pluggable-project: Implement pluggable bbc project mechanism --- build.sbt | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/build.sbt b/build.sbt index 243924d6fb..ea31a79a47 100644 --- a/build.sbt +++ b/build.sbt @@ -26,8 +26,11 @@ val commonSettings = Seq( publishArtifact in (Compile, packageDoc) := false ) +//Common projects to all organizations +lazy val commonProjects: Seq[sbt.ProjectReference] = Seq(commonLib, restLib, auth, collections, cropper, imageLoader, leases, thrall, kahuna, metadataEditor, usage, mediaApi, adminToolsLambda, adminToolsScripts, adminToolsDev) + lazy val root = project("grid", path = Some(".")) - .aggregate(commonLib, restLib, auth, collections, cropper, imageLoader, leases, thrall, kahuna, metadataEditor, usage, mediaApi, adminToolsLambda, adminToolsScripts, adminToolsDev) + .aggregate((maybeBBCLib.toList ++ commonProjects):_*) .enablePlugins(RiffRaffArtifact) .settings( riffRaffManifestProjectName := s"media-service::grid::all", @@ -67,20 +70,16 @@ val okHttpVersion = "3.12.1" val bbcBuildProcess: Boolean = System.getenv().asScala.get("BUILD_ORG").contains("bbc") -val bbcCommonLibSettings: SettingsDefinition = if (bbcBuildProcess) { - Seq( - libraryDependencies ++= Seq("com.gu" %% "pan-domain-auth-play_2-6" % "0.9.2-SNAPSHOT") - ) -} else { - Seq( - libraryDependencies ++= Seq("com.gu" %% "pan-domain-auth-play_2-6" % "0.8.2") - ) -} +//BBC specific project, it only gets compiled when bbcBuildProcess is true +lazy val bbcProject = project("bbc").dependsOn(restLib) + +val maybeBBCLib: Option[sbt.ProjectReference] = if(bbcBuildProcess) Some(bbcProject) else None lazy val commonLib = project("common-lib").settings( libraryDependencies ++= Seq( // also exists in plugins.sbt, TODO deduplicate this "com.gu" %% "editorial-permissions-client" % "2.0", + "com.gu" %% "pan-domain-auth-play_2-6" % "0.8.2", "com.amazonaws" % "aws-java-sdk-iam" % awsSdkVersion, "com.amazonaws" % "aws-java-sdk-s3" % awsSdkVersion, "com.amazonaws" % "aws-java-sdk-ec2" % awsSdkVersion, @@ -114,7 +113,7 @@ lazy val commonLib = project("common-lib").settings( "com.squareup.okhttp3" % "okhttp" % okHttpVersion ), dependencyOverrides += "org.apache.thrift" % "libthrift" % "0.9.1" -).settings(bbcCommonLibSettings) +) lazy val restLib = project("rest-lib").settings( libraryDependencies ++= Seq( @@ -289,8 +288,8 @@ val buildInfo = Seq( ) ) -def playProject(projectName: String, port: Int, path: Option[String] = None): Project = - project(projectName, path) +def playProject(projectName: String, port: Int, path: Option[String] = None): Project = { + val commonProject = project(projectName, path) .enablePlugins(PlayScala, JDebPackaging, SystemdPlugin, BuildInfoPlugin) .dependsOn(restLib) .settings(commonSettings ++ buildInfo ++ Seq( @@ -317,3 +316,6 @@ def playProject(projectName: String, port: Int, path: Option[String] = None): Pr "-J-XX:GCLogFileSize=2M" ) )) + //Add the BBC library dependency if defined + maybeBBCLib.fold(commonProject){commonProject.dependsOn(_)} +} From d37ff80f5bd3b78bf8b50c328ae1a1233ce23025 Mon Sep 17 00:00:00 2001 From: Simon Hildrew Date: Fri, 26 Feb 2021 12:25:45 +0000 Subject: [PATCH 16/18] Revert "Only process user/provided metadata if image has not previously been uploaded" --- .../lib/ImageIngestOperations.scala | 9 +- .../lib/ImageQuarantineOperations.scala | 2 +- .../gu/mediaservice/lib/ImageStorage.scala | 3 +- .../gu/mediaservice/lib/S3ImageStorage.scala | 18 ++-- .../com/gu/mediaservice/lib/aws/S3.scala | 100 +++++++----------- cropper/app/lib/CropStore.scala | 2 +- image-loader/app/model/Projector.scala | 43 ++++---- image-loader/app/model/Uploader.scala | 13 +-- .../test/scala/model/ImageUploadTest.scala | 2 +- 9 files changed, 75 insertions(+), 117 deletions(-) diff --git a/common-lib/src/main/scala/com/gu/mediaservice/lib/ImageIngestOperations.scala b/common-lib/src/main/scala/com/gu/mediaservice/lib/ImageIngestOperations.scala index f70a00d2b3..1b070331fd 100644 --- a/common-lib/src/main/scala/com/gu/mediaservice/lib/ImageIngestOperations.scala +++ b/common-lib/src/main/scala/com/gu/mediaservice/lib/ImageIngestOperations.scala @@ -29,18 +29,15 @@ class ImageIngestOperations(imageBucket: String, thumbnailBucket: String, config private def storeOriginalImage(storableImage: StorableOriginalImage) (implicit logMarker: LogMarker): Future[S3Object] = - storeImage(imageBucket, fileKeyFromId(storableImage.id), storableImage.file, Some(storableImage.mimeType), - storableImage.meta, overwrite = false) + storeImage(imageBucket, fileKeyFromId(storableImage.id), storableImage.file, Some(storableImage.mimeType), storableImage.meta) private def storeThumbnailImage(storableImage: StorableThumbImage) (implicit logMarker: LogMarker): Future[S3Object] = - storeImage(thumbnailBucket, fileKeyFromId(storableImage.id), storableImage.file, Some(storableImage.mimeType), - overwrite = true) + storeImage(thumbnailBucket, fileKeyFromId(storableImage.id), storableImage.file, Some(storableImage.mimeType)) private def storeOptimisedImage(storableImage: StorableOptimisedImage) (implicit logMarker: LogMarker): Future[S3Object] = - storeImage(imageBucket, optimisedPngKeyFromId(storableImage.id), storableImage.file, Some(storableImage.mimeType), - overwrite = true) + storeImage(imageBucket, optimisedPngKeyFromId(storableImage.id), storableImage.file, Some(storableImage.mimeType)) def deleteOriginal(id: String): Future[Unit] = if(isVersionedS3) deleteVersionedImage(imageBucket, fileKeyFromId(id)) else deleteImage(imageBucket, fileKeyFromId(id)) def deleteThumbnail(id: String): Future[Unit] = deleteImage(thumbnailBucket, fileKeyFromId(id)) diff --git a/common-lib/src/main/scala/com/gu/mediaservice/lib/ImageQuarantineOperations.scala b/common-lib/src/main/scala/com/gu/mediaservice/lib/ImageQuarantineOperations.scala index 9de53d9490..73d9fe81dd 100644 --- a/common-lib/src/main/scala/com/gu/mediaservice/lib/ImageQuarantineOperations.scala +++ b/common-lib/src/main/scala/com/gu/mediaservice/lib/ImageQuarantineOperations.scala @@ -14,7 +14,7 @@ class ImageQuarantineOperations(quarantineBucket: String, config: CommonConfig, def storeQuarantineImage(id: String, file: File, mimeType: Option[MimeType], meta: Map[String, String] = Map.empty) (implicit logMarker: LogMarker): Future[S3Object] = - storeImage(quarantineBucket, ImageIngestOperations.fileKeyFromId(id), file, mimeType, meta, overwrite = true) + storeImage(quarantineBucket, ImageIngestOperations.fileKeyFromId(id), file, mimeType, meta) } diff --git a/common-lib/src/main/scala/com/gu/mediaservice/lib/ImageStorage.scala b/common-lib/src/main/scala/com/gu/mediaservice/lib/ImageStorage.scala index 00c59ac18e..6f29297dfa 100644 --- a/common-lib/src/main/scala/com/gu/mediaservice/lib/ImageStorage.scala +++ b/common-lib/src/main/scala/com/gu/mediaservice/lib/ImageStorage.scala @@ -34,8 +34,7 @@ trait ImageStorage { /** Store a copy of the given file and return the URI of that copy. * The file can safely be deleted afterwards. */ - def storeImage(bucket: String, id: String, file: File, mimeType: Option[MimeType], - meta: Map[String, String] = Map.empty, overwrite: Boolean) + def storeImage(bucket: String, id: String, file: File, mimeType: Option[MimeType], meta: Map[String, String] = Map.empty) (implicit logMarker: LogMarker): Future[S3Object] def deleteImage(bucket: String, id: String): Future[Unit] diff --git a/common-lib/src/main/scala/com/gu/mediaservice/lib/S3ImageStorage.scala b/common-lib/src/main/scala/com/gu/mediaservice/lib/S3ImageStorage.scala index 79fed45686..0c7ed5d767 100644 --- a/common-lib/src/main/scala/com/gu/mediaservice/lib/S3ImageStorage.scala +++ b/common-lib/src/main/scala/com/gu/mediaservice/lib/S3ImageStorage.scala @@ -1,12 +1,13 @@ package com.gu.mediaservice.lib -import com.gu.mediaservice.lib.aws.S3 +import java.io.File + +import com.gu.mediaservice.lib.aws.{S3, S3Ops} import com.gu.mediaservice.lib.config.CommonConfig import com.gu.mediaservice.lib.logging.LogMarker import com.gu.mediaservice.model.MimeType import org.slf4j.LoggerFactory -import java.io.File import scala.collection.JavaConverters._ import scala.concurrent.Future @@ -15,14 +16,13 @@ class S3ImageStorage(config: CommonConfig) extends S3(config) with ImageStorage private val log = LoggerFactory.getLogger(getClass) private val cacheSetting = Some(cacheForever) - def storeImage(bucket: String, id: String, file: File, mimeType: Option[MimeType], - meta: Map[String, String] = Map.empty, overwrite: Boolean) + def storeImage(bucket: String, id: String, file: File, mimeType: Option[MimeType], meta: Map[String, String] = Map.empty) (implicit logMarker: LogMarker) = { - if (overwrite) { - store(bucket, id, file, mimeType, meta, cacheSetting) - } else { - storeIfNotPresent(bucket, id, file, mimeType, meta, cacheSetting) - } + store(bucket, id, file, mimeType, meta, cacheSetting) + .map( _ => + // TODO this is just giving back the stuff we passed in and should be factored out. + S3Ops.projectFileAsS3Object(bucket, id, file, mimeType, meta, cacheSetting) + ) } def deleteImage(bucket: String, id: String) = Future { diff --git a/common-lib/src/main/scala/com/gu/mediaservice/lib/aws/S3.scala b/common-lib/src/main/scala/com/gu/mediaservice/lib/aws/S3.scala index c21275b72c..fafc81872b 100644 --- a/common-lib/src/main/scala/com/gu/mediaservice/lib/aws/S3.scala +++ b/common-lib/src/main/scala/com/gu/mediaservice/lib/aws/S3.scala @@ -3,6 +3,7 @@ package com.gu.mediaservice.lib.aws import java.io.File import java.net.{URI, URLEncoder} import java.nio.charset.{Charset, StandardCharsets} + import com.amazonaws.{AmazonServiceException, ClientConfiguration} import com.amazonaws.services.s3.model._ import com.amazonaws.services.s3.{AmazonS3, AmazonS3ClientBuilder, model} @@ -18,47 +19,8 @@ import scala.concurrent.{ExecutionContext, Future} case class S3Object(uri: URI, size: Long, metadata: S3Metadata) -object S3Object { - def objectUrl(bucket: String, key: String): URI = { - val bucketUrl = s"$bucket.${S3Ops.s3Endpoint}" - new URI("http", bucketUrl, s"/$key", null) - } - - def apply(bucket: String, key: String, size: Long, metadata: S3Metadata): S3Object = - apply(objectUrl(bucket, key), size, metadata) - - def apply(bucket: String, key: String, file: File, mimeType: Option[MimeType], - meta: Map[String, String] = Map.empty, cacheControl: Option[String] = None): S3Object = { - S3Object( - bucket, - key, - file.length, - S3Metadata( - meta, - S3ObjectMetadata( - mimeType, - cacheControl - ) - ) - ) - } -} - case class S3Metadata(userMetadata: Map[String, String], objectMetadata: S3ObjectMetadata) -object S3Metadata { - def apply(meta: ObjectMetadata): S3Metadata = { - S3Metadata( - meta.getUserMetadata.asScala.toMap, - S3ObjectMetadata( - contentType = Option(meta.getContentType).map(MimeType.apply), - cacheControl = Option(meta.getCacheControl), - lastModified = Option(meta.getLastModified).map(new DateTime(_)) - ) - ) - } -} - case class S3ObjectMetadata(contentType: Option[MimeType], cacheControl: Option[String], lastModified: Option[DateTime] = None) class S3(config: CommonConfig) extends GridLogging { @@ -66,6 +28,8 @@ class S3(config: CommonConfig) extends GridLogging { type Key = String type UserMetadata = Map[String, String] + import S3Ops.objectUrl + lazy val client: AmazonS3 = S3Ops.buildS3Client(config) // also create a legacy client that uses v2 signatures for URL signing private lazy val legacySigningClient: AmazonS3 = S3Ops.buildS3Client(config, forceV2Sigs = true) @@ -152,7 +116,7 @@ class S3(config: CommonConfig) extends GridLogging { } def store(bucket: Bucket, id: Key, file: File, mimeType: Option[MimeType], meta: UserMetadata = Map.empty, cacheControl: Option[String] = None) - (implicit ex: ExecutionContext, logMarker: LogMarker): Future[S3Object] = + (implicit ex: ExecutionContext, logMarker: LogMarker): Future[Unit] = Future { val metadata = new ObjectMetadata mimeType.foreach(m => metadata.setContentType(m.name)) @@ -168,27 +132,10 @@ class S3(config: CommonConfig) extends GridLogging { val req = new PutObjectRequest(bucket, id, file).withMetadata(metadata) Stopwatch(s"S3 client.putObject ($req)"){ - val res = client.putObject(req) - S3Object(bucket, id, res.getMetadata.getContentLength, S3Metadata(res.getMetadata)) + client.putObject(req) }(markers) } - def storeIfNotPresent(bucket: Bucket, id: Key, file: File, mimeType: Option[MimeType], meta: UserMetadata = Map.empty, cacheControl: Option[String] = None) - (implicit ex: ExecutionContext, logMarker: LogMarker): Future[S3Object] = { - Future{ - Some(client.getObjectMetadata(bucket, id)) - }.recover { - // translate this exception into the object not existing - case as3e:AmazonS3Exception if as3e.getStatusCode == 404 => None - }.flatMap { - case Some(objectMetadata) => - log.info(s"Skipping storing of S3 file $id as key is already present in bucket $bucket") - Future.successful(S3Object(bucket, id, objectMetadata.getContentLength, S3Metadata(objectMetadata))) - case None => - store(bucket, id, file, mimeType, meta, cacheControl) - } - } - def list(bucket: Bucket, prefixDir: String) (implicit ex: ExecutionContext): Future[List[S3Object]] = Future { @@ -197,13 +144,21 @@ class S3(config: CommonConfig) extends GridLogging { val summaries = listing.getObjectSummaries.asScala summaries.map(summary => (summary.getKey, summary)).foldLeft(List[S3Object]()) { case (memo: List[S3Object], (key: String, summary: S3ObjectSummary)) => - S3Object(bucket, key, summary.getSize, getMetadata(bucket, key)) :: memo + S3Object(objectUrl(bucket, key), summary.getSize, getMetadata(bucket, key)) :: memo } } def getMetadata(bucket: Bucket, key: Key): S3Metadata = { val meta = client.getObjectMetadata(bucket, key) - S3Metadata(meta) + + S3Metadata( + meta.getUserMetadata.asScala.toMap, + S3ObjectMetadata( + contentType = Option(MimeType(meta.getContentType)), + cacheControl = Option(meta.getCacheControl), + lastModified = Option(meta.getLastModified).map(new DateTime(_)) + ) + ) } def getUserMetadata(bucket: Bucket, key: Key): Map[Bucket, Bucket] = @@ -221,7 +176,7 @@ class S3(config: CommonConfig) extends GridLogging { object S3Ops { // TODO make this localstack friendly // TODO: Make this region aware - i.e. RegionUtils.getRegion(region).getServiceEndpoint(AmazonS3.ENDPOINT_PREFIX) - val s3Endpoint = "s3.amazonaws.com" + private val s3Endpoint = "s3.amazonaws.com" def buildS3Client(config: CommonConfig, forceV2Sigs: Boolean = false, localstackAware: Boolean = true): AmazonS3 = { @@ -243,4 +198,27 @@ object S3Ops { config.withAWSCredentials(builder, localstackAware).build() } + + def objectUrl(bucket: String, key: String): URI = { + val bucketUrl = s"$bucket.$s3Endpoint" + new URI("http", bucketUrl, s"/$key", null) + } + + def projectFileAsS3Object(url: URI, file: File, mimeType: Option[MimeType], meta: Map[String, String], cacheControl: Option[String]): S3Object = { + S3Object( + url, + file.length, + S3Metadata( + meta, + S3ObjectMetadata( + mimeType, + cacheControl + ) + ) + ) + } + + def projectFileAsS3Object(bucket: String, key: String, file: File, mimeType: Option[MimeType], meta: Map[String, String] = Map.empty, cacheControl: Option[String] = None): S3Object = { + projectFileAsS3Object(objectUrl(bucket, key), file, mimeType, meta, cacheControl) + } } diff --git a/cropper/app/lib/CropStore.scala b/cropper/app/lib/CropStore.scala index 65c59b678e..1e43eeace4 100644 --- a/cropper/app/lib/CropStore.scala +++ b/cropper/app/lib/CropStore.scala @@ -35,7 +35,7 @@ class CropStore(config: CropperConfig) extends S3ImageStorage(config) { case (key, value) => key -> value }.mapValues(_.toString) - storeImage(config.imgPublishingBucket, filename, file, Some(mimeType), filteredMetadata, overwrite = true) map { s3Object => + storeImage(config.imgPublishingBucket, filename, file, Some(mimeType), filteredMetadata) map { s3Object => Asset( translateImgHost(s3Object.uri), Some(s3Object.size), diff --git a/image-loader/app/model/Projector.scala b/image-loader/app/model/Projector.scala index 14550d2fbe..6b58f83019 100644 --- a/image-loader/app/model/Projector.scala +++ b/image-loader/app/model/Projector.scala @@ -3,10 +3,9 @@ package model import java.io.{File, FileOutputStream} import java.util.UUID import com.amazonaws.services.s3.AmazonS3 -import com.amazonaws.services.s3.model.{ObjectMetadata, S3Object => AwsS3Object} +import com.amazonaws.services.s3.model.{ObjectMetadata, S3Object} import com.gu.mediaservice.lib.{ImageIngestOperations, ImageStorageProps, StorableOptimisedImage, StorableOriginalImage, StorableThumbImage} import com.gu.mediaservice.lib.aws.S3Ops -import com.gu.mediaservice.lib.aws.S3Object import com.gu.mediaservice.lib.cleanup.ImageProcessor import com.gu.mediaservice.lib.imaging.ImageOperations import com.gu.mediaservice.lib.logging.LogMarker @@ -40,26 +39,22 @@ case class S3FileExtractedMetadata( object S3FileExtractedMetadata { def apply(s3ObjectMetadata: ObjectMetadata): S3FileExtractedMetadata = { - val lastModified = new DateTime(s3ObjectMetadata.getLastModified) - val userMetadata = s3ObjectMetadata.getUserMetadata.asScala.toMap - apply(lastModified, userMetadata) - } - - def apply(lastModified: DateTime, userMetadata: Map[String, String]): S3FileExtractedMetadata = { - val fileUserMetadata = userMetadata.map { case (key, value) => - // Fix up the contents of the metadata. - ( - // The keys used to be named with underscores instead of dashes but due to localstack being written in Python - // this didn't work locally (see https://github.com/localstack/localstack/issues/459) - key.replaceAll("_", "-"), - // The values are now all URL encoded and it is assumed safe to decode historical values too (based on the tested corpus) - URI.decode(value) - ) - } + val lastModified = s3ObjectMetadata.getLastModified.toInstant.toString + val fileUserMetadata = s3ObjectMetadata.getUserMetadata.asScala.toMap + .map { case (key, value) => + // Fix up the contents of the metadata. + ( + // The keys used to be named with underscores instead of dashes but due to localstack being written in Python + // this didn't work locally (see https://github.com/localstack/localstack/issues/459) + key.replaceAll("_", "-"), + // The values are now all URL encoded and it is assumed safe to decode historical values too (based on the tested corpus) + URI.decode(value) + ) + } val uploadedBy = fileUserMetadata.getOrElse(ImageStorageProps.uploadedByMetadataKey, "re-ingester") - val uploadedTimeRaw = fileUserMetadata.get(ImageStorageProps.uploadTimeMetadataKey).map(new DateTime(_).withZone(DateTimeZone.UTC)) - val uploadTime = uploadedTimeRaw.getOrElse(lastModified) + val uploadedTimeRaw = fileUserMetadata.getOrElse(ImageStorageProps.uploadTimeMetadataKey, lastModified) + val uploadTime = new DateTime(uploadedTimeRaw).withZone(DateTimeZone.UTC) val identifiers = fileUserMetadata.filter{ case (key, _) => key.startsWith(ImageStorageProps.identifierMetadataKeyPrefix) }.map{ case (key, value) => @@ -105,7 +100,7 @@ class Projector(config: ImageUploadOpsCfg, } } - private def getSrcFileDigestForProjection(s3Src: AwsS3Object, imageId: String, tempFile: File) = { + private def getSrcFileDigestForProjection(s3Src: S3Object, imageId: String, tempFile: File) = { IOUtils.copy(s3Src.getObjectContent, new FileOutputStream(tempFile)) DigestedFile(tempFile, imageId) } @@ -152,7 +147,7 @@ class ImageUploadProjectionOps(config: ImageUploadOpsCfg, private def projectOriginalFileAsS3Model(storableOriginalImage: StorableOriginalImage) (implicit ec: ExecutionContext)= Future { val key = ImageIngestOperations.fileKeyFromId(storableOriginalImage.id) - S3Object( + S3Ops.projectFileAsS3Object( config.originalFileBucket, key, storableOriginalImage.file, @@ -164,7 +159,7 @@ class ImageUploadProjectionOps(config: ImageUploadOpsCfg, private def projectThumbnailFileAsS3Model(storableThumbImage: StorableThumbImage)(implicit ec: ExecutionContext) = Future { val key = ImageIngestOperations.fileKeyFromId(storableThumbImage.id) val thumbMimeType = Some(ImageOperations.thumbMimeType) - S3Object( + S3Ops.projectFileAsS3Object( config.thumbBucket, key, storableThumbImage.file, @@ -175,7 +170,7 @@ class ImageUploadProjectionOps(config: ImageUploadOpsCfg, private def projectOptimisedPNGFileAsS3Model(storableOptimisedImage: StorableOptimisedImage)(implicit ec: ExecutionContext) = Future { val key = ImageIngestOperations.optimisedPngKeyFromId(storableOptimisedImage.id) val optimisedPngMimeType = Some(ImageOperations.thumbMimeType) // this IS what we will generate. - S3Object( + S3Ops.projectFileAsS3Object( config.originalFileBucket, key, storableOptimisedImage.file, diff --git a/image-loader/app/model/Uploader.scala b/image-loader/app/model/Uploader.scala index 4d2168f57e..5e73aa9d1f 100644 --- a/image-loader/app/model/Uploader.scala +++ b/image-loader/app/model/Uploader.scala @@ -146,7 +146,6 @@ object Uploader extends GridLogging { val eventualImage = for { browserViewableImage <- eventualBrowserViewableImage s3Source <- sourceStoreFuture - mergedUploadRequest = patchUploadRequestWithS3Metadata(uploadRequest, s3Source) optimisedFileMetadata <- FileMetadataReader.fromIPTCHeadersWithColorInfo(browserViewableImage) thumbViewableImage <- createThumbFuture(optimisedFileMetadata, colourModelFuture, browserViewableImage, deps) s3Thumb <- storeOrProjectThumbFile(thumbViewableImage) @@ -166,7 +165,7 @@ object Uploader extends GridLogging { val thumbAsset = Asset.fromS3Object(s3Thumb, thumbDimensions) val pngAsset = s3PngOption.map(Asset.fromS3Object(_, sourceDimensions)) - val baseImage = ImageUpload.createImage(mergedUploadRequest, sourceAsset, thumbAsset, pngAsset, fullFileMetadata, metadata) + val baseImage = ImageUpload.createImage(uploadRequest, sourceAsset, thumbAsset, pngAsset, fullFileMetadata, metadata) val processedImage = processor(baseImage) @@ -258,16 +257,6 @@ object Uploader extends GridLogging { case None => Future.failed(new Exception("This file is not an image with an identifiable mime type")) } } - - def patchUploadRequestWithS3Metadata(request: UploadRequest, s3Object: S3Object): UploadRequest = { - val metadata = S3FileExtractedMetadata(s3Object.metadata.objectMetadata.lastModified.getOrElse(new DateTime), s3Object.metadata.userMetadata) - request.copy( - uploadTime = metadata.uploadTime, - uploadedBy = metadata.uploadedBy, - uploadInfo = request.uploadInfo.copy(filename = metadata.uploadFileName), - identifiers = metadata.identifiers - ) - } } class Uploader(val store: ImageLoaderStore, diff --git a/image-loader/test/scala/model/ImageUploadTest.scala b/image-loader/test/scala/model/ImageUploadTest.scala index 4c6f1cd226..4e6ec20e16 100644 --- a/image-loader/test/scala/model/ImageUploadTest.scala +++ b/image-loader/test/scala/model/ImageUploadTest.scala @@ -53,7 +53,7 @@ class ImageUploadTest extends AsyncFunSuite with Matchers with MockitoSugar { def mockStore = (a: StorableImage) => Future.successful( - S3Object("madeupname", "madeupkey", a.file, Some(a.mimeType), a.meta, None) + S3Ops.projectFileAsS3Object(new URI("http://madeupname/"), a.file, Some(a.mimeType), a.meta, None) ) def storeOrProjectOriginalFile: StorableOriginalImage => Future[S3Object] = mockStore From f114601b30fc1aa05ba7845638e2ab348fe9865f Mon Sep 17 00:00:00 2001 From: Simon Hildrew Date: Fri, 26 Feb 2021 16:22:56 +0000 Subject: [PATCH 17/18] Revert "Revert "Only process user/provided metadata if image has not previously been uploaded"" --- .../lib/ImageIngestOperations.scala | 9 +- .../lib/ImageQuarantineOperations.scala | 2 +- .../gu/mediaservice/lib/ImageStorage.scala | 3 +- .../gu/mediaservice/lib/S3ImageStorage.scala | 18 ++-- .../com/gu/mediaservice/lib/aws/S3.scala | 100 +++++++++++------- cropper/app/lib/CropStore.scala | 2 +- image-loader/app/model/Projector.scala | 43 ++++---- image-loader/app/model/Uploader.scala | 13 ++- .../test/scala/model/ImageUploadTest.scala | 2 +- 9 files changed, 117 insertions(+), 75 deletions(-) diff --git a/common-lib/src/main/scala/com/gu/mediaservice/lib/ImageIngestOperations.scala b/common-lib/src/main/scala/com/gu/mediaservice/lib/ImageIngestOperations.scala index 1b070331fd..f70a00d2b3 100644 --- a/common-lib/src/main/scala/com/gu/mediaservice/lib/ImageIngestOperations.scala +++ b/common-lib/src/main/scala/com/gu/mediaservice/lib/ImageIngestOperations.scala @@ -29,15 +29,18 @@ class ImageIngestOperations(imageBucket: String, thumbnailBucket: String, config private def storeOriginalImage(storableImage: StorableOriginalImage) (implicit logMarker: LogMarker): Future[S3Object] = - storeImage(imageBucket, fileKeyFromId(storableImage.id), storableImage.file, Some(storableImage.mimeType), storableImage.meta) + storeImage(imageBucket, fileKeyFromId(storableImage.id), storableImage.file, Some(storableImage.mimeType), + storableImage.meta, overwrite = false) private def storeThumbnailImage(storableImage: StorableThumbImage) (implicit logMarker: LogMarker): Future[S3Object] = - storeImage(thumbnailBucket, fileKeyFromId(storableImage.id), storableImage.file, Some(storableImage.mimeType)) + storeImage(thumbnailBucket, fileKeyFromId(storableImage.id), storableImage.file, Some(storableImage.mimeType), + overwrite = true) private def storeOptimisedImage(storableImage: StorableOptimisedImage) (implicit logMarker: LogMarker): Future[S3Object] = - storeImage(imageBucket, optimisedPngKeyFromId(storableImage.id), storableImage.file, Some(storableImage.mimeType)) + storeImage(imageBucket, optimisedPngKeyFromId(storableImage.id), storableImage.file, Some(storableImage.mimeType), + overwrite = true) def deleteOriginal(id: String): Future[Unit] = if(isVersionedS3) deleteVersionedImage(imageBucket, fileKeyFromId(id)) else deleteImage(imageBucket, fileKeyFromId(id)) def deleteThumbnail(id: String): Future[Unit] = deleteImage(thumbnailBucket, fileKeyFromId(id)) diff --git a/common-lib/src/main/scala/com/gu/mediaservice/lib/ImageQuarantineOperations.scala b/common-lib/src/main/scala/com/gu/mediaservice/lib/ImageQuarantineOperations.scala index 73d9fe81dd..9de53d9490 100644 --- a/common-lib/src/main/scala/com/gu/mediaservice/lib/ImageQuarantineOperations.scala +++ b/common-lib/src/main/scala/com/gu/mediaservice/lib/ImageQuarantineOperations.scala @@ -14,7 +14,7 @@ class ImageQuarantineOperations(quarantineBucket: String, config: CommonConfig, def storeQuarantineImage(id: String, file: File, mimeType: Option[MimeType], meta: Map[String, String] = Map.empty) (implicit logMarker: LogMarker): Future[S3Object] = - storeImage(quarantineBucket, ImageIngestOperations.fileKeyFromId(id), file, mimeType, meta) + storeImage(quarantineBucket, ImageIngestOperations.fileKeyFromId(id), file, mimeType, meta, overwrite = true) } diff --git a/common-lib/src/main/scala/com/gu/mediaservice/lib/ImageStorage.scala b/common-lib/src/main/scala/com/gu/mediaservice/lib/ImageStorage.scala index 6f29297dfa..00c59ac18e 100644 --- a/common-lib/src/main/scala/com/gu/mediaservice/lib/ImageStorage.scala +++ b/common-lib/src/main/scala/com/gu/mediaservice/lib/ImageStorage.scala @@ -34,7 +34,8 @@ trait ImageStorage { /** Store a copy of the given file and return the URI of that copy. * The file can safely be deleted afterwards. */ - def storeImage(bucket: String, id: String, file: File, mimeType: Option[MimeType], meta: Map[String, String] = Map.empty) + def storeImage(bucket: String, id: String, file: File, mimeType: Option[MimeType], + meta: Map[String, String] = Map.empty, overwrite: Boolean) (implicit logMarker: LogMarker): Future[S3Object] def deleteImage(bucket: String, id: String): Future[Unit] diff --git a/common-lib/src/main/scala/com/gu/mediaservice/lib/S3ImageStorage.scala b/common-lib/src/main/scala/com/gu/mediaservice/lib/S3ImageStorage.scala index 0c7ed5d767..79fed45686 100644 --- a/common-lib/src/main/scala/com/gu/mediaservice/lib/S3ImageStorage.scala +++ b/common-lib/src/main/scala/com/gu/mediaservice/lib/S3ImageStorage.scala @@ -1,13 +1,12 @@ package com.gu.mediaservice.lib -import java.io.File - -import com.gu.mediaservice.lib.aws.{S3, S3Ops} +import com.gu.mediaservice.lib.aws.S3 import com.gu.mediaservice.lib.config.CommonConfig import com.gu.mediaservice.lib.logging.LogMarker import com.gu.mediaservice.model.MimeType import org.slf4j.LoggerFactory +import java.io.File import scala.collection.JavaConverters._ import scala.concurrent.Future @@ -16,13 +15,14 @@ class S3ImageStorage(config: CommonConfig) extends S3(config) with ImageStorage private val log = LoggerFactory.getLogger(getClass) private val cacheSetting = Some(cacheForever) - def storeImage(bucket: String, id: String, file: File, mimeType: Option[MimeType], meta: Map[String, String] = Map.empty) + def storeImage(bucket: String, id: String, file: File, mimeType: Option[MimeType], + meta: Map[String, String] = Map.empty, overwrite: Boolean) (implicit logMarker: LogMarker) = { - store(bucket, id, file, mimeType, meta, cacheSetting) - .map( _ => - // TODO this is just giving back the stuff we passed in and should be factored out. - S3Ops.projectFileAsS3Object(bucket, id, file, mimeType, meta, cacheSetting) - ) + if (overwrite) { + store(bucket, id, file, mimeType, meta, cacheSetting) + } else { + storeIfNotPresent(bucket, id, file, mimeType, meta, cacheSetting) + } } def deleteImage(bucket: String, id: String) = Future { diff --git a/common-lib/src/main/scala/com/gu/mediaservice/lib/aws/S3.scala b/common-lib/src/main/scala/com/gu/mediaservice/lib/aws/S3.scala index fafc81872b..c21275b72c 100644 --- a/common-lib/src/main/scala/com/gu/mediaservice/lib/aws/S3.scala +++ b/common-lib/src/main/scala/com/gu/mediaservice/lib/aws/S3.scala @@ -3,7 +3,6 @@ package com.gu.mediaservice.lib.aws import java.io.File import java.net.{URI, URLEncoder} import java.nio.charset.{Charset, StandardCharsets} - import com.amazonaws.{AmazonServiceException, ClientConfiguration} import com.amazonaws.services.s3.model._ import com.amazonaws.services.s3.{AmazonS3, AmazonS3ClientBuilder, model} @@ -19,8 +18,47 @@ import scala.concurrent.{ExecutionContext, Future} case class S3Object(uri: URI, size: Long, metadata: S3Metadata) +object S3Object { + def objectUrl(bucket: String, key: String): URI = { + val bucketUrl = s"$bucket.${S3Ops.s3Endpoint}" + new URI("http", bucketUrl, s"/$key", null) + } + + def apply(bucket: String, key: String, size: Long, metadata: S3Metadata): S3Object = + apply(objectUrl(bucket, key), size, metadata) + + def apply(bucket: String, key: String, file: File, mimeType: Option[MimeType], + meta: Map[String, String] = Map.empty, cacheControl: Option[String] = None): S3Object = { + S3Object( + bucket, + key, + file.length, + S3Metadata( + meta, + S3ObjectMetadata( + mimeType, + cacheControl + ) + ) + ) + } +} + case class S3Metadata(userMetadata: Map[String, String], objectMetadata: S3ObjectMetadata) +object S3Metadata { + def apply(meta: ObjectMetadata): S3Metadata = { + S3Metadata( + meta.getUserMetadata.asScala.toMap, + S3ObjectMetadata( + contentType = Option(meta.getContentType).map(MimeType.apply), + cacheControl = Option(meta.getCacheControl), + lastModified = Option(meta.getLastModified).map(new DateTime(_)) + ) + ) + } +} + case class S3ObjectMetadata(contentType: Option[MimeType], cacheControl: Option[String], lastModified: Option[DateTime] = None) class S3(config: CommonConfig) extends GridLogging { @@ -28,8 +66,6 @@ class S3(config: CommonConfig) extends GridLogging { type Key = String type UserMetadata = Map[String, String] - import S3Ops.objectUrl - lazy val client: AmazonS3 = S3Ops.buildS3Client(config) // also create a legacy client that uses v2 signatures for URL signing private lazy val legacySigningClient: AmazonS3 = S3Ops.buildS3Client(config, forceV2Sigs = true) @@ -116,7 +152,7 @@ class S3(config: CommonConfig) extends GridLogging { } def store(bucket: Bucket, id: Key, file: File, mimeType: Option[MimeType], meta: UserMetadata = Map.empty, cacheControl: Option[String] = None) - (implicit ex: ExecutionContext, logMarker: LogMarker): Future[Unit] = + (implicit ex: ExecutionContext, logMarker: LogMarker): Future[S3Object] = Future { val metadata = new ObjectMetadata mimeType.foreach(m => metadata.setContentType(m.name)) @@ -132,10 +168,27 @@ class S3(config: CommonConfig) extends GridLogging { val req = new PutObjectRequest(bucket, id, file).withMetadata(metadata) Stopwatch(s"S3 client.putObject ($req)"){ - client.putObject(req) + val res = client.putObject(req) + S3Object(bucket, id, res.getMetadata.getContentLength, S3Metadata(res.getMetadata)) }(markers) } + def storeIfNotPresent(bucket: Bucket, id: Key, file: File, mimeType: Option[MimeType], meta: UserMetadata = Map.empty, cacheControl: Option[String] = None) + (implicit ex: ExecutionContext, logMarker: LogMarker): Future[S3Object] = { + Future{ + Some(client.getObjectMetadata(bucket, id)) + }.recover { + // translate this exception into the object not existing + case as3e:AmazonS3Exception if as3e.getStatusCode == 404 => None + }.flatMap { + case Some(objectMetadata) => + log.info(s"Skipping storing of S3 file $id as key is already present in bucket $bucket") + Future.successful(S3Object(bucket, id, objectMetadata.getContentLength, S3Metadata(objectMetadata))) + case None => + store(bucket, id, file, mimeType, meta, cacheControl) + } + } + def list(bucket: Bucket, prefixDir: String) (implicit ex: ExecutionContext): Future[List[S3Object]] = Future { @@ -144,21 +197,13 @@ class S3(config: CommonConfig) extends GridLogging { val summaries = listing.getObjectSummaries.asScala summaries.map(summary => (summary.getKey, summary)).foldLeft(List[S3Object]()) { case (memo: List[S3Object], (key: String, summary: S3ObjectSummary)) => - S3Object(objectUrl(bucket, key), summary.getSize, getMetadata(bucket, key)) :: memo + S3Object(bucket, key, summary.getSize, getMetadata(bucket, key)) :: memo } } def getMetadata(bucket: Bucket, key: Key): S3Metadata = { val meta = client.getObjectMetadata(bucket, key) - - S3Metadata( - meta.getUserMetadata.asScala.toMap, - S3ObjectMetadata( - contentType = Option(MimeType(meta.getContentType)), - cacheControl = Option(meta.getCacheControl), - lastModified = Option(meta.getLastModified).map(new DateTime(_)) - ) - ) + S3Metadata(meta) } def getUserMetadata(bucket: Bucket, key: Key): Map[Bucket, Bucket] = @@ -176,7 +221,7 @@ class S3(config: CommonConfig) extends GridLogging { object S3Ops { // TODO make this localstack friendly // TODO: Make this region aware - i.e. RegionUtils.getRegion(region).getServiceEndpoint(AmazonS3.ENDPOINT_PREFIX) - private val s3Endpoint = "s3.amazonaws.com" + val s3Endpoint = "s3.amazonaws.com" def buildS3Client(config: CommonConfig, forceV2Sigs: Boolean = false, localstackAware: Boolean = true): AmazonS3 = { @@ -198,27 +243,4 @@ object S3Ops { config.withAWSCredentials(builder, localstackAware).build() } - - def objectUrl(bucket: String, key: String): URI = { - val bucketUrl = s"$bucket.$s3Endpoint" - new URI("http", bucketUrl, s"/$key", null) - } - - def projectFileAsS3Object(url: URI, file: File, mimeType: Option[MimeType], meta: Map[String, String], cacheControl: Option[String]): S3Object = { - S3Object( - url, - file.length, - S3Metadata( - meta, - S3ObjectMetadata( - mimeType, - cacheControl - ) - ) - ) - } - - def projectFileAsS3Object(bucket: String, key: String, file: File, mimeType: Option[MimeType], meta: Map[String, String] = Map.empty, cacheControl: Option[String] = None): S3Object = { - projectFileAsS3Object(objectUrl(bucket, key), file, mimeType, meta, cacheControl) - } } diff --git a/cropper/app/lib/CropStore.scala b/cropper/app/lib/CropStore.scala index 1e43eeace4..65c59b678e 100644 --- a/cropper/app/lib/CropStore.scala +++ b/cropper/app/lib/CropStore.scala @@ -35,7 +35,7 @@ class CropStore(config: CropperConfig) extends S3ImageStorage(config) { case (key, value) => key -> value }.mapValues(_.toString) - storeImage(config.imgPublishingBucket, filename, file, Some(mimeType), filteredMetadata) map { s3Object => + storeImage(config.imgPublishingBucket, filename, file, Some(mimeType), filteredMetadata, overwrite = true) map { s3Object => Asset( translateImgHost(s3Object.uri), Some(s3Object.size), diff --git a/image-loader/app/model/Projector.scala b/image-loader/app/model/Projector.scala index 6b58f83019..14550d2fbe 100644 --- a/image-loader/app/model/Projector.scala +++ b/image-loader/app/model/Projector.scala @@ -3,9 +3,10 @@ package model import java.io.{File, FileOutputStream} import java.util.UUID import com.amazonaws.services.s3.AmazonS3 -import com.amazonaws.services.s3.model.{ObjectMetadata, S3Object} +import com.amazonaws.services.s3.model.{ObjectMetadata, S3Object => AwsS3Object} import com.gu.mediaservice.lib.{ImageIngestOperations, ImageStorageProps, StorableOptimisedImage, StorableOriginalImage, StorableThumbImage} import com.gu.mediaservice.lib.aws.S3Ops +import com.gu.mediaservice.lib.aws.S3Object import com.gu.mediaservice.lib.cleanup.ImageProcessor import com.gu.mediaservice.lib.imaging.ImageOperations import com.gu.mediaservice.lib.logging.LogMarker @@ -39,22 +40,26 @@ case class S3FileExtractedMetadata( object S3FileExtractedMetadata { def apply(s3ObjectMetadata: ObjectMetadata): S3FileExtractedMetadata = { - val lastModified = s3ObjectMetadata.getLastModified.toInstant.toString - val fileUserMetadata = s3ObjectMetadata.getUserMetadata.asScala.toMap - .map { case (key, value) => - // Fix up the contents of the metadata. - ( - // The keys used to be named with underscores instead of dashes but due to localstack being written in Python - // this didn't work locally (see https://github.com/localstack/localstack/issues/459) - key.replaceAll("_", "-"), - // The values are now all URL encoded and it is assumed safe to decode historical values too (based on the tested corpus) - URI.decode(value) - ) - } + val lastModified = new DateTime(s3ObjectMetadata.getLastModified) + val userMetadata = s3ObjectMetadata.getUserMetadata.asScala.toMap + apply(lastModified, userMetadata) + } + + def apply(lastModified: DateTime, userMetadata: Map[String, String]): S3FileExtractedMetadata = { + val fileUserMetadata = userMetadata.map { case (key, value) => + // Fix up the contents of the metadata. + ( + // The keys used to be named with underscores instead of dashes but due to localstack being written in Python + // this didn't work locally (see https://github.com/localstack/localstack/issues/459) + key.replaceAll("_", "-"), + // The values are now all URL encoded and it is assumed safe to decode historical values too (based on the tested corpus) + URI.decode(value) + ) + } val uploadedBy = fileUserMetadata.getOrElse(ImageStorageProps.uploadedByMetadataKey, "re-ingester") - val uploadedTimeRaw = fileUserMetadata.getOrElse(ImageStorageProps.uploadTimeMetadataKey, lastModified) - val uploadTime = new DateTime(uploadedTimeRaw).withZone(DateTimeZone.UTC) + val uploadedTimeRaw = fileUserMetadata.get(ImageStorageProps.uploadTimeMetadataKey).map(new DateTime(_).withZone(DateTimeZone.UTC)) + val uploadTime = uploadedTimeRaw.getOrElse(lastModified) val identifiers = fileUserMetadata.filter{ case (key, _) => key.startsWith(ImageStorageProps.identifierMetadataKeyPrefix) }.map{ case (key, value) => @@ -100,7 +105,7 @@ class Projector(config: ImageUploadOpsCfg, } } - private def getSrcFileDigestForProjection(s3Src: S3Object, imageId: String, tempFile: File) = { + private def getSrcFileDigestForProjection(s3Src: AwsS3Object, imageId: String, tempFile: File) = { IOUtils.copy(s3Src.getObjectContent, new FileOutputStream(tempFile)) DigestedFile(tempFile, imageId) } @@ -147,7 +152,7 @@ class ImageUploadProjectionOps(config: ImageUploadOpsCfg, private def projectOriginalFileAsS3Model(storableOriginalImage: StorableOriginalImage) (implicit ec: ExecutionContext)= Future { val key = ImageIngestOperations.fileKeyFromId(storableOriginalImage.id) - S3Ops.projectFileAsS3Object( + S3Object( config.originalFileBucket, key, storableOriginalImage.file, @@ -159,7 +164,7 @@ class ImageUploadProjectionOps(config: ImageUploadOpsCfg, private def projectThumbnailFileAsS3Model(storableThumbImage: StorableThumbImage)(implicit ec: ExecutionContext) = Future { val key = ImageIngestOperations.fileKeyFromId(storableThumbImage.id) val thumbMimeType = Some(ImageOperations.thumbMimeType) - S3Ops.projectFileAsS3Object( + S3Object( config.thumbBucket, key, storableThumbImage.file, @@ -170,7 +175,7 @@ class ImageUploadProjectionOps(config: ImageUploadOpsCfg, private def projectOptimisedPNGFileAsS3Model(storableOptimisedImage: StorableOptimisedImage)(implicit ec: ExecutionContext) = Future { val key = ImageIngestOperations.optimisedPngKeyFromId(storableOptimisedImage.id) val optimisedPngMimeType = Some(ImageOperations.thumbMimeType) // this IS what we will generate. - S3Ops.projectFileAsS3Object( + S3Object( config.originalFileBucket, key, storableOptimisedImage.file, diff --git a/image-loader/app/model/Uploader.scala b/image-loader/app/model/Uploader.scala index 5e73aa9d1f..4d2168f57e 100644 --- a/image-loader/app/model/Uploader.scala +++ b/image-loader/app/model/Uploader.scala @@ -146,6 +146,7 @@ object Uploader extends GridLogging { val eventualImage = for { browserViewableImage <- eventualBrowserViewableImage s3Source <- sourceStoreFuture + mergedUploadRequest = patchUploadRequestWithS3Metadata(uploadRequest, s3Source) optimisedFileMetadata <- FileMetadataReader.fromIPTCHeadersWithColorInfo(browserViewableImage) thumbViewableImage <- createThumbFuture(optimisedFileMetadata, colourModelFuture, browserViewableImage, deps) s3Thumb <- storeOrProjectThumbFile(thumbViewableImage) @@ -165,7 +166,7 @@ object Uploader extends GridLogging { val thumbAsset = Asset.fromS3Object(s3Thumb, thumbDimensions) val pngAsset = s3PngOption.map(Asset.fromS3Object(_, sourceDimensions)) - val baseImage = ImageUpload.createImage(uploadRequest, sourceAsset, thumbAsset, pngAsset, fullFileMetadata, metadata) + val baseImage = ImageUpload.createImage(mergedUploadRequest, sourceAsset, thumbAsset, pngAsset, fullFileMetadata, metadata) val processedImage = processor(baseImage) @@ -257,6 +258,16 @@ object Uploader extends GridLogging { case None => Future.failed(new Exception("This file is not an image with an identifiable mime type")) } } + + def patchUploadRequestWithS3Metadata(request: UploadRequest, s3Object: S3Object): UploadRequest = { + val metadata = S3FileExtractedMetadata(s3Object.metadata.objectMetadata.lastModified.getOrElse(new DateTime), s3Object.metadata.userMetadata) + request.copy( + uploadTime = metadata.uploadTime, + uploadedBy = metadata.uploadedBy, + uploadInfo = request.uploadInfo.copy(filename = metadata.uploadFileName), + identifiers = metadata.identifiers + ) + } } class Uploader(val store: ImageLoaderStore, diff --git a/image-loader/test/scala/model/ImageUploadTest.scala b/image-loader/test/scala/model/ImageUploadTest.scala index 4e6ec20e16..4c6f1cd226 100644 --- a/image-loader/test/scala/model/ImageUploadTest.scala +++ b/image-loader/test/scala/model/ImageUploadTest.scala @@ -53,7 +53,7 @@ class ImageUploadTest extends AsyncFunSuite with Matchers with MockitoSugar { def mockStore = (a: StorableImage) => Future.successful( - S3Ops.projectFileAsS3Object(new URI("http://madeupname/"), a.file, Some(a.mimeType), a.meta, None) + S3Object("madeupname", "madeupkey", a.file, Some(a.mimeType), a.meta, None) ) def storeOrProjectOriginalFile: StorableOriginalImage => Future[S3Object] = mockStore From 4c4e535146a97884f3a02bb904870435f579e196 Mon Sep 17 00:00:00 2001 From: Simon Hildrew Date: Fri, 26 Feb 2021 16:33:42 +0000 Subject: [PATCH 18/18] Read back from the S3 object itself --- .../src/main/scala/com/gu/mediaservice/lib/aws/S3.scala | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/common-lib/src/main/scala/com/gu/mediaservice/lib/aws/S3.scala b/common-lib/src/main/scala/com/gu/mediaservice/lib/aws/S3.scala index c21275b72c..9657da3969 100644 --- a/common-lib/src/main/scala/com/gu/mediaservice/lib/aws/S3.scala +++ b/common-lib/src/main/scala/com/gu/mediaservice/lib/aws/S3.scala @@ -168,8 +168,10 @@ class S3(config: CommonConfig) extends GridLogging { val req = new PutObjectRequest(bucket, id, file).withMetadata(metadata) Stopwatch(s"S3 client.putObject ($req)"){ - val res = client.putObject(req) - S3Object(bucket, id, res.getMetadata.getContentLength, S3Metadata(res.getMetadata)) + client.putObject(req) + // once we've completed the PUT read back to ensure that we are returning reality + val metadata = client.getObjectMetadata(bucket, id) + S3Object(bucket, id, metadata.getContentLength, S3Metadata(metadata)) }(markers) }