diff --git a/admin-tools/lambda/src/main/scala/com/gu/mediaservice/BatchIndexLambdaHandler.scala b/admin-tools/lambda/src/main/scala/com/gu/mediaservice/BatchIndexLambdaHandler.scala index e71ca9a0cb..e822868f17 100644 --- a/admin-tools/lambda/src/main/scala/com/gu/mediaservice/BatchIndexLambdaHandler.scala +++ b/admin-tools/lambda/src/main/scala/com/gu/mediaservice/BatchIndexLambdaHandler.scala @@ -17,7 +17,8 @@ class BatchIndexLambdaHandler { threshold = sys.env.get("LATENCY_THRESHOLD").map(t => Integer.parseInt(t)), maxSize = sys.env("MAX_SIZE").toInt, startState = IndexInputCreation.get(sys.env("START_STATE").toInt), - checkerStartState = IndexInputCreation.get(sys.env("CHECKER_START_STATE").toInt) + checkerStartState = IndexInputCreation.get(sys.env("CHECKER_START_STATE").toInt), + domainRoot = sys.env("DOMAIN_ROOT"), ) private val batchIndex = new BatchIndexHandler(cfg) diff --git a/admin-tools/lib/src/main/scala/com/gu/mediaservice/BatchIndexHandler.scala b/admin-tools/lib/src/main/scala/com/gu/mediaservice/BatchIndexHandler.scala index d1b9e72a06..3c2df5bd14 100644 --- a/admin-tools/lib/src/main/scala/com/gu/mediaservice/BatchIndexHandler.scala +++ b/admin-tools/lib/src/main/scala/com/gu/mediaservice/BatchIndexHandler.scala @@ -8,16 +8,15 @@ import com.amazonaws.services.dynamodbv2.document.utils.ValueMap import com.gu.mediaservice.indexing.IndexInputCreation._ import com.gu.mediaservice.indexing.ProduceProgress import com.gu.mediaservice.lib.aws.UpdateMessage +import com.gu.mediaservice.lib.config.{ServiceHosts, Services} import com.gu.mediaservice.model.Image import com.typesafe.scalalogging.LazyLogging import net.logstash.logback.marker.Markers import play.api.libs.json.{JsObject, Json} -import scala.collection.JavaConverters._ import scala.concurrent.duration.FiniteDuration import scala.concurrent.{Await, ExecutionContext, Future} import scala.util.{Failure, Success, Try} - import scala.collection.JavaConverters._ case class BatchIndexHandlerConfig( @@ 
-35,7 +34,8 @@ case class BatchIndexHandlerConfig( threshold: Option[Integer], startState: ProduceProgress, checkerStartState: ProduceProgress, - maxSize: Int + maxSize: Int, + domainRoot: String ) case class SuccessResult(foundImagesCount: Int, notFoundImagesCount: Int, progressHistory: String, projectionTookInSec: Long) @@ -54,9 +54,10 @@ class BatchIndexHandler(cfg: BatchIndexHandlerConfig) extends LoggingWithMarkers private val GetIdsTimeout = new FiniteDuration(20, TimeUnit.SECONDS) private val GlobalTimeout = new FiniteDuration(MainProcessingTimeoutInSec, TimeUnit.SECONDS) private val ImagesProjectionTimeout = new FiniteDuration(ProjectionTimeoutInSec, TimeUnit.MINUTES) - private val gridClient = GridClient(maxIdleConnections, debugHttpResponse = false) + val services = new Services(domainRoot, ServiceHosts.guardianPrefixes, Set.empty) + private val gridClient = GridClient(apiKey, services, maxIdleConnections, debugHttpResponse = false) - private val ImagesBatchProjector = new ImagesBatchProjection(apiKey, projectionEndpoint, ImagesProjectionTimeout, gridClient, maxSize) + private val ImagesBatchProjector = new ImagesBatchProjection(apiKey, ImagesProjectionTimeout, gridClient, maxSize) private val InputIdsStore = new InputIdsStore(AwsHelpers.buildDynamoTableClient(dynamoTableName), batchSize) import ImagesBatchProjector._ diff --git a/admin-tools/lib/src/main/scala/com/gu/mediaservice/ImageDataMerger.scala b/admin-tools/lib/src/main/scala/com/gu/mediaservice/ImageDataMerger.scala index bdb6838881..5e10290c7e 100644 --- a/admin-tools/lib/src/main/scala/com/gu/mediaservice/ImageDataMerger.scala +++ b/admin-tools/lib/src/main/scala/com/gu/mediaservice/ImageDataMerger.scala @@ -4,8 +4,6 @@ import java.net.URL import com.gu.mediaservice.lib.config.{ServiceHosts, Services} import com.gu.mediaservice.model._ -import com.gu.mediaservice.model.leases.LeasesByMedia -import com.gu.mediaservice.model.usage.Usage import com.typesafe.scalalogging.LazyLogging import 
org.joda.time.DateTime import play.api.libs.json._ @@ -14,7 +12,6 @@ import scala.concurrent.duration.Duration import scala.concurrent.{Await, ExecutionContext, Future} object ImageDataMergerConfig { - private val gridClient = GridClient(maxIdleConnections = 5) def apply(apiKey: String, domainRoot: String, imageLoaderEndpointOpt: Option[String]): ImageDataMergerConfig = { val services = new Services(domainRoot, ServiceHosts.guardianPrefixes, Set.empty) @@ -22,11 +19,12 @@ object ImageDataMergerConfig { case Some(uri) => uri case None => services.loaderBaseUri } - new ImageDataMergerConfig(apiKey, services, gridClient, imageLoaderEndpoint) + new ImageDataMergerConfig(apiKey, services, imageLoaderEndpoint) } } -case class ImageDataMergerConfig(apiKey: String, services: Services, gridClient: GridClient, imageLoaderEndpoint: String) { +case class ImageDataMergerConfig(apiKey: String, services: Services, imageLoaderEndpoint: String) { + val gridClient = GridClient(apiKey, services, maxIdleConnections = 5) def isValidApiKey(): Boolean = { // Make an API key authenticated request to the leases API as a way of validating the API key. // A 200 indicates a valid key. 
@@ -138,7 +136,6 @@ class ImageDataMerger(config: ImageDataMergerConfig) extends LazyLogging { import config._ import services._ - def getMergedImageData(mediaId: String)(implicit ec: ExecutionContext): FullImageProjectionResult = { try { val maybeImageFuture = getMergedImageDataInternal(mediaId) @@ -151,7 +148,7 @@ class ImageDataMerger(config: ImageDataMergerConfig) extends LazyLogging { } private def getMergedImageDataInternal(mediaId: String)(implicit ec: ExecutionContext): Future[Option[Image]] = { - val maybeImage: Option[Image] = getImageLoaderProjection(mediaId) + val maybeImage: Option[Image] = gridClient.getImageLoaderProjection(mediaId, imageLoaderEndpoint) maybeImage match { case Some(img) => aggregate(img).map { aggImg => @@ -166,10 +163,10 @@ class ImageDataMerger(config: ImageDataMergerConfig) extends LazyLogging { val mediaId = image.id for { collections <- getCollectionsResponse(mediaId) - edits <- getEdits(mediaId) - leases <- getLeases(mediaId) - usages <- getUsages(mediaId) - crops <- getCrops(mediaId) + edits <- gridClient.getEdits(mediaId) + leases <- gridClient.getLeases(mediaId) + usages <- gridClient.getUsages(mediaId) + crops <- gridClient.getCrops(mediaId) } yield image.copy( collections = collections, userMetadata = edits, @@ -179,15 +176,6 @@ class ImageDataMerger(config: ImageDataMergerConfig) extends LazyLogging { ) } - private def getImageLoaderProjection(mediaId: String): Option[Image] = { - logger.info("attempt to get image projection from image-loader") - val url = new URL(s"$imageLoaderEndpoint/images/project/$mediaId") - val res = gridClient.makeGetRequestSync(url, apiKey) - validateResponse(res, url) - logger.info(s"got image projection from image-loader for $mediaId with status code $res.statusCode") - if (res.statusCode == 200) Some(res.body.as[Image]) else None - } - private def getCollectionsResponse(mediaId: String)(implicit ec: ExecutionContext): Future[List[Collection]] = { logger.info("attempt to get collections") val 
url = new URL(s"$collectionsBaseUri/images/$mediaId") @@ -197,49 +185,6 @@ class ImageDataMerger(config: ImageDataMergerConfig) extends LazyLogging { } } - private def getEdits(mediaId: String)(implicit ec: ExecutionContext): Future[Option[Edits]] = { - logger.info("attempt to get edits") - val url = new URL(s"$metadataBaseUri/edits/$mediaId") - gridClient.makeGetRequestAsync(url, apiKey).map { res => - validateResponse(res, url) - if (res.statusCode == 200) Some((res.body \ "data").as[Edits]) else None - } - } - - private def getCrops(mediaId: String)(implicit ec: ExecutionContext): Future[List[Crop]] = { - logger.info("attempt to get crops") - val url = new URL(s"$cropperBaseUri/crops/$mediaId") - gridClient.makeGetRequestAsync(url, apiKey).map { res => - validateResponse(res, url) - if (res.statusCode == 200) (res.body \ "data").as[List[Crop]] else Nil - } - } - - private def getLeases(mediaId: String)(implicit ec: ExecutionContext): Future[LeasesByMedia] = { - logger.info("attempt to get leases") - val url = new URL(s"$leasesBaseUri/leases/media/$mediaId") - gridClient.makeGetRequestAsync(url, apiKey).map { res => - validateResponse(res, url) - if (res.statusCode == 200) (res.body \ "data").as[LeasesByMedia] else LeasesByMedia.empty - } - } - - private def getUsages(mediaId: String)(implicit ec: ExecutionContext): Future[List[Usage]] = { - logger.info("attempt to get usages") - - def unpackUsagesFromEntityResponse(resBody: JsValue): List[JsValue] = { - (resBody \ "data").as[JsArray].value - .map(entity => (entity.as[JsObject] \ "data").as[JsValue]).toList - } - - val url = new URL(s"$usageBaseUri/usages/media/$mediaId") - gridClient.makeGetRequestAsync(url, apiKey).map { res => - validateResponse(res, url) - if (res.statusCode == 200) unpackUsagesFromEntityResponse(res.body).map(_.as[Usage]) - else Nil - } - } - private def validateResponse(res: ResponseWrapper, url: URL): Unit = { import res._ if (statusCode != 200 && statusCode != 404) { @@ -260,6 +205,3 @@ 
class ImageDataMerger(config: ImageDataMergerConfig) extends LazyLogging { } } -class DownstreamApiInBadStateException(message: String, downstreamMessage: String) extends IllegalStateException(message) { - def getDownstreamMessage = downstreamMessage -} diff --git a/admin-tools/lib/src/main/scala/com/gu/mediaservice/ImagesBatchProjection.scala b/admin-tools/lib/src/main/scala/com/gu/mediaservice/ImagesBatchProjection.scala index 54ba3a0916..abc3429096 100644 --- a/admin-tools/lib/src/main/scala/com/gu/mediaservice/ImagesBatchProjection.scala +++ b/admin-tools/lib/src/main/scala/com/gu/mediaservice/ImagesBatchProjection.scala @@ -7,7 +7,7 @@ import com.gu.mediaservice.model.Image import scala.concurrent.duration.Duration import scala.concurrent.{Await, ExecutionContext, Future} -class ImagesBatchProjection(apiKey: String, domainRoot: String, timeout: Duration, gridClient: GridClient, maxSize: Int) { +class ImagesBatchProjection(apiKey: String, timeout: Duration, gridClient: GridClient, maxSize: Int) { private implicit val ThrottledExecutionContext = ExecutionContext.fromExecutor(java.util.concurrent.Executors.newFixedThreadPool(5)) diff --git a/build.sbt b/build.sbt index 5227c3fd54..ea31a79a47 100644 --- a/build.sbt +++ b/build.sbt @@ -26,8 +26,11 @@ val commonSettings = Seq( publishArtifact in (Compile, packageDoc) := false ) +//Common projects to all organizations +lazy val commonProjects: Seq[sbt.ProjectReference] = Seq(commonLib, restLib, auth, collections, cropper, imageLoader, leases, thrall, kahuna, metadataEditor, usage, mediaApi, adminToolsLambda, adminToolsScripts, adminToolsDev) + lazy val root = project("grid", path = Some(".")) - .aggregate(commonLib, auth, collections, cropper, imageLoader, leases, thrall, kahuna, metadataEditor, usage, mediaApi, adminToolsLambda, adminToolsScripts, adminToolsDev) + .aggregate((maybeBBCLib.toList ++ commonProjects):_*) .enablePlugins(RiffRaffArtifact) .settings( riffRaffManifestProjectName := 
s"media-service::grid::all", @@ -67,20 +70,16 @@ val okHttpVersion = "3.12.1" val bbcBuildProcess: Boolean = System.getenv().asScala.get("BUILD_ORG").contains("bbc") -val bbcCommonLibSettings: SettingsDefinition = if (bbcBuildProcess) { - Seq( - libraryDependencies ++= Seq("com.gu" %% "pan-domain-auth-play_2-6" % "0.9.2-SNAPSHOT") - ) -} else { - Seq( - libraryDependencies ++= Seq("com.gu" %% "pan-domain-auth-play_2-6" % "0.8.2") - ) -} +//BBC specific project, it only gets compiled when bbcBuildProcess is true +lazy val bbcProject = project("bbc").dependsOn(restLib) + +val maybeBBCLib: Option[sbt.ProjectReference] = if(bbcBuildProcess) Some(bbcProject) else None lazy val commonLib = project("common-lib").settings( libraryDependencies ++= Seq( // also exists in plugins.sbt, TODO deduplicate this "com.gu" %% "editorial-permissions-client" % "2.0", + "com.gu" %% "pan-domain-auth-play_2-6" % "0.8.2", "com.amazonaws" % "aws-java-sdk-iam" % awsSdkVersion, "com.amazonaws" % "aws-java-sdk-s3" % awsSdkVersion, "com.amazonaws" % "aws-java-sdk-ec2" % awsSdkVersion, @@ -110,10 +109,11 @@ lazy val commonLib = project("common-lib").settings( "org.codehaus.janino" % "janino" % "3.0.6", "com.typesafe.play" %% "play-json-joda" % "2.6.9", "com.gu" %% "scanamo" % "1.0.0-M8", - "com.fasterxml.jackson.core" % "jackson-databind" % "2.9.10.7" + "com.fasterxml.jackson.core" % "jackson-databind" % "2.9.10.7", + "com.squareup.okhttp3" % "okhttp" % okHttpVersion ), dependencyOverrides += "org.apache.thrift" % "libthrift" % "0.9.1" -).settings(bbcCommonLibSettings) +) lazy val restLib = project("rest-lib").settings( libraryDependencies ++= Seq( @@ -134,7 +134,6 @@ lazy val cropper = playProject("cropper", 9006) lazy val imageLoader = playProject("image-loader", 9003).settings { libraryDependencies ++= Seq( - "com.squareup.okhttp3" % "okhttp" % okHttpVersion, "org.apache.tika" % "tika-core" % "1.20", "com.drewnoakes" % "metadata-extractor" % "2.15.0" ) @@ -180,7 +179,6 @@ lazy val 
adminToolsLib = project("admin-tools-lib", Some("admin-tools/lib")) ExclusionRule("com.gu", "kinesis-logback-appender") ), libraryDependencies ++= Seq( - "com.squareup.okhttp3" % "okhttp" % okHttpVersion, "com.typesafe.play" %% "play-json" % "2.6.9", "com.typesafe.play" %% "play-json-joda" % "2.6.9", "com.typesafe.play" %% "play-functional" % "2.6.9", @@ -247,6 +245,15 @@ lazy val usage = playProject("usage", 9009).settings( lazy val scripts = project("scripts") .dependsOn(commonLib) + .enablePlugins(JavaAppPackaging, UniversalPlugin) + .settings( + libraryDependencies ++= Seq( + // V2 of the AWS SDK as it's easier to use for scripts and won't leak to the rest of the project from here + "software.amazon.awssdk" % "s3" % "2.15.81", + // bump jcommander explicitly as AWS SDK is pulling in a vulnerable version + "com.beust" % "jcommander" % "1.75" + ) + ) lazy val migration = project("migration") .dependsOn(commonLib). @@ -281,8 +288,8 @@ val buildInfo = Seq( ) ) -def playProject(projectName: String, port: Int, path: Option[String] = None): Project = - project(projectName, path) +def playProject(projectName: String, port: Int, path: Option[String] = None): Project = { + val commonProject = project(projectName, path) .enablePlugins(PlayScala, JDebPackaging, SystemdPlugin, BuildInfoPlugin) .dependsOn(restLib) .settings(commonSettings ++ buildInfo ++ Seq( @@ -309,3 +316,6 @@ def playProject(projectName: String, port: Int, path: Option[String] = None): Pr "-J-XX:GCLogFileSize=2M" ) )) + //Add the BBC library dependency if defined + maybeBBCLib.fold(commonProject){commonProject.dependsOn(_)} +} diff --git a/admin-tools/lib/src/main/scala/com/gu/mediaservice/GridClient.scala b/common-lib/src/main/scala/com/gu/mediaservice/GridClient.scala similarity index 55% rename from admin-tools/lib/src/main/scala/com/gu/mediaservice/GridClient.scala rename to common-lib/src/main/scala/com/gu/mediaservice/GridClient.scala index ef2589ab8f..8d9a4ed3a8 100644 --- 
a/admin-tools/lib/src/main/scala/com/gu/mediaservice/GridClient.scala +++ b/common-lib/src/main/scala/com/gu/mediaservice/GridClient.scala @@ -2,13 +2,18 @@ package com.gu.mediaservice import java.io.IOException import java.net.URL + import com.gu.mediaservice.lib.auth.provider.ApiKeyAuthentication +import com.gu.mediaservice.lib.config.Services +import com.gu.mediaservice.model.{Crop, Edits, Image} +import com.gu.mediaservice.model.leases.LeasesByMedia +import com.gu.mediaservice.model.usage.Usage import com.typesafe.scalalogging.LazyLogging +import play.api.libs.json.{JsArray, JsObject, JsValue, Json} import okhttp3._ -import play.api.libs.json.{JsValue, Json} -import scala.util.{Failure, Success, Try} import scala.concurrent.{ExecutionContext, Future, Promise} +import scala.util.{Failure, Success, Try} object ClientResponse { case class Message(errorMessage: String, downstreamErrorMessage: String) @@ -35,10 +40,11 @@ case class ClientErrorMessages(errorMessage: String, downstreamErrorMessage: Str case class ResponseWrapper(body: JsValue, statusCode: Int, bodyAsString: String) object GridClient { - def apply(maxIdleConnections: Int, debugHttpResponse: Boolean = true): GridClient = new GridClient(maxIdleConnections, debugHttpResponse) + def apply(apiKey: String, services: Services, maxIdleConnections: Int, debugHttpResponse: Boolean = true): GridClient = + new GridClient(apiKey, services, maxIdleConnections, debugHttpResponse) } -class GridClient(maxIdleConnections: Int, debugHttpResponse: Boolean) extends ApiKeyAuthentication with LazyLogging { +class GridClient(apiKey: String, services: Services, maxIdleConnections: Int, debugHttpResponse: Boolean) extends ApiKeyAuthentication with LazyLogging { import java.util.concurrent.TimeUnit @@ -136,5 +142,82 @@ class GridClient(maxIdleConnections: Int, debugHttpResponse: Boolean) extends Ap }) promise.future } + + private def validateResponse(res: ResponseWrapper, url: URL): Unit = { + import res._ + if (statusCode != 
200 && statusCode != 404) { + val errorMessage = s"breaking the circuit of full image projection, downstream API: $url is in a bad state, code: $statusCode" + val downstreamErrorMessage = res.bodyAsString + + val errorJson = Json.obj( + "level" -> "ERROR", + "errorStatusCode" -> statusCode, + "message" -> Json.obj( + "errorMessage" -> errorMessage, + "downstreamErrorMessage" -> downstreamErrorMessage + ) + ) + logger.error(errorJson.toString()) + throw new DownstreamApiInBadStateException(errorMessage, downstreamErrorMessage) + } + } + + def getImageLoaderProjection(mediaId: String, imageLoaderEndpoint: String): Option[Image] = { + logger.info("attempt to get image projection from image-loader") + val url = new URL(s"${imageLoaderEndpoint}/images/project/$mediaId") + val res = makeGetRequestSync(url, apiKey) + validateResponse(res, url) + logger.info(s"got image projection from image-loader for $mediaId with status code ${res.statusCode}") + if (res.statusCode == 200) Some(res.body.as[Image]) else None + } + + def getLeases(mediaId: String)(implicit ec: ExecutionContext): Future[LeasesByMedia] = { + logger.info("attempt to get leases") + val url = new URL(s"${services.leasesBaseUri}/leases/media/$mediaId") + makeGetRequestAsync(url, apiKey).map { res => + validateResponse(res, url) + if (res.statusCode == 200) (res.body \ "data").as[LeasesByMedia] else LeasesByMedia.empty + } + } + + def getEdits(mediaId: String)(implicit ec: ExecutionContext): Future[Option[Edits]] = { + logger.info("attempt to get edits") + val url = new URL(s"${services.metadataBaseUri}/edits/$mediaId") + makeGetRequestAsync(url, apiKey).map { res => + validateResponse(res, url) + if (res.statusCode == 200) Some((res.body \ "data").as[Edits]) else None + } + } + + def getCrops(mediaId: String)(implicit ec: ExecutionContext): Future[List[Crop]] = { + logger.info("attempt to get crops") + val url = new URL(s"${services.cropperBaseUri}/crops/$mediaId") + makeGetRequestAsync(url, apiKey).map { res =>
+ validateResponse(res, url) + if (res.statusCode == 200) (res.body \ "data").as[List[Crop]] else Nil + } + } + + def getUsages(mediaId: String)(implicit ec: ExecutionContext): Future[List[Usage]] = { + logger.info("attempt to get usages") + + def unpackUsagesFromEntityResponse(resBody: JsValue): List[JsValue] = { + (resBody \ "data").as[JsArray].value + .map(entity => (entity.as[JsObject] \ "data").as[JsValue]).toList + } + + val url = new URL(s"${services.usageBaseUri}/usages/media/$mediaId") + makeGetRequestAsync(url, apiKey).map { res => + validateResponse(res, url) + if (res.statusCode == 200) unpackUsagesFromEntityResponse(res.body).map(_.as[Usage]) + else Nil + } + } + + + } +class DownstreamApiInBadStateException(message: String, downstreamMessage: String) extends IllegalStateException(message) { + def getDownstreamMessage = downstreamMessage +} diff --git a/common-lib/src/main/scala/com/gu/mediaservice/lib/ImageIngestOperations.scala b/common-lib/src/main/scala/com/gu/mediaservice/lib/ImageIngestOperations.scala index 1b070331fd..f70a00d2b3 100644 --- a/common-lib/src/main/scala/com/gu/mediaservice/lib/ImageIngestOperations.scala +++ b/common-lib/src/main/scala/com/gu/mediaservice/lib/ImageIngestOperations.scala @@ -29,15 +29,18 @@ class ImageIngestOperations(imageBucket: String, thumbnailBucket: String, config private def storeOriginalImage(storableImage: StorableOriginalImage) (implicit logMarker: LogMarker): Future[S3Object] = - storeImage(imageBucket, fileKeyFromId(storableImage.id), storableImage.file, Some(storableImage.mimeType), storableImage.meta) + storeImage(imageBucket, fileKeyFromId(storableImage.id), storableImage.file, Some(storableImage.mimeType), + storableImage.meta, overwrite = false) private def storeThumbnailImage(storableImage: StorableThumbImage) (implicit logMarker: LogMarker): Future[S3Object] = - storeImage(thumbnailBucket, fileKeyFromId(storableImage.id), storableImage.file, Some(storableImage.mimeType)) + 
storeImage(thumbnailBucket, fileKeyFromId(storableImage.id), storableImage.file, Some(storableImage.mimeType), + overwrite = true) private def storeOptimisedImage(storableImage: StorableOptimisedImage) (implicit logMarker: LogMarker): Future[S3Object] = - storeImage(imageBucket, optimisedPngKeyFromId(storableImage.id), storableImage.file, Some(storableImage.mimeType)) + storeImage(imageBucket, optimisedPngKeyFromId(storableImage.id), storableImage.file, Some(storableImage.mimeType), + overwrite = true) def deleteOriginal(id: String): Future[Unit] = if(isVersionedS3) deleteVersionedImage(imageBucket, fileKeyFromId(id)) else deleteImage(imageBucket, fileKeyFromId(id)) def deleteThumbnail(id: String): Future[Unit] = deleteImage(thumbnailBucket, fileKeyFromId(id)) diff --git a/common-lib/src/main/scala/com/gu/mediaservice/lib/ImageQuarantineOperations.scala b/common-lib/src/main/scala/com/gu/mediaservice/lib/ImageQuarantineOperations.scala index 73d9fe81dd..9de53d9490 100644 --- a/common-lib/src/main/scala/com/gu/mediaservice/lib/ImageQuarantineOperations.scala +++ b/common-lib/src/main/scala/com/gu/mediaservice/lib/ImageQuarantineOperations.scala @@ -14,7 +14,7 @@ class ImageQuarantineOperations(quarantineBucket: String, config: CommonConfig, def storeQuarantineImage(id: String, file: File, mimeType: Option[MimeType], meta: Map[String, String] = Map.empty) (implicit logMarker: LogMarker): Future[S3Object] = - storeImage(quarantineBucket, ImageIngestOperations.fileKeyFromId(id), file, mimeType, meta) + storeImage(quarantineBucket, ImageIngestOperations.fileKeyFromId(id), file, mimeType, meta, overwrite = true) } diff --git a/common-lib/src/main/scala/com/gu/mediaservice/lib/ImageStorage.scala b/common-lib/src/main/scala/com/gu/mediaservice/lib/ImageStorage.scala index ad4ea3f1c6..00c59ac18e 100644 --- a/common-lib/src/main/scala/com/gu/mediaservice/lib/ImageStorage.scala +++ b/common-lib/src/main/scala/com/gu/mediaservice/lib/ImageStorage.scala @@ -13,6 +13,10 @@ 
import com.gu.mediaservice.model.MimeType object ImageStorageProps { val cacheDuration: Duration = 365 days val cacheForever: String = s"max-age=${cacheDuration.toSeconds}" + val filenameMetadataKey: String = "file-name" + val uploadTimeMetadataKey: String = "upload-time" + val uploadedByMetadataKey: String = "uploaded-by" + val identifierMetadataKeyPrefix: String = "identifier!" } trait ImageStorage { @@ -30,7 +34,8 @@ trait ImageStorage { /** Store a copy of the given file and return the URI of that copy. * The file can safely be deleted afterwards. */ - def storeImage(bucket: String, id: String, file: File, mimeType: Option[MimeType], meta: Map[String, String] = Map.empty) + def storeImage(bucket: String, id: String, file: File, mimeType: Option[MimeType], + meta: Map[String, String] = Map.empty, overwrite: Boolean) (implicit logMarker: LogMarker): Future[S3Object] def deleteImage(bucket: String, id: String): Future[Unit] diff --git a/common-lib/src/main/scala/com/gu/mediaservice/lib/S3ImageStorage.scala b/common-lib/src/main/scala/com/gu/mediaservice/lib/S3ImageStorage.scala index 0c7ed5d767..79fed45686 100644 --- a/common-lib/src/main/scala/com/gu/mediaservice/lib/S3ImageStorage.scala +++ b/common-lib/src/main/scala/com/gu/mediaservice/lib/S3ImageStorage.scala @@ -1,13 +1,12 @@ package com.gu.mediaservice.lib -import java.io.File - -import com.gu.mediaservice.lib.aws.{S3, S3Ops} +import com.gu.mediaservice.lib.aws.S3 import com.gu.mediaservice.lib.config.CommonConfig import com.gu.mediaservice.lib.logging.LogMarker import com.gu.mediaservice.model.MimeType import org.slf4j.LoggerFactory +import java.io.File import scala.collection.JavaConverters._ import scala.concurrent.Future @@ -16,13 +15,14 @@ class S3ImageStorage(config: CommonConfig) extends S3(config) with ImageStorage private val log = LoggerFactory.getLogger(getClass) private val cacheSetting = Some(cacheForever) - def storeImage(bucket: String, id: String, file: File, mimeType: Option[MimeType], 
meta: Map[String, String] = Map.empty) + def storeImage(bucket: String, id: String, file: File, mimeType: Option[MimeType], + meta: Map[String, String] = Map.empty, overwrite: Boolean) (implicit logMarker: LogMarker) = { - store(bucket, id, file, mimeType, meta, cacheSetting) - .map( _ => - // TODO this is just giving back the stuff we passed in and should be factored out. - S3Ops.projectFileAsS3Object(bucket, id, file, mimeType, meta, cacheSetting) - ) + if (overwrite) { + store(bucket, id, file, mimeType, meta, cacheSetting) + } else { + storeIfNotPresent(bucket, id, file, mimeType, meta, cacheSetting) + } } def deleteImage(bucket: String, id: String) = Future { diff --git a/common-lib/src/main/scala/com/gu/mediaservice/lib/aws/S3.scala b/common-lib/src/main/scala/com/gu/mediaservice/lib/aws/S3.scala index fafc81872b..9657da3969 100644 --- a/common-lib/src/main/scala/com/gu/mediaservice/lib/aws/S3.scala +++ b/common-lib/src/main/scala/com/gu/mediaservice/lib/aws/S3.scala @@ -3,7 +3,6 @@ package com.gu.mediaservice.lib.aws import java.io.File import java.net.{URI, URLEncoder} import java.nio.charset.{Charset, StandardCharsets} - import com.amazonaws.{AmazonServiceException, ClientConfiguration} import com.amazonaws.services.s3.model._ import com.amazonaws.services.s3.{AmazonS3, AmazonS3ClientBuilder, model} @@ -19,8 +18,47 @@ import scala.concurrent.{ExecutionContext, Future} case class S3Object(uri: URI, size: Long, metadata: S3Metadata) +object S3Object { + def objectUrl(bucket: String, key: String): URI = { + val bucketUrl = s"$bucket.${S3Ops.s3Endpoint}" + new URI("http", bucketUrl, s"/$key", null) + } + + def apply(bucket: String, key: String, size: Long, metadata: S3Metadata): S3Object = + apply(objectUrl(bucket, key), size, metadata) + + def apply(bucket: String, key: String, file: File, mimeType: Option[MimeType], + meta: Map[String, String] = Map.empty, cacheControl: Option[String] = None): S3Object = { + S3Object( + bucket, + key, + file.length, + 
S3Metadata( + meta, + S3ObjectMetadata( + mimeType, + cacheControl + ) + ) + ) + } +} + case class S3Metadata(userMetadata: Map[String, String], objectMetadata: S3ObjectMetadata) +object S3Metadata { + def apply(meta: ObjectMetadata): S3Metadata = { + S3Metadata( + meta.getUserMetadata.asScala.toMap, + S3ObjectMetadata( + contentType = Option(meta.getContentType).map(MimeType.apply), + cacheControl = Option(meta.getCacheControl), + lastModified = Option(meta.getLastModified).map(new DateTime(_)) + ) + ) + } +} + case class S3ObjectMetadata(contentType: Option[MimeType], cacheControl: Option[String], lastModified: Option[DateTime] = None) class S3(config: CommonConfig) extends GridLogging { @@ -28,8 +66,6 @@ class S3(config: CommonConfig) extends GridLogging { type Key = String type UserMetadata = Map[String, String] - import S3Ops.objectUrl - lazy val client: AmazonS3 = S3Ops.buildS3Client(config) // also create a legacy client that uses v2 signatures for URL signing private lazy val legacySigningClient: AmazonS3 = S3Ops.buildS3Client(config, forceV2Sigs = true) @@ -116,7 +152,7 @@ class S3(config: CommonConfig) extends GridLogging { } def store(bucket: Bucket, id: Key, file: File, mimeType: Option[MimeType], meta: UserMetadata = Map.empty, cacheControl: Option[String] = None) - (implicit ex: ExecutionContext, logMarker: LogMarker): Future[Unit] = + (implicit ex: ExecutionContext, logMarker: LogMarker): Future[S3Object] = Future { val metadata = new ObjectMetadata mimeType.foreach(m => metadata.setContentType(m.name)) @@ -133,9 +169,28 @@ class S3(config: CommonConfig) extends GridLogging { val req = new PutObjectRequest(bucket, id, file).withMetadata(metadata) Stopwatch(s"S3 client.putObject ($req)"){ client.putObject(req) + // once we've completed the PUT read back to ensure that we are returning reality + val metadata = client.getObjectMetadata(bucket, id) + S3Object(bucket, id, metadata.getContentLength, S3Metadata(metadata)) }(markers) } + def 
storeIfNotPresent(bucket: Bucket, id: Key, file: File, mimeType: Option[MimeType], meta: UserMetadata = Map.empty, cacheControl: Option[String] = None) + (implicit ex: ExecutionContext, logMarker: LogMarker): Future[S3Object] = { + Future{ + Some(client.getObjectMetadata(bucket, id)) + }.recover { + // translate this exception into the object not existing + case as3e:AmazonS3Exception if as3e.getStatusCode == 404 => None + }.flatMap { + case Some(objectMetadata) => + log.info(s"Skipping storing of S3 file $id as key is already present in bucket $bucket") + Future.successful(S3Object(bucket, id, objectMetadata.getContentLength, S3Metadata(objectMetadata))) + case None => + store(bucket, id, file, mimeType, meta, cacheControl) + } + } + def list(bucket: Bucket, prefixDir: String) (implicit ex: ExecutionContext): Future[List[S3Object]] = Future { @@ -144,21 +199,13 @@ class S3(config: CommonConfig) extends GridLogging { val summaries = listing.getObjectSummaries.asScala summaries.map(summary => (summary.getKey, summary)).foldLeft(List[S3Object]()) { case (memo: List[S3Object], (key: String, summary: S3ObjectSummary)) => - S3Object(objectUrl(bucket, key), summary.getSize, getMetadata(bucket, key)) :: memo + S3Object(bucket, key, summary.getSize, getMetadata(bucket, key)) :: memo } } def getMetadata(bucket: Bucket, key: Key): S3Metadata = { val meta = client.getObjectMetadata(bucket, key) - - S3Metadata( - meta.getUserMetadata.asScala.toMap, - S3ObjectMetadata( - contentType = Option(MimeType(meta.getContentType)), - cacheControl = Option(meta.getCacheControl), - lastModified = Option(meta.getLastModified).map(new DateTime(_)) - ) - ) + S3Metadata(meta) } def getUserMetadata(bucket: Bucket, key: Key): Map[Bucket, Bucket] = @@ -176,7 +223,7 @@ class S3(config: CommonConfig) extends GridLogging { object S3Ops { // TODO make this localstack friendly // TODO: Make this region aware - i.e. 
RegionUtils.getRegion(region).getServiceEndpoint(AmazonS3.ENDPOINT_PREFIX) - private val s3Endpoint = "s3.amazonaws.com" + val s3Endpoint = "s3.amazonaws.com" def buildS3Client(config: CommonConfig, forceV2Sigs: Boolean = false, localstackAware: Boolean = true): AmazonS3 = { @@ -198,27 +245,4 @@ object S3Ops { config.withAWSCredentials(builder, localstackAware).build() } - - def objectUrl(bucket: String, key: String): URI = { - val bucketUrl = s"$bucket.$s3Endpoint" - new URI("http", bucketUrl, s"/$key", null) - } - - def projectFileAsS3Object(url: URI, file: File, mimeType: Option[MimeType], meta: Map[String, String], cacheControl: Option[String]): S3Object = { - S3Object( - url, - file.length, - S3Metadata( - meta, - S3ObjectMetadata( - mimeType, - cacheControl - ) - ) - ) - } - - def projectFileAsS3Object(bucket: String, key: String, file: File, mimeType: Option[MimeType], meta: Map[String, String] = Map.empty, cacheControl: Option[String] = None): S3Object = { - projectFileAsS3Object(objectUrl(bucket, key), file, mimeType, meta, cacheControl) - } } diff --git a/common-lib/src/main/scala/com/gu/mediaservice/lib/config/CommonConfig.scala b/common-lib/src/main/scala/com/gu/mediaservice/lib/config/CommonConfig.scala index 8b5a876e84..bd895f747b 100644 --- a/common-lib/src/main/scala/com/gu/mediaservice/lib/config/CommonConfig.scala +++ b/common-lib/src/main/scala/com/gu/mediaservice/lib/config/CommonConfig.scala @@ -11,11 +11,6 @@ import scala.util.Try abstract class CommonConfig(val configuration: Configuration) extends AwsClientBuilderUtils with StrictLogging { - final val elasticsearchStack = "media-service" - - final val elasticsearchApp = "elasticsearch" - final val elasticsearch6App = "elasticsearch6" - final val stackName = "media-service" final val sessionId = UUID.randomUUID().toString diff --git a/common-lib/src/main/scala/com/gu/mediaservice/lib/config/MetadataConfig.scala b/common-lib/src/main/scala/com/gu/mediaservice/lib/config/MetadataConfig.scala 
index b3ba44679d..5848a44a2d 100644 --- a/common-lib/src/main/scala/com/gu/mediaservice/lib/config/MetadataConfig.scala +++ b/common-lib/src/main/scala/com/gu/mediaservice/lib/config/MetadataConfig.scala @@ -129,8 +129,6 @@ object MetadataConfig { ) val staffIllustrators = List( - "Mona Chalabi", - "Sam Morris", "Guardian Design" ) diff --git a/cropper/app/lib/CropStore.scala b/cropper/app/lib/CropStore.scala index 1e43eeace4..65c59b678e 100644 --- a/cropper/app/lib/CropStore.scala +++ b/cropper/app/lib/CropStore.scala @@ -35,7 +35,7 @@ class CropStore(config: CropperConfig) extends S3ImageStorage(config) { case (key, value) => key -> value }.mapValues(_.toString) - storeImage(config.imgPublishingBucket, filename, file, Some(mimeType), filteredMetadata) map { s3Object => + storeImage(config.imgPublishingBucket, filename, file, Some(mimeType), filteredMetadata, overwrite = true) map { s3Object => Asset( translateImgHost(s3Object.uri), Some(s3Object.size), diff --git a/image-loader/app/model/Projector.scala b/image-loader/app/model/Projector.scala index 18fa1d2d48..14550d2fbe 100644 --- a/image-loader/app/model/Projector.scala +++ b/image-loader/app/model/Projector.scala @@ -2,11 +2,11 @@ package model import java.io.{File, FileOutputStream} import java.util.UUID - import com.amazonaws.services.s3.AmazonS3 -import com.amazonaws.services.s3.model.{ObjectMetadata, S3Object} -import com.gu.mediaservice.lib.{ImageIngestOperations, StorableOptimisedImage, StorableOriginalImage, StorableThumbImage} +import com.amazonaws.services.s3.model.{ObjectMetadata, S3Object => AwsS3Object} +import com.gu.mediaservice.lib.{ImageIngestOperations, ImageStorageProps, StorableOptimisedImage, StorableOriginalImage, StorableThumbImage} import com.gu.mediaservice.lib.aws.S3Ops +import com.gu.mediaservice.lib.aws.S3Object import com.gu.mediaservice.lib.cleanup.ImageProcessor import com.gu.mediaservice.lib.imaging.ImageOperations import com.gu.mediaservice.lib.logging.LogMarker @@ -35,28 
+35,44 @@ case class S3FileExtractedMetadata( uploadedBy: String, uploadTime: DateTime, uploadFileName: Option[String], - picdarUrn: Option[String] + identifiers: Map[String, String] ) object S3FileExtractedMetadata { def apply(s3ObjectMetadata: ObjectMetadata): S3FileExtractedMetadata = { - val lastModified = s3ObjectMetadata.getLastModified.toInstant.toString - val fileUserMetadata = s3ObjectMetadata.getUserMetadata.asScala.toMap + val lastModified = new DateTime(s3ObjectMetadata.getLastModified) + val userMetadata = s3ObjectMetadata.getUserMetadata.asScala.toMap + apply(lastModified, userMetadata) + } - val uploadedBy = fileUserMetadata.getOrElse("uploaded_by", "re-ingester") - val uploadedTimeRaw = fileUserMetadata.getOrElse("upload_time", lastModified) - val uploadTime = new DateTime(uploadedTimeRaw).withZone(DateTimeZone.UTC) - val picdarUrn = fileUserMetadata.get("identifier!picdarurn") + def apply(lastModified: DateTime, userMetadata: Map[String, String]): S3FileExtractedMetadata = { + val fileUserMetadata = userMetadata.map { case (key, value) => + // Fix up the contents of the metadata. 
+ ( + // The keys used to be named with underscores instead of dashes but due to localstack being written in Python + // this didn't work locally (see https://github.com/localstack/localstack/issues/459) + key.replaceAll("_", "-"), + // The values are now all URL encoded and it is assumed safe to decode historical values too (based on the tested corpus) + URI.decode(value) + ) + } - val uploadFileNameRaw = fileUserMetadata.get("file_name") - // The file name is URL encoded in S3 metadata - val uploadFileName = uploadFileNameRaw.map(URI.decode) + val uploadedBy = fileUserMetadata.getOrElse(ImageStorageProps.uploadedByMetadataKey, "re-ingester") + val uploadedTimeRaw = fileUserMetadata.get(ImageStorageProps.uploadTimeMetadataKey).map(new DateTime(_).withZone(DateTimeZone.UTC)) + val uploadTime = uploadedTimeRaw.getOrElse(lastModified) + val identifiers = fileUserMetadata.filter{ case (key, _) => + key.startsWith(ImageStorageProps.identifierMetadataKeyPrefix) + }.map{ case (key, value) => + key.stripPrefix(ImageStorageProps.identifierMetadataKeyPrefix) -> value + } + + val uploadFileName = fileUserMetadata.get(ImageStorageProps.filenameMetadataKey) S3FileExtractedMetadata( uploadedBy = uploadedBy, uploadTime = uploadTime, uploadFileName = uploadFileName, - picdarUrn = picdarUrn, + identifiers = identifiers, ) } } @@ -89,21 +105,17 @@ class Projector(config: ImageUploadOpsCfg, } } - private def getSrcFileDigestForProjection(s3Src: S3Object, imageId: String, tempFile: File) = { + private def getSrcFileDigestForProjection(s3Src: AwsS3Object, imageId: String, tempFile: File) = { IOUtils.copy(s3Src.getObjectContent, new FileOutputStream(tempFile)) DigestedFile(tempFile, imageId) } def projectImage(srcFileDigest: DigestedFile, extractedS3Meta: S3FileExtractedMetadata, requestId: UUID) (implicit ec: ExecutionContext, logMarker: LogMarker): Future[Image] = { - import extractedS3Meta._ val DigestedFile(tempFile_, id_) = srcFileDigest - // TODO more identifiers_ to rehydrate - 
val identifiers_ = picdarUrn match { - case Some(value) => Map[String, String]("picdarURN" -> value) - case _ => Map[String, String]() - } - val uploadInfo_ = UploadInfo(filename = uploadFileName) + + val identifiers_ = extractedS3Meta.identifiers + val uploadInfo_ = UploadInfo(filename = extractedS3Meta.uploadFileName) MimeTypeDetection.guessMimeType(tempFile_) match { case util.Left(unsupported) => Future.failed(unsupported) @@ -113,8 +125,8 @@ class Projector(config: ImageUploadOpsCfg, imageId = id_, tempFile = tempFile_, mimeType = Some(mimeType), - uploadTime = uploadTime, - uploadedBy, + uploadTime = extractedS3Meta.uploadTime, + uploadedBy = extractedS3Meta.uploadedBy, identifiers = identifiers_, uploadInfo = uploadInfo_ ) @@ -140,7 +152,7 @@ class ImageUploadProjectionOps(config: ImageUploadOpsCfg, private def projectOriginalFileAsS3Model(storableOriginalImage: StorableOriginalImage) (implicit ec: ExecutionContext)= Future { val key = ImageIngestOperations.fileKeyFromId(storableOriginalImage.id) - S3Ops.projectFileAsS3Object( + S3Object( config.originalFileBucket, key, storableOriginalImage.file, @@ -152,7 +164,7 @@ class ImageUploadProjectionOps(config: ImageUploadOpsCfg, private def projectThumbnailFileAsS3Model(storableThumbImage: StorableThumbImage)(implicit ec: ExecutionContext) = Future { val key = ImageIngestOperations.fileKeyFromId(storableThumbImage.id) val thumbMimeType = Some(ImageOperations.thumbMimeType) - S3Ops.projectFileAsS3Object( + S3Object( config.thumbBucket, key, storableThumbImage.file, @@ -163,7 +175,7 @@ class ImageUploadProjectionOps(config: ImageUploadOpsCfg, private def projectOptimisedPNGFileAsS3Model(storableOptimisedImage: StorableOptimisedImage)(implicit ec: ExecutionContext) = Future { val key = ImageIngestOperations.optimisedPngKeyFromId(storableOptimisedImage.id) val optimisedPngMimeType = Some(ImageOperations.thumbMimeType) // this IS what we will generate. 
- S3Ops.projectFileAsS3Object( + S3Object( config.originalFileBucket, key, storableOptimisedImage.file, diff --git a/image-loader/app/model/Uploader.scala b/image-loader/app/model/Uploader.scala index 23b89ff006..4d2168f57e 100644 --- a/image-loader/app/model/Uploader.scala +++ b/image-loader/app/model/Uploader.scala @@ -5,11 +5,10 @@ import java.net.URLEncoder import java.nio.charset.StandardCharsets import java.nio.file.Files import java.util.UUID - import com.gu.mediaservice.lib.argo.ArgoHelpers import com.gu.mediaservice.lib.auth.Authentication import com.gu.mediaservice.lib.auth.Authentication.Principal -import com.gu.mediaservice.lib.{BrowserViewableImage, StorableOptimisedImage, StorableOriginalImage, StorableThumbImage} +import com.gu.mediaservice.lib.{BrowserViewableImage, ImageStorageProps, StorableOptimisedImage, StorableOriginalImage, StorableThumbImage} import com.gu.mediaservice.lib.aws.{S3Object, UpdateMessage} import com.gu.mediaservice.lib.cleanup.{ImageProcessor, MetadataCleaners, SupplierProcessors} import com.gu.mediaservice.lib.config.MetadataConfig @@ -17,6 +16,7 @@ import com.gu.mediaservice.lib.formatting._ import com.gu.mediaservice.lib.imaging.ImageOperations import com.gu.mediaservice.lib.logging._ import com.gu.mediaservice.lib.metadata.{FileMetadataHelper, ImageMetadataConverter} +import com.gu.mediaservice.lib.net.URI import com.gu.mediaservice.model._ import lib.{DigestedFile, ImageLoaderConfig, Notifications} import lib.imaging.{FileMetadataReader, MimeTypeDetection} @@ -146,6 +146,7 @@ object Uploader extends GridLogging { val eventualImage = for { browserViewableImage <- eventualBrowserViewableImage s3Source <- sourceStoreFuture + mergedUploadRequest = patchUploadRequestWithS3Metadata(uploadRequest, s3Source) optimisedFileMetadata <- FileMetadataReader.fromIPTCHeadersWithColorInfo(browserViewableImage) thumbViewableImage <- createThumbFuture(optimisedFileMetadata, colourModelFuture, browserViewableImage, deps) s3Thumb <- 
storeOrProjectThumbFile(thumbViewableImage) @@ -165,7 +166,7 @@ object Uploader extends GridLogging { val thumbAsset = Asset.fromS3Object(s3Thumb, thumbDimensions) val pngAsset = s3PngOption.map(Asset.fromS3Object(_, sourceDimensions)) - val baseImage = ImageUpload.createImage(uploadRequest, sourceAsset, thumbAsset, pngAsset, fullFileMetadata, metadata) + val baseImage = ImageUpload.createImage(mergedUploadRequest, sourceAsset, thumbAsset, pngAsset, fullFileMetadata, metadata) val processedImage = processor(baseImage) @@ -201,14 +202,13 @@ object Uploader extends GridLogging { def toMetaMap(uploadRequest: UploadRequest): Map[String, String] = { val baseMeta = Map( - "uploaded_by" -> uploadRequest.uploadedBy, - "upload_time" -> printDateTime(uploadRequest.uploadTime) - ) ++ uploadRequest.identifiersMeta + ImageStorageProps.uploadedByMetadataKey -> uploadRequest.uploadedBy, + ImageStorageProps.uploadTimeMetadataKey -> printDateTime(uploadRequest.uploadTime) + ) ++ + uploadRequest.identifiersMeta ++ + uploadRequest.uploadInfo.filename.map(ImageStorageProps.filenameMetadataKey -> _) - uploadRequest.uploadInfo.filename match { - case Some(f) => baseMeta ++ Map("file_name" -> URLEncoder.encode(f, StandardCharsets.UTF_8.name())) - case _ => baseMeta - } + baseMeta.mapValues(URI.encode) } private def toFileMetadata(f: File, imageId: String, mimeType: Option[MimeType]): Future[FileMetadata] = { @@ -258,6 +258,16 @@ object Uploader extends GridLogging { case None => Future.failed(new Exception("This file is not an image with an identifiable mime type")) } } + + def patchUploadRequestWithS3Metadata(request: UploadRequest, s3Object: S3Object): UploadRequest = { + val metadata = S3FileExtractedMetadata(s3Object.metadata.objectMetadata.lastModified.getOrElse(new DateTime), s3Object.metadata.userMetadata) + request.copy( + uploadTime = metadata.uploadTime, + uploadedBy = metadata.uploadedBy, + uploadInfo = request.uploadInfo.copy(filename = metadata.uploadFileName), + identifiers 
= metadata.identifiers + ) + } } class Uploader(val store: ImageLoaderStore, @@ -297,7 +307,10 @@ class Uploader(val store: ImageLoaderStore, val DigestedFile(tempFile, id) = digestedFile // TODO: should error if the JSON parsing failed - val identifiersMap = identifiers.map(Json.parse(_).as[Map[String, String]]) getOrElse Map() + val identifiersMap = identifiers + .map(Json.parse(_).as[Map[String, String]]) + .getOrElse(Map.empty) + .mapValues(_.toLowerCase) MimeTypeDetection.guessMimeType(tempFile) match { case util.Left(unsupported) => diff --git a/image-loader/app/model/upload/UploadRequest.scala b/image-loader/app/model/upload/UploadRequest.scala index b256776a0a..e3298e7ef1 100644 --- a/image-loader/app/model/upload/UploadRequest.scala +++ b/image-loader/app/model/upload/UploadRequest.scala @@ -1,12 +1,14 @@ package model.upload +import com.gu.mediaservice.lib.ImageStorageProps + import java.io.File import java.util.UUID - import com.gu.mediaservice.model.{MimeType, UploadInfo} import net.logstash.logback.marker.{LogstashMarker, Markers} import org.joda.time.format.ISODateTimeFormat import org.joda.time.{DateTime, DateTimeZone} + import scala.collection.JavaConverters._ case class UploadRequest( @@ -20,7 +22,9 @@ case class UploadRequest( uploadInfo: UploadInfo ) { - val identifiersMeta: Map[String, String] = identifiers.map { case (k, v) => (s"identifier!$k", v) } + val identifiersMeta: Map[String, String] = identifiers.map { case (k, v) => + (s"${ImageStorageProps.identifierMetadataKeyPrefix}$k", v) + } def toLogMarker: LogstashMarker = { val fallback = "none" diff --git a/image-loader/test/scala/model/ImageUploadTest.scala b/image-loader/test/scala/model/ImageUploadTest.scala index 4e6ec20e16..4c6f1cd226 100644 --- a/image-loader/test/scala/model/ImageUploadTest.scala +++ b/image-loader/test/scala/model/ImageUploadTest.scala @@ -53,7 +53,7 @@ class ImageUploadTest extends AsyncFunSuite with Matchers with MockitoSugar { def mockStore = (a: StorableImage) => 
Future.successful( - S3Ops.projectFileAsS3Object(new URI("http://madeupname/"), a.file, Some(a.mimeType), a.meta, None) + S3Object("madeupname", "madeupkey", a.file, Some(a.mimeType), a.meta, None) ) def storeOrProjectOriginalFile: StorableOriginalImage => Future[S3Object] = mockStore diff --git a/image-loader/test/scala/model/ProjectorTest.scala b/image-loader/test/scala/model/ProjectorTest.scala index ba860dc3e9..4b54b3632c 100644 --- a/image-loader/test/scala/model/ProjectorTest.scala +++ b/image-loader/test/scala/model/ProjectorTest.scala @@ -2,9 +2,9 @@ package model import java.io.File import java.net.URI -import java.util.UUID - +import java.util.{Date, UUID} import com.amazonaws.services.s3.AmazonS3 +import com.amazonaws.services.s3.model.ObjectMetadata import com.gu.mediaservice.lib.cleanup.ImageProcessor import com.gu.mediaservice.lib.imaging.ImageOperations import com.gu.mediaservice.lib.logging.RequestLoggingContext @@ -15,13 +15,14 @@ import org.joda.time.{DateTime, DateTimeZone} import org.scalatest.concurrent.ScalaFutures import org.scalatest.mockito.MockitoSugar import org.scalatest.time.{Millis, Span} -import org.scalatest.{FunSuite, Matchers} +import org.scalatest.{FreeSpec, FunSuite, Matchers} import play.api.libs.json.{JsArray, JsString} import test.lib.ResourceHelpers import scala.concurrent.ExecutionContext.Implicits.global +import scala.collection.JavaConverters._ -class ProjectorTest extends FunSuite with Matchers with ScalaFutures with MockitoSugar { +class ProjectorTest extends FreeSpec with Matchers with ScalaFutures with MockitoSugar { import ResourceHelpers.fileAt @@ -39,7 +40,7 @@ class ProjectorTest extends FunSuite with Matchers with ScalaFutures with Mockit // FIXME temporary ignored as test is not executable in CI/CD machine // because graphic lib files like srgb.icc, cmyk.icc are in root directory instead of resources // this test is passing when running on local machine - ignore("projectImage") { + "projectImage" ignore { val 
testFile = fileAt("resources/getty.jpg") val fileDigest = DigestedFile(testFile, "id123") @@ -159,7 +160,7 @@ class ProjectorTest extends FunSuite with Matchers with ScalaFutures with Mockit uploadedBy = uploadedBy, uploadTime = uploadTime, uploadFileName = uploadFileName, - picdarUrn = None, + identifiers = Map.empty, ) implicit val requestLoggingContext = RequestLoggingContext() @@ -171,5 +172,58 @@ class ProjectorTest extends FunSuite with Matchers with ScalaFutures with Mockit } } + "S3FileExtractedMetadata" - { + "should extract URL encoded metadata" in { + val s3Metadata = new ObjectMetadata() + s3Metadata.setLastModified(new Date(1613388118000L)) + s3Metadata.setUserMetadata(Map( + "file-name" -> "This%20photo%20was%20taken%20in%20%C5%81%C3%B3d%C5%BA.jpg", + "uploaded-by" -> "s%C3%A9b.cevey%40theguardian.co.uk", + "upload-time" -> "2021-02-01T12%3A52%3A34%2B09%3A00", + "identifier!picdarurn" -> "12*543%5E25" + ).asJava) + + val result = S3FileExtractedMetadata(s3Metadata) + result.uploadFileName shouldBe Some("This photo was taken in Łódź.jpg") + result.uploadedBy shouldBe "séb.cevey@theguardian.co.uk" + result.uploadTime.toString shouldBe "2021-02-01T03:52:34.000Z" + result.identifiers.size shouldBe 1 + result.identifiers.get("picdarurn") shouldBe Some("12*543^25") + } + + "should remap headers with underscores to dashes" in { + val s3Metadata = new ObjectMetadata() + s3Metadata.setLastModified(new Date(1613388118000L)) + s3Metadata.setUserMetadata(Map( + "file_name" -> "filename.jpg", + "uploaded_by" -> "user", + "upload_time" -> "2021-02-01T12%3A52%3A34%2B09%3A00", + "identifier!picdarurn" -> "12*543" + ).asJava) + + val result = S3FileExtractedMetadata(s3Metadata) + result.uploadFileName shouldBe Some("filename.jpg") + result.uploadedBy shouldBe "user" + result.uploadTime.toString shouldBe "2021-02-01T03:52:34.000Z" + result.identifiers.size shouldBe 1 + result.identifiers.get("picdarurn") shouldBe Some("12*543") + } + + "should correctly read in non URL 
encoded values" in { + // we have plenty of values in S3 that are not URL encoded + // and we must be able to read them correctly + val s3Metadata = new ObjectMetadata() + s3Metadata.setLastModified(new Date(1613388118000L)) + s3Metadata.setUserMetadata(Map( + "uploaded_by" -> "user", + "upload_time" -> "2019-12-11T01:12:10.427Z", + ).asJava) + + val result = S3FileExtractedMetadata(s3Metadata) + result.uploadedBy shouldBe "user" + result.uploadTime.toString shouldBe "2019-12-11T01:12:10.427Z" + } + } + } diff --git a/common-lib/src/test/resources/application.conf b/rest-lib/src/test/resources/application.conf similarity index 100% rename from common-lib/src/test/resources/application.conf rename to rest-lib/src/test/resources/application.conf diff --git a/common-lib/src/test/resources/cmyk.jpg b/rest-lib/src/test/resources/cmyk.jpg similarity index 100% rename from common-lib/src/test/resources/cmyk.jpg rename to rest-lib/src/test/resources/cmyk.jpg diff --git a/common-lib/src/test/resources/grayscale-with-profile.jpg b/rest-lib/src/test/resources/grayscale-with-profile.jpg similarity index 100% rename from common-lib/src/test/resources/grayscale-with-profile.jpg rename to rest-lib/src/test/resources/grayscale-with-profile.jpg diff --git a/common-lib/src/test/resources/grayscale-wo-profile.jpg b/rest-lib/src/test/resources/grayscale-wo-profile.jpg similarity index 100% rename from common-lib/src/test/resources/grayscale-wo-profile.jpg rename to rest-lib/src/test/resources/grayscale-wo-profile.jpg diff --git a/common-lib/src/test/resources/rgb-with-cmyk-profile.jpg b/rest-lib/src/test/resources/rgb-with-cmyk-profile.jpg similarity index 100% rename from common-lib/src/test/resources/rgb-with-cmyk-profile.jpg rename to rest-lib/src/test/resources/rgb-with-cmyk-profile.jpg diff --git a/common-lib/src/test/resources/rgb-with-rgb-profile.jpg b/rest-lib/src/test/resources/rgb-with-rgb-profile.jpg similarity index 100% rename from 
common-lib/src/test/resources/rgb-with-rgb-profile.jpg rename to rest-lib/src/test/resources/rgb-with-rgb-profile.jpg diff --git a/common-lib/src/test/resources/rgb-wo-profile.jpg b/rest-lib/src/test/resources/rgb-wo-profile.jpg similarity index 100% rename from common-lib/src/test/resources/rgb-wo-profile.jpg rename to rest-lib/src/test/resources/rgb-wo-profile.jpg diff --git a/rest-lib/src/test/scala/com/gu/mediaservice/lib/auth/AuthenticationTest.scala b/rest-lib/src/test/scala/com/gu/mediaservice/lib/auth/AuthenticationTest.scala index ddc0c1a305..6ac73db9fe 100644 --- a/rest-lib/src/test/scala/com/gu/mediaservice/lib/auth/AuthenticationTest.scala +++ b/rest-lib/src/test/scala/com/gu/mediaservice/lib/auth/AuthenticationTest.scala @@ -5,7 +5,7 @@ import akka.stream.ActorMaterializer import com.gu.mediaservice.lib.auth.Authentication.{MachinePrincipal, OnBehalfOfPrincipal, UserPrincipal} import com.gu.mediaservice.lib.auth.provider.AuthenticationProvider.RedirectUri import com.gu.mediaservice.lib.auth.provider._ -import com.gu.mediaservice.lib.config.{CommonConfig, TestProvider} +import com.gu.mediaservice.lib.config.CommonConfig import org.scalatest.{AsyncFreeSpec, BeforeAndAfterAll, EitherValues, Matchers} import org.scalatestplus.play.PlaySpec import play.api.{Configuration, Environment} diff --git a/scripts/reindex-images/package-lock.json b/scripts/reindex-images/package-lock.json index 1659dd7a16..b6b2d80b2a 100644 --- a/scripts/reindex-images/package-lock.json +++ b/scripts/reindex-images/package-lock.json @@ -5,9 +5,9 @@ "requires": true, "dependencies": { "aws-sdk": { - "version": "2.824.0", - "resolved": "https://registry.npmjs.org/aws-sdk/-/aws-sdk-2.824.0.tgz", - "integrity": "sha512-9KNRQBkIMPn+6DWb4gR+RzqTMNyGLEwOgXbE4dDehOIAflfLnv3IFwLnzrhxJnleB4guYrILIsBroJFBzjiekg==", + "version": "2.834.0", + "resolved": "https://registry.npmjs.org/aws-sdk/-/aws-sdk-2.834.0.tgz", + "integrity": 
"sha512-9WRULrn4qAmgXI+tEW/IG5s/6ixJGZqjPOrmJsFZQev7/WRkxAZmJAjcwd4Ifm/jsJbXx2FSwO76gOPEvu2LqA==", "requires": { "buffer": "4.9.2", "events": "1.1.1", diff --git a/scripts/reindex-images/package.json b/scripts/reindex-images/package.json index 7d3c3b2ba0..82df9633ab 100644 --- a/scripts/reindex-images/package.json +++ b/scripts/reindex-images/package.json @@ -9,6 +9,6 @@ "author": "Jonathon Herbert", "license": "ISC", "dependencies": { - "aws-sdk": "^2.824.0" + "aws-sdk": "^2.834.0" } } diff --git a/scripts/src/main/scala/com/gu/mediaservice/scripts/BucketMetadata.scala b/scripts/src/main/scala/com/gu/mediaservice/scripts/BucketMetadata.scala new file mode 100644 index 0000000000..232c7d342c --- /dev/null +++ b/scripts/src/main/scala/com/gu/mediaservice/scripts/BucketMetadata.scala @@ -0,0 +1,64 @@ +package com.gu.mediaservice.scripts + + +import org.joda.time.DateTime +import play.api.libs.json.Json +import software.amazon.awssdk.auth.credentials.{DefaultCredentialsProvider, ProfileCredentialsProvider} +import software.amazon.awssdk.regions.Region +import software.amazon.awssdk.services.s3.S3Client +import software.amazon.awssdk.services.s3.model.{HeadObjectRequest, ListObjectsV2Request} + +import java.io.{BufferedWriter, File, FileWriter} +import java.time.Instant +import scala.collection.JavaConverters.{asScalaIteratorConverter, iterableAsScalaIterableConverter, mapAsScalaMapConverter} + +case class ObjectMetadata(key: String, lastModified: Instant, metadata: Map[String, String]) +object ObjectMetadata { + implicit val format = Json.format[ObjectMetadata] +} + +/** + * Dump selected metadata for all objects in a bucket. + * Given a bucket and an output file this will create a JSON line per object containing the key, lastModified time and + * user metadata. 
+ */ +object BucketMetadata { + def apply(args: List[String]): Unit = { + args match { + case bucketName :: fileName :: Nil => bucketMetadata(bucketName, new File(fileName)) + case _ => throw new IllegalArgumentException("Usage: BucketMetadata <bucketName> <fileName>") + } + } + + def bucketMetadata(bucketName: String, outputFile: File) = { + val fw = new FileWriter(outputFile) + System.err.println(s"Output encoding: ${fw.getEncoding}") + val stream = new BufferedWriter(fw) + + try { + val s3: S3Client = S3Client.builder + .region(Region.EU_WEST_1) + .credentialsProvider(DefaultCredentialsProvider.builder.profileName("media-service").build) + .build + + val results = s3.listObjectsV2Paginator(ListObjectsV2Request.builder.bucket(bucketName).build) + results.iterator().asScala.flatMap { results => + results.contents().asScala + }.map { s3Object => + val headObjectResponse = s3.headObject(HeadObjectRequest.builder.bucket(bucketName).key(s3Object.key).build) + s3Object -> headObjectResponse + }.map { case (s3Object, metadata) => + ObjectMetadata(s3Object.key, metadata.lastModified, metadata.metadata.asScala.toMap) + }.map { md => + Json.stringify(Json.toJson(md)) + }.zipWithIndex + .foreach { case (json, idx) => + if (idx % 1000 == 0) System.err.println(s"${DateTime.now.toString}: ${idx}") + stream.write(json) + stream.newLine() + } + } finally { + stream.close() + } + } +} diff --git a/scripts/src/main/scala/com/gu/mediaservice/scripts/DecodeComparator.scala b/scripts/src/main/scala/com/gu/mediaservice/scripts/DecodeComparator.scala new file mode 100644 index 0000000000..6d2113dcb1 --- /dev/null +++ b/scripts/src/main/scala/com/gu/mediaservice/scripts/DecodeComparator.scala @@ -0,0 +1,39 @@ +package com.gu.mediaservice.scripts + +import com.gu.mediaservice.lib.net.URI +import play.api.libs.json.{JsError, JsSuccess, Json} + +import java.io.File +import scala.io.Source + +/** + * Given a JSON lines file output from BucketMetadata this will verify that metadata is not changed when being + * 
passed through a URI decode function. This was to check that it is safe to deploy + https://github.com/guardian/grid/pull/3165 + */ +object DecodeComparator { + def apply(args: List[String]): Unit = { + args match { + case fileName :: Nil => compare(new File(fileName)) + case as => throw new IllegalArgumentException("Usage: DecodeComparator <fileName>") + } + } + + def compare(file: File): Unit = { + val source = Source.fromFile(file) + try { + source.getLines().foreach { line => + Json.fromJson[ObjectMetadata](Json.parse(line)) match { + case JsError(errors) => System.err.println(s"Couldn't parse JSON $line") + case JsSuccess(metadata, _) => + metadata.metadata.toList.foreach{ case (key, value) => + val decodedValue = URI.decode(value) + if (value != decodedValue) System.out.println(s"Difference between $key '$value' and '$decodedValue'") + } + } + } + } finally { + source.close() + } + } +} diff --git a/scripts/src/main/scala/com/gu/mediaservice/scripts/Main.scala b/scripts/src/main/scala/com/gu/mediaservice/scripts/Main.scala index d324473d45..a749af86af 100644 --- a/scripts/src/main/scala/com/gu/mediaservice/scripts/Main.scala +++ b/scripts/src/main/scala/com/gu/mediaservice/scripts/Main.scala @@ -11,6 +11,8 @@ object Main extends App { case "GetSettings" :: as => GetSettings(as) case "UpdateSettings" :: as => UpdateSettings(as) case "ConvertConfig" :: as => ConvertConfig(as) + case "BucketMetadata" :: as => BucketMetadata(as) + case "DecodeComparator" :: as => DecodeComparator(as) case a :: _ => sys.error(s"Unrecognised command: $a") case Nil => sys.error("Usage: ") }