diff --git a/CHANGELOG.md b/CHANGELOG.md index 9731f96..60a2641 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,8 @@ +0.1.4 (2018-12-07) +================== + +* [New Feature] Add supporting to use Amazon S3 file for query execution + 0.1.3 (2018-12-07) ================== diff --git a/README.md b/README.md index 125ce63..a5a1b2e 100644 --- a/README.md +++ b/README.md @@ -15,7 +15,7 @@ _export: repositories: - https://jitpack.io dependencies: - - pro.civitaspo:digdag-operator-athena:0.1.3 + - pro.civitaspo:digdag-operator-athena:0.1.4 athena: auth_method: profile @@ -84,7 +84,7 @@ Define the below options on properties (which is indicated by `-c`, `--config`). ### Options -- **athena.query>**: The SQL query statements or file to be executed. You can use digdag's template engine like `${...}` in the SQL query. (string, required) +- **athena.query>**: The SQL query statements or file location (in local or Amazon S3) to be executed. You can use digdag's template engine like `${...}` in the SQL query. (string, required) - **token_prefix**: Prefix for `ClientRequestToken` that a unique case-sensitive string used to ensure the request to create the query is idempotent (executes only once). On this plugin, the token is composed like `${token_prefix}-${session_uuid}-${hash value of query}-${random string}`. (string, default: `"digdag-athena"`) - **database**: The name of the database. (string, optional) - **output**: The location in Amazon S3 where your query results are stored, such as `"s3://path/to/query/"`. For more information, see [Queries and Query Result Files](https://docs.aws.amazon.com/athena/latest/ug/querying.html). (string, default: `"s3://aws-athena-query-results-${AWS_ACCOUNT_ID}-"`) @@ -131,7 +131,7 @@ Define the below options on properties (which is indicated by `-c`, `--config`). ### Options -- **select_query**: The select SQL statements or file to be executed for a new table by [`Create Table As Select`]((https://aws.amazon.com/jp/about-aws/whats-new/2018/10/athena_ctas_support/)). You can use digdag's template engine like `${...}` in the SQL query. (string, required) +- **select_query**: The select SQL statements or file location (in local or Amazon S3) to be executed for a new table by [`Create Table As Select`]((https://aws.amazon.com/jp/about-aws/whats-new/2018/10/athena_ctas_support/)). You can use digdag's template engine like `${...}` in the SQL query. (string, required) - **database**: The database name for query execution context. (string, optional) - **table**: The table name for the new table (string, default: `digdag_athena_ctas_${session_uuid.replaceAll("-", "_")}`) - **output**: Output location for data created by CTAS (string, default: `"s3://aws-athena-query-results-${AWS_ACCOUNT_ID}-/Unsaved/${YEAR}/${MONTH}/${DAY}/${athena_query_id}/"`) @@ -199,3 +199,8 @@ aws configure # Author @civitaspo + +# Contributor + +- @s-wool +- @daichi-hirose diff --git a/build.gradle b/build.gradle index 7941f7c..dba15d6 100644 --- a/build.gradle +++ b/build.gradle @@ -6,7 +6,7 @@ plugins { } group = 'pro.civitaspo' -version = '0.1.3' +version = '0.1.4' def digdagVersion = '0.9.27' def awsSdkVersion = "1.11.372" diff --git a/example/example.dig b/example/example.dig index 0e7fdb9..ab148d7 100644 --- a/example/example.dig +++ b/example/example.dig @@ -4,7 +4,7 @@ _export: - file://${repos} # - https://jitpack.io dependencies: - - pro.civitaspo:digdag-operator-athena:0.1.3 + - pro.civitaspo:digdag-operator-athena:0.1.4 athena: auth_method: profile value: 5 diff --git a/src/main/scala/pro/civitaspo/digdag/plugin/athena/operator/AthenaCtasOperator.scala b/src/main/scala/pro/civitaspo/digdag/plugin/athena/operator/AthenaCtasOperator.scala index 46cd940..9f23652 100644 --- a/src/main/scala/pro/civitaspo/digdag/plugin/athena/operator/AthenaCtasOperator.scala +++ b/src/main/scala/pro/civitaspo/digdag/plugin/athena/operator/AthenaCtasOperator.scala @@ -74,8 +74,15 @@ class AthenaCtasOperator(operatorName: String, context: OperatorContext, systemC protected lazy val selectQuery: String = { val t: Try[String] = Try { - val f: File = workspace.getFile(selectQueryOrFile) - workspace.templateFile(templateEngine, f.getPath, UTF_8, params) + if (selectQueryOrFile.startsWith("s3://")) { + val uri = AmazonS3URI(selectQueryOrFile) + val content = withS3(_.getObjectAsString(uri.getBucket, uri.getKey)) + templateEngine.template(content, params) + } + else { + val f: File = workspace.getFile(selectQueryOrFile) + workspace.templateFile(templateEngine, f.getPath, UTF_8, params) + } } t.getOrElse(selectQueryOrFile) } diff --git a/src/main/scala/pro/civitaspo/digdag/plugin/athena/operator/AthenaQueryOperator.scala b/src/main/scala/pro/civitaspo/digdag/plugin/athena/operator/AthenaQueryOperator.scala index c4c7afa..9ad0135 100644 --- a/src/main/scala/pro/civitaspo/digdag/plugin/athena/operator/AthenaQueryOperator.scala +++ b/src/main/scala/pro/civitaspo/digdag/plugin/athena/operator/AthenaQueryOperator.scala @@ -72,8 +72,15 @@ class AthenaQueryOperator(operatorName: String, context: OperatorContext, system protected lazy val query: String = { val t = Try { - val f = workspace.getFile(queryOrFile) - workspace.templateFile(templateEngine, f.getPath, UTF_8, params) + if (queryOrFile.startsWith("s3://")) { + val uri = AmazonS3URI(queryOrFile) + val content = withS3(_.getObjectAsString(uri.getBucket, uri.getKey)) + templateEngine.template(content, params) + } + else { + val f = workspace.getFile(queryOrFile) + workspace.templateFile(templateEngine, f.getPath, UTF_8, params) + } } t.getOrElse(queryOrFile) }