diff --git a/modules/data-loading/examples/config-avro b/modules/data-loading/examples/config-avro index 36f7a403..2050789a 100644 --- a/modules/data-loading/examples/config-avro +++ b/modules/data-loading/examples/config-avro @@ -1,8 +1,8 @@ connector.class=org.apache.kafka.connect.mirror.MirrorSourceConnector source.cluster.alias=hello target.cluster.alias=world -source.cluster.bootstrap.servers=source.kafka.server:9092 -target.cluster.bootstrap.servers=localhost:30002 +source.cluster.bootstrap.servers= +target.cluster.bootstrap.servers= source->target.enabled=true topics=avro-without-registry-topic replication.factor=1 @@ -18,41 +18,10 @@ emit.heartbeats.interval.seconds=5 world.scheduled.rebalance.max.delay.ms=35000 key.converter=org.apache.kafka.connect.converters.ByteArrayConverter header.converter=org.apache.kafka.connect.converters.ByteArrayConverter -value.converter=com.tigergraph.kafka.connect.converters.TigerGraphAvroConverterWithoutSchemaRegistry - -producer.security.protocol=SASL_SSL -producer.sasl.mechanism=GSSAPI -producer.sasl.kerberos.service.name=kafka -producer.sasl.jaas.config=com.sun.security.auth.module.Krb5LoginModule required useKeyTab=true storeKey=true keyTab=\"/path/to/kafka-producer.keytab\" principal=\"kafka-producer@TIGERGRAPH.COM\"; -producer.ssl.endpoint.identification.algorithm= -producer.ssl.keystore.location=/path/to/client.keystore.jks -producer.ssl.keystore.password=****** -producer.ssl.key.password=****** -producer.ssl.truststore.location=/path/to/client.truststore.jks -producer.ssl.truststore.password=****** - -consumer.security.protocol=SASL_SSL -consumer.sasl.mechanism=GSSAPI -consumer.sasl.kerberos.service.name=kafka -consumer.sasl.jaas.config=com.sun.security.auth.module.Krb5LoginModule required useKeyTab=true storeKey=true keyTab=\"/path/to/kafka-consumer.keytab\" principal=\"kafka-consumer@TIGERGRAPH.COM\"; -consumer.ssl.endpoint.identification.algorithm= -consumer.ssl.keystore.location=/path/to/client.keystore.jks 
-consumer.ssl.keystore.password=****** -consumer.ssl.key.password=****** -consumer.ssl.truststore.location=/path/to/client.truststore.jks -consumer.ssl.truststore.password=****** - -source.admin.security.protocol=SASL_SSL -source.admin.sasl.mechanism=GSSAPI -source.admin.sasl.kerberos.service.name=kafka -source.admin.sasl.jaas.config=com.sun.security.auth.module.Krb5LoginModule required useKeyTab=true storeKey=true keyTab=\"/path/to/kafka-admin.keytab\" principal=\"kafka-admin@TIGERGRAPH.COM\"; -source.admin.ssl.endpoint.identification.algorithm= -source.admin.ssl.keystore.location=/path/to/client.keystore.jks -source.admin.ssl.keystore.password=****** -source.admin.ssl.key.password=****** -source.admin.ssl.truststore.location=/path/to/client.truststore.jks -source.admin.ssl.truststore.password=****** +transforms=TigerGraphAvroTransform +transforms.TigerGraphAvroTransform.type=com.tigergraph.kafka.connect.transformations.TigergraphAvroWithoutSchemaRegistryTransformation +transforms.TigerGraphAvroTransform.errors.tolerance=none [connector_1] name=avro-test-without-registry -tasks.max=10 +tasks.max=10 \ No newline at end of file diff --git a/modules/data-loading/pages/data-loading-overview.adoc b/modules/data-loading/pages/data-loading-overview.adoc index 93b81acf..48898e12 100644 --- a/modules/data-loading/pages/data-loading-overview.adoc +++ b/modules/data-loading/pages/data-loading-overview.adoc @@ -38,7 +38,7 @@ TigerGraph uses the same workflow for both local file and Kafka Connect loading: . *Specify a graph*. Data is always loading to exactly one graph (though that graph could have global vertices and edges which are shared with other graphs). For example: + -[source,php] +[source,gsql] USE GRAPH ldbc_snb . If you are using Kafka Connect, *define a `DATA_SOURCE` object*. 
@@ -64,7 +64,7 @@ image::data-loading:loading_arch_3.9.3.png[Architectural diagram showing support == Loading Jobs A loading job tells the database how to construct vertices and edges from data sources. -[source,php] +[source,gsql] .CREATE LOADING JOB syntax ---- CREATE LOADING JOB FOR GRAPH { diff --git a/modules/data-loading/partials/kafka/kafka-data-source-details.adoc b/modules/data-loading/partials/kafka/kafka-data-source-details.adoc index f4a51d7c..8ac8ff8b 100644 --- a/modules/data-loading/partials/kafka/kafka-data-source-details.adoc +++ b/modules/data-loading/partials/kafka/kafka-data-source-details.adoc @@ -13,8 +13,8 @@ To configure the data source object, the minimum requirement is the address of t .Data source configuration for external Kafka ---- { -"type": "mirrormaker", -"source.cluster.bootstrap.servers": "" + "type": "mirrormaker", + "source.cluster.bootstrap.servers": "" } ---- @@ -25,9 +25,17 @@ If the source cluster is configured for SSL or SASL protocols, you need to provi * If the source cluster uses SASL *and* SSL, you need to upload the keytab of each Kerberos principal, as well as the key store and truststore to every node of your TigerGraph cluster. Each file must be at the same absolute path on all nodes. -The following configurations are required for admin, producer and consumer. To supply the configuration for the corresponding component, replace `` with `source.admin`, `producer`, or `consumer`. +The following configurations are required for admin, producer and consumer. Kafka allows SSL settings to be overridden; it respects security settings in the following order of precedence: generic.ssl.setting < source/target.cluster.ssl.setting < admin/producer/consumer.ssl.setting. + +If both source and target clusters share the same SSL settings, users can set generic settings for both source/target clusters and all the roles (admin/producer/consumer).
For example, users can set "ssl.keystore.location=/path/to/key/store" instead of "source.cluster.ssl.keystore.location=/path/to/key/store", or "admin.ssl.keystore.location=/path/to/key/store", or even "source.cluster.admin.ssl.keystore.location=/path/to/key/store". + +If source and target clusters have different SSL settings, to make things simple, users can simply set cluster-wide SSL configs, e.g., "target.cluster.ssl.truststore.password=/password/for/trust/store", instead of "target.cluster.producer.ssl.truststore.password=/password/for/trust/store". + +To supply the configuration for the corresponding component, replace `` with `source(or target).cluster`, `source(or target).cluster.admin(or producer, consumer)`, `admin`, `producer`, or `consumer`. For example, to specify `GSSAPI` as the SASL mechanism for consumer, include `"consumer.sasl.mecahnism": "GSSAPI"` in the data source configuration. +Note: SSL is now well supported by TigerGraph; we recommend setting up regular SSL rather than SASL + PlainText/SSL. + [%header,cols="1,2"] |=== | Field | Description diff --git a/modules/data-loading/partials/kafka/kafka-example-loading-job.adoc b/modules/data-loading/partials/kafka/kafka-example-loading-job.adoc index 5cbf6a8c..48696f47 100644 --- a/modules/data-loading/partials/kafka/kafka-example-loading-job.adoc +++ b/modules/data-loading/partials/kafka/kafka-example-loading-job.adoc @@ -2,8 +2,8 @@ The following is an example loading job from and external Kafka cluster.
-[source,php,linenums] -.Example loading job for BigQuery +[source,gsql,linenums] +.Example loading job from external Kafka ---- USE GRAPH ldbc_snb CREATE DATA_SOURCE s1 = "ds_config.json" FOR GRAPH ldbc_snb diff --git a/modules/data-loading/partials/load-part1-intro-and-schema.adoc b/modules/data-loading/partials/load-part1-intro-and-schema.adoc index f5ea6db4..070ae426 100644 --- a/modules/data-loading/partials/load-part1-intro-and-schema.adoc +++ b/modules/data-loading/partials/load-part1-intro-and-schema.adoc @@ -6,7 +6,7 @@ We will call out whether a particular step is common for all loading or specific == Example Schema This example uses part of the LDBC_SNB schema: -[source,php] +[source,gsql] .Example schema taken from LDBC_SNB ---- //Vertex Types: diff --git a/modules/data-loading/partials/load-part2-create-data-source.adoc b/modules/data-loading/partials/load-part2-create-data-source.adoc index 5a4ec42a..c2c421f3 100644 --- a/modules/data-loading/partials/load-part2-create-data-source.adoc +++ b/modules/data-loading/partials/load-part2-create-data-source.adoc @@ -8,13 +8,13 @@ Inline mode is required when creating data sources for TigerGraph Cloud instance In the following example, we create a data source named `s1`, and read its configuration information from a file called `ds_config.json`. -[source,php] +[source,gsql] USE GRAPH ldbc_snb CREATE DATA_SOURCE s1 = "ds_config.json" FOR GRAPH ldbc_snb Older versions of TigerGraph required a keyword after `DATA_SOURCE` such as `STREAM` or `KAFKA`. -[source,php] +[source,gsql] .Inline JSON data format when creating a data source CREATE DATA_SOURCE s1 = "{ type: , @@ -24,7 +24,7 @@ key: String literals can be enclosed with a double quote `"`, triple double quotes `"""`, or triple single quotes `'''`. Double quotes `"` in the JSON can be omitted if the key name does not contain a colon `:` or comma `,`. 
-[source,php] +[source,gsql] .Alternate quote syntax for inline JSON data CREATE DATA_SOURCE s1 = """{ "type": "", diff --git a/modules/data-loading/partials/load-part3-create-loading-job.adoc b/modules/data-loading/partials/load-part3-create-loading-job.adoc index dabf705e..33620ef6 100644 --- a/modules/data-loading/partials/load-part3-create-loading-job.adoc +++ b/modules/data-loading/partials/load-part3-create-loading-job.adoc @@ -8,7 +8,7 @@ These can refer to actual files or be placeholder names. The actual data sources . LOAD statements specify how to take the data fields from files to construct vertices or edges. //// -[source,php] +[source,gsql] .CREATE LOADING JOB syntax ---- CREATE LOADING JOB FOR GRAPH { diff --git a/modules/data-loading/partials/load-part3A-define-filenames.adoc b/modules/data-loading/partials/load-part3A-define-filenames.adoc index 8eebf474..b1ab24e5 100644 --- a/modules/data-loading/partials/load-part3A-define-filenames.adoc +++ b/modules/data-loading/partials/load-part3A-define-filenames.adoc @@ -4,7 +4,7 @@ First we define _filenames_, which are local variables referring to data files ( [NOTE] The terms `FILENAME` and `filevar` are used for legacy reasons, but a `filevar` can also be an object in a data object store. -[source,php] +[source,gsql] .DEFINE FILENAME syntax ---- DEFINE FILENAME filevar ["=" file_descriptor ]; @@ -13,7 +13,7 @@ DEFINE FILENAME filevar ["=" file_descriptor ]; The file descriptor can be specified at compile time or at runtime. 
Runtime settings override compile-time settings: -[source,php] +[source,gsql] .Specifying file descriptor at runtime ---- RUN LOADING JOB job_name USING filevar=file_descriptor_override diff --git a/modules/data-loading/partials/load-part3B-specify-mapping.adoc b/modules/data-loading/partials/load-part3B-specify-mapping.adoc index 1333fe63..f6d39d92 100644 --- a/modules/data-loading/partials/load-part3B-specify-mapping.adoc +++ b/modules/data-loading/partials/load-part3B-specify-mapping.adoc @@ -1,7 +1,7 @@ === Specify the data mapping Next, we use LOAD statements to describe how the incoming data will be loaded to attributes of vertices and edges. Each LOAD statement handles the data mapping, and optional data transformation and filtering, from one filename to one or more vertex and edge types. -[source,php] +[source,gsql] .LOAD statement syntax ---- LOAD [ source_object|filevar|TEMP_TABLE table_name ] @@ -12,7 +12,7 @@ LOAD [ source_object|filevar|TEMP_TABLE table_name ] <1> As of v3.9.3, TAGS are deprecated. Let's break down one of the LOAD statements in our example: -[source,php] +[source,gsql] .Example loading job for local files ---- LOAD file_Person TO VERTEX Person diff --git a/modules/data-loading/partials/load-part5-monitor-and-manage.adoc b/modules/data-loading/partials/load-part5-monitor-and-manage.adoc index 801aabaf..f12ad969 100644 --- a/modules/data-loading/partials/load-part5-monitor-and-manage.adoc +++ b/modules/data-loading/partials/load-part5-monitor-and-manage.adoc @@ -3,7 +3,7 @@ When a loading job starts, the GSQL server assigns it a job ID and displays it for the user to see. 
There are three key commands to monitor and manage loading jobs: -[source,php] +[source,gsql] ---- SHOW LOADING STATUS job_id|ALL ABORT LOADING JOB job_id|ALL @@ -12,7 +12,7 @@ RESUME LOADING JOB job_id `SHOW LOADING STATUS` shows the current status of either a specified loading job or all current jobs, this command should be within the scope of a graph: -[source,php] +[source,gsql] GSQL > USE GRAPH graph_name GSQL > SHOW LOADING STATUS ALL