diff --git a/tools/etl/tg-spark-connector/LICENSE b/tools/etl/tg-spark-connector/LICENSE
new file mode 100644
index 00000000..989e2c59
--- /dev/null
+++ b/tools/etl/tg-spark-connector/LICENSE
@@ -0,0 +1,201 @@
+Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright [yyyy] [name of copyright owner]
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
\ No newline at end of file
diff --git a/tools/etl/tg-spark-connector/pom.xml b/tools/etl/tg-spark-connector/pom.xml
new file mode 100644
index 00000000..e95980d4
--- /dev/null
+++ b/tools/etl/tg-spark-connector/pom.xml
@@ -0,0 +1,257 @@
+
+
+
+/**
+ * Copyright (c) 2023 TigerGraph Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the
+ * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+ * express or implied. See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.tigergraph.spark;
+
+import java.io.Serializable;
+import java.time.Instant;
+import java.util.Base64;
+import com.tigergraph.spark.client.Builder;
+import com.tigergraph.spark.client.Auth;
+import com.tigergraph.spark.client.Misc;
+import com.tigergraph.spark.client.Write;
+import com.tigergraph.spark.client.Auth.AuthResponse;
+import com.tigergraph.spark.client.common.RestppResponse;
+import com.tigergraph.spark.util.Options;
+import com.tigergraph.spark.util.Utils;
+import feign.FeignException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Initialize TG connection including:
+ * 1. init authentication;
+ * 2. init the clients needed for corresponding operations.
+ *
+ * <p>Note, it is not a real DB connection, no network connection will be cached.
+ *
+ * <p>This connection will be initialized in the driver, then serialized and sent to executors.
+ * Transient variables will be rebuilt in executors.
+ */
+public class TigerGraphConnection implements Serializable {
+ private static final Logger logger = LoggerFactory.getLogger(TigerGraphConnection.class);
+
+ private Options opts;
+ // Common connection variables
+ private final String graph;
+ private final String url;
+ private final long creationTime;
+ private String version;
+ private transient Misc misc;
+ // Authentication variables
+ private String basicAuth;
+ private String secret;
+ private String token;
+ private boolean restAuthEnabled;
+ private boolean restAuthInited;
+ private transient Auth auth;
+ // Loading job variables/consts
+ // spark job type is supported for [3.10.0,), [3.9.4,)
+ static final String JOB_IDENTIFIER = "spark";
+ static final String JOB_MACHINE = "all";
+ private String loadingJobId = null;
+ private transient Write write;
+
+ /**
+ * Only be called in driver, serialized and sent to executors. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the
+ * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+ * express or implied. See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.tigergraph.spark;
+
+import org.apache.spark.sql.connector.catalog.SupportsWrite;
+import org.apache.spark.sql.connector.catalog.TableCapability;
+import org.apache.spark.sql.connector.write.LogicalWriteInfo;
+import org.apache.spark.sql.types.StructType;
+import java.time.Instant;
+import java.util.HashSet;
+import java.util.Set;
+import com.tigergraph.spark.write.TigerGraphWriteBuilder;
+
+/** The representation of logical structured data set of a TG, with supported capabilities. */
+public class TigerGraphTable implements SupportsWrite {
+
+ private static final String TABLE_NAME = "TigerGraphTable";
+ private final StructType schema;
+ private final long creationTime = Instant.now().toEpochMilli();
+
+ TigerGraphTable(StructType schema) { // package-private; keeps the caller-supplied schema as-is
+ this.schema = schema;
+ }
+
+ @Override
+ public String name() {
+ return TABLE_NAME; // same fixed name ("TigerGraphTable") for every instance
+ }
+
+ @Override
+ public StructType schema() {
+ return schema; // the schema handed to the constructor, returned unchanged
+ }
+
+ @Override
+ public Set Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the
+ * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+ * express or implied. See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.tigergraph.spark;
+
+import java.util.Map;
+import org.apache.spark.sql.connector.catalog.TableProvider;
+import org.apache.spark.sql.connector.expressions.Transform;
+import org.apache.spark.sql.sources.DataSourceRegister;
+import org.apache.spark.sql.types.StructType;
+import org.apache.spark.sql.util.CaseInsensitiveStringMap;
+
+/**
+ * A pure implementation of Spark Data Source V2 that applies data operations to existing TG objects,
+ * e.g., loading job, pre-installed query, vertex or edge. DDL is unsupported.
+ */
+public class TigerGraphTableProvider implements TableProvider, DataSourceRegister {
+
+ private static final String SHORT_NAME = "tigergraph";
+
+ /**
+ * Always returns {@code true}: this provider declares that it accepts an externally supplied
+ * schema. For a write operation that is the schema of the input dataframe; for a read operation
+ * it is the user-given schema.
+ *
+ * @return {@code true}, unconditionally
+ */
+ @Override
+ public boolean supportsExternalMetadata() {
+ return true;
+ }
+
+ @Override
+ public StructType inferSchema(CaseInsensitiveStringMap options) {
+ // Schema inference is not implemented: this always throws and never reads `options`.
+ throw new UnsupportedOperationException("Unimplemented method 'inferSchema'");
+ }
+
+ @Override
+ public TigerGraphTable getTable(
+ StructType schema, Transform[] partitioning, Map Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the
+ * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+ * express or implied. See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.tigergraph.spark.client;
+
+import feign.*;
+import com.tigergraph.spark.client.common.RestppResponse;
+
+/**
+ * APIs for RESTPP authentication.
+ *
+ * <p>Every method targets {@code /restpp/requesttoken}; the HTTP verb and the JSON payload select
+ * the operation. Feign {@code @Body} templates use curly braces for parameter expansion, so the
+ * literal braces of the JSON payload are written URL-encoded as {@code %7B} and {@code %7D}.
+ */
+public interface Auth {
+  /** Token lifetime of six hours, expressed in seconds. */
+  long TOKEN_LIFETIME_SEC = 6 * 60 * 60;
+
+  /**
+   * Helper call ({@code GET}, no credentials, no body) to check whether RESTPP auth is enabled. If
+   * it is not, an exception carrying a 404 error is thrown.
+   *
+   * @return the decoded response
+   */
+  @RequestLine("GET /restpp/requesttoken")
+  AuthResponse checkAuthEnabled();
+
+  /**
+   * Requests a token via {@code POST}, authenticating with an HTTP Basic {@code Authorization}
+   * header. The JSON body carries {@code graph} and {@code lifetime}.
+   *
+   * @param graph value of the {@code graph} field in the body
+   * @param basicAuth value placed after {@code "Basic "} in the {@code Authorization} header
+   * @param lifetime value of the {@code lifetime} field (sent as a quoted string); presumably
+   *     seconds, cf. {@link #TOKEN_LIFETIME_SEC} - TODO confirm
+   * @return the decoded response
+   */
+  @RequestLine("POST /restpp/requesttoken")
+  @Headers({"Content-Type: application/json", "Authorization: Basic {basicAuth}"})
+  @Body("%7B\"graph\": \"{graph}\", \"lifetime\": \"{lifetime}\"%7D")
+  AuthResponse requestTokenWithUserPass(
+      @Param("graph") String graph,
+      @Param("basicAuth") String basicAuth,
+      @Param("lifetime") long lifetime);
+
+  /**
+   * Requests a token via {@code POST} using a secret. The JSON body carries {@code secret} and
+   * {@code lifetime}; no {@code Authorization} header is sent.
+   *
+   * @param secret value of the {@code secret} field in the body
+   * @param lifetime value of the {@code lifetime} field (sent as a quoted string)
+   * @return the decoded response
+   */
+  @RequestLine("POST /restpp/requesttoken")
+  @Headers("Content-Type: application/json")
+  @Body("%7B\"secret\": \"{secret}\", \"lifetime\": \"{lifetime}\"%7D")
+  AuthResponse requestTokenWithSecret(
+      @Param("secret") String secret, @Param("lifetime") long lifetime);
+
+  /**
+   * Refreshes a token via {@code PUT}, authenticating with an HTTP Basic {@code Authorization}
+   * header. The JSON body carries {@code token} and {@code lifetime}.
+   *
+   * @param token value of the {@code token} field in the body
+   * @param basicAuth value placed after {@code "Basic "} in the {@code Authorization} header
+   * @param lifetime value of the {@code lifetime} field (sent as a quoted string)
+   * @return the decoded response
+   */
+  @RequestLine("PUT /restpp/requesttoken")
+  @Headers({"Content-Type: application/json", "Authorization: Basic {basicAuth}"})
+  @Body("%7B\"token\": \"{token}\", \"lifetime\": \"{lifetime}\"%7D")
+  AuthResponse refreshTokenWithUserPass(
+      @Param("token") String token,
+      @Param("basicAuth") String basicAuth,
+      @Param("lifetime") long lifetime);
+
+  /**
+   * Refreshes a token via {@code PUT} using a secret. The JSON body carries {@code secret}, {@code
+   * token} and {@code lifetime}; no {@code Authorization} header is sent.
+   *
+   * <p>NOTE(review): the method name is misspelled ("Secrect"); it is kept unchanged because
+   * renaming it would break existing callers.
+   *
+   * @param token value of the {@code token} field in the body
+   * @param secret value of the {@code secret} field in the body
+   * @param lifetime value of the {@code lifetime} field (sent as a quoted string)
+   * @return the decoded response
+   */
+  @RequestLine("PUT /restpp/requesttoken")
+  @Headers("Content-Type: application/json")
+  @Body("%7B\"secret\": \"{secret}\", \"token\": \"{token}\", \"lifetime\": \"{lifetime}\"%7D")
+  AuthResponse refreshTokenWithSecrect(
+      @Param("token") String token,
+      @Param("secret") String secret,
+      @Param("lifetime") long lifetime);
+
+  /** Reply of the token endpoints: a {@link RestppResponse} plus the token and its expiration. */
+  class AuthResponse extends RestppResponse {
+    public long expiration;
+    public String token;
+  }
+}
diff --git a/tools/etl/tg-spark-connector/src/main/java/com/tigergraph/spark/client/Builder.java b/tools/etl/tg-spark-connector/src/main/java/com/tigergraph/spark/client/Builder.java
new file mode 100644
index 00000000..1b2ec1de
--- /dev/null
+++ b/tools/etl/tg-spark-connector/src/main/java/com/tigergraph/spark/client/Builder.java
@@ -0,0 +1,208 @@
+/**
+ * Copyright (c) 2023 TigerGraph Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the
+ * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+ * express or implied. See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.tigergraph.spark.client;
+
+import java.util.List;
+import java.util.Random;
+import java.io.InputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.security.KeyStore;
+import java.util.Arrays;
+import java.util.concurrent.TimeUnit;
+import java.util.stream.Collectors;
+import javax.net.ssl.HostnameVerifier;
+import org.apache.hc.client5.http.impl.classic.HttpClientBuilder;
+import org.apache.hc.client5.http.impl.io.PoolingHttpClientConnectionManagerBuilder;
+import org.apache.hc.client5.http.ssl.DefaultHostnameVerifier;
+import org.apache.hc.client5.http.ssl.NoopHostnameVerifier;
+import org.apache.hc.client5.http.ssl.SSLConnectionSocketFactory;
+import org.apache.hc.client5.http.ssl.TrustAllStrategy;
+import org.apache.hc.core5.ssl.SSLContextBuilder;
+import org.apache.hc.core5.ssl.SSLContexts;
+import org.apache.spark.SparkFiles;
+import com.tigergraph.spark.client.common.RestppAuthInterceptor;
+import com.tigergraph.spark.client.common.RestppDecoder;
+import com.tigergraph.spark.client.common.RestppEncoder;
+import com.tigergraph.spark.client.common.RestppErrorDecoder;
+import com.tigergraph.spark.client.common.RestppRetryer;
+import com.tigergraph.spark.util.Options;
+import com.tigergraph.spark.util.Utils;
+import feign.*;
+import feign.Target.HardCodedTarget;
+import feign.codec.Decoder;
+import feign.codec.Encoder;
+import feign.codec.ErrorDecoder;
+import feign.hc5.ApacheHttp5Client;
+
+/** Builder for all clients, with custom client settings. */
+public class Builder {
+
+ private Feign.Builder builder = new Feign.Builder();
+ // default client settings
+ private HttpClientBuilder hc5builder = HttpClientBuilder.create();
+ private PoolingHttpClientConnectionManagerBuilder connMgrBuilder =
+ PoolingHttpClientConnectionManagerBuilder.create();
+ private Encoder encoder = RestppEncoder.INSTANCE;
+ private Decoder decoder = RestppDecoder.INSTANCE;
+ private ErrorDecoder errDecoder = new RestppErrorDecoder(RestppDecoder.INSTANCE);
+ private Retryer retryer = new Retryer.Default();
+ private RequestInterceptor reqInterceptor;
+ private Request.Options reqOpts = new Request.Options();
+
+ /**
+  * Replaces the per-request options with the given connect and read timeouts.
+  *
+  * <p>Both values are interpreted as milliseconds; the trailing {@code false} is the last
+  * argument of the {@code Request.Options} constructor (Feign's follow-redirects flag).
+  *
+  * @param connectTimeoutMs connect timeout in milliseconds
+  * @param readTimeoutMs read timeout in milliseconds
+  * @return this builder
+  */
+ public Builder setRequestOptions(int connectTimeoutMs, int readTimeoutMs) {
+   final Request.Options timeouts =
+       new Request.Options(
+           connectTimeoutMs, TimeUnit.MILLISECONDS, readTimeoutMs, TimeUnit.MILLISECONDS, false);
+   this.reqOpts = timeouts;
+   return this;
+ }
+
+ /**
+  * Installs a response error decoder configured with the HTTP error codes that will be retried.
+  *
+  * <p>The new {@link RestppErrorDecoder} is built around this builder's current {@code decoder}.
+  *
+  * @param code HTTP error codes to be retried
+  * @return this builder
+  */
+ public Builder setRetryableCode(Integer... code) {
+   final ErrorDecoder retryAwareDecoder = new RestppErrorDecoder(decoder, code);
+   this.errDecoder = retryAwareDecoder;
+   return this;
+ }
+
+ /**
+  * Installs a retryer for token expiration, I/O exceptions and server errors.
+  *
+  * <p>All arguments are forwarded unchanged, in the same order, to the {@link RestppRetryer}
+  * constructor; the units of the period values are not visible here.
+  *
+  * @param auth auth client handed to the retryer
+  * @param basicAuth basic-auth credential handed to the retryer
+  * @param secret secret handed to the retryer
+  * @param token token handed to the retryer
+  * @param ioPeriod retry period for I/O exceptions
+  * @param ioMaxPeriod maximum retry period for I/O exceptions
+  * @param ioMaxAttempts maximum number of attempts for I/O exceptions
+  * @param serverPeriod retry period for server errors
+  * @param serverMaxPeriod maximum retry period for server errors
+  * @param serverMaxAttempts maximum number of attempts for server errors
+  * @return this builder
+  */
+ public Builder setRetryer(
+     Auth auth,
+     String basicAuth,
+     String secret,
+     String token,
+     int ioPeriod,
+     int ioMaxPeriod,
+     int ioMaxAttempts,
+     int serverPeriod,
+     int serverMaxPeriod,
+     int serverMaxAttempts) {
+   final Retryer authAwareRetryer =
+       new RestppRetryer(
+           auth,
+           basicAuth,
+           secret,
+           token,
+           ioPeriod,
+           ioMaxPeriod,
+           ioMaxAttempts,
+           serverPeriod,
+           serverMaxPeriod,
+           serverMaxAttempts);
+   this.retryer = authAwareRetryer;
+   return this;
+ }
+
+ /**
+  * Installs a retryer for I/O exceptions and server errors only (no auth-related arguments).
+  *
+  * <p>All arguments are forwarded unchanged, in the same order, to the {@link RestppRetryer}
+  * constructor; the units of the period values are not visible here.
+  *
+  * @param ioPeriod retry period for I/O exceptions
+  * @param ioMaxPeriod maximum retry period for I/O exceptions
+  * @param ioMaxAttempts maximum number of attempts for I/O exceptions
+  * @param serverPeriod retry period for server errors
+  * @param serverMaxPeriod maximum retry period for server errors
+  * @param serverMaxAttempts maximum number of attempts for server errors
+  * @return this builder
+  */
+ public Builder setRetryerWithoutAuth(
+     int ioPeriod,
+     int ioMaxPeriod,
+     int ioMaxAttempts,
+     int serverPeriod,
+     int serverMaxPeriod,
+     int serverMaxAttempts) {
+   final Retryer plainRetryer =
+       new RestppRetryer(
+           ioPeriod, ioMaxPeriod, ioMaxAttempts, serverPeriod, serverMaxPeriod, serverMaxAttempts);
+   this.retryer = plainRetryer;
+   return this;
+ }
+
+ /**
+  * Installs the request interceptor used for adding the authorization header.
+  *
+  * @param basicAuth basic-auth credential handed to the {@link RestppAuthInterceptor}
+  * @param token token handed to the {@link RestppAuthInterceptor}
+  * @param restAuthEnabled flag handed to the {@link RestppAuthInterceptor}
+  * @return this builder
+  */
+ public Builder setRequestInterceptor(String basicAuth, String token, boolean restAuthEnabled) {
+   final RequestInterceptor authInterceptor =
+       new RestppAuthInterceptor(basicAuth, token, restAuthEnabled);
+   this.reqInterceptor = authInterceptor;
+   return this;
+ }
+
+ /**
+  * Sets the SSL context for the client by installing an SSL socket factory on the connection
+  * manager builder.
+  *
+  * <ul>
+  *   <li>{@code Options.SSL_MODE_BASIC}: trusts every certificate, no hostname verification.
+  *   <li>{@code Options.SSL_MODE_VERIFY_CA}: trusts the certificates of the given trust store, no
+  *       hostname verification.
+  *   <li>{@code Options.SSL_MODE_VERIFY_HOSTNAME}: same trust store handling as verify-CA, plus
+  *       the default hostname verifier.
+  * </ul>
+  *
+  * <p>The trust store path is resolved through {@code SparkFiles.get}. The file stream is opened
+  * in a try-with-resources statement so it is closed as soon as the key store has been loaded
+  * (or loading has failed) instead of leaking a file descriptor.
+  *
+  * @param mode one of the {@code Options.SSL_MODE_*} constants
+  * @param trustStoreFile trust store file name; required for the two verifying modes
+  * @param trustStoreType key store type handed to {@link KeyStore#getInstance(String)}
+  * @param password trust store password; an empty {@code char[]} is used when {@code
+  *     Utils.isEmpty(password)} holds
+  * @return this builder
+  * @throws RuntimeException wrapping any failure, including an invalid mode or a missing trust
+  *     store option
+  */
+ public Builder setSSL(
+     String mode, String trustStoreFile, String trustStoreType, String password) {
+   HostnameVerifier hostnameVerifier = NoopHostnameVerifier.INSTANCE;
+   SSLContextBuilder sslContextBuilder = SSLContexts.custom();
+   try {
+     switch (mode) {
+       case Options.SSL_MODE_BASIC:
+         sslContextBuilder.loadTrustMaterial(null, new TrustAllStrategy());
+         break;
+       case Options.SSL_MODE_VERIFY_HOSTNAME:
+         hostnameVerifier = new DefaultHostnameVerifier();
+         // the security level of hostname verification is higher than
+         // CA verification, so need to continue to the next case
+         // fall through
+       case Options.SSL_MODE_VERIFY_CA:
+         if (Utils.isEmpty(trustStoreFile)) {
+           throw new IllegalArgumentException("\"ssl.truststore\" is required for mode " + mode);
+         }
+         String path = SparkFiles.get(trustStoreFile);
+         // Close the stream deterministically once the key store has consumed it.
+         try (InputStream in = new FileInputStream(new File(path))) {
+           final KeyStore truststore = KeyStore.getInstance(trustStoreType);
+           if (Utils.isEmpty(password)) {
+             truststore.load(in, new char[0]);
+           } else {
+             truststore.load(in, password.toCharArray());
+           }
+           sslContextBuilder.loadTrustMaterial(truststore, null);
+         }
+         break;
+       default:
+         throw new IllegalArgumentException("Invalid SSL mode: " + mode);
+     }
+     connMgrBuilder.setSSLSocketFactory(
+         new SSLConnectionSocketFactory(sslContextBuilder.build(), hostnameVerifier));
+   } catch (Exception e) {
+     // Every failure (bad arguments included) is reported uniformly with its cause attached.
+     throw new RuntimeException("Failed to configure SSL", e);
+   }
+
+   return this;
+ }
+
+ public Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the
+ * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+ * express or implied. See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.tigergraph.spark.client;
+
+import com.tigergraph.spark.client.common.RestppResponse;
+import feign.*;
+
+/**
+ * Restpp API declaration used for connectivity check and cluster basic info detection.
+ *
+ * <ul>
+ *   <li>{@code version()} issues {@code GET /restpp/version}.
+ *   <li>{@code loadingAction(action, graph, jobId)} issues {@code GET
+ *       /gsqlserver/gsql/loading-jobs}, passing its three arguments as the {@code action}, {@code
+ *       graph} and {@code jobId} query parameters.
+ * </ul>
+ *
+ * Both calls return the reply decoded as a {@link RestppResponse}.
+ */
+public interface Misc {
+ @RequestLine("GET /restpp/version")
+ RestppResponse version();
+
+ @RequestLine("GET /gsqlserver/gsql/loading-jobs?action={action}&graph={graph}&jobId={jobId}")
+ RestppResponse loadingAction(
+ @Param("action") String action, @Param("graph") String graph, @Param("jobId") String jobId);
+}
diff --git a/tools/etl/tg-spark-connector/src/main/java/com/tigergraph/spark/client/Write.java b/tools/etl/tg-spark-connector/src/main/java/com/tigergraph/spark/client/Write.java
new file mode 100644
index 00000000..f6bf9caf
--- /dev/null
+++ b/tools/etl/tg-spark-connector/src/main/java/com/tigergraph/spark/client/Write.java
@@ -0,0 +1,42 @@
+/**
+ * Copyright (c) 2023 TigerGraph Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the
+ * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+ * express or implied. See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.tigergraph.spark.client;
+
+import feign.*;
+import java.util.Map;
+import com.fasterxml.jackson.databind.JsonNode;
+import com.tigergraph.spark.client.common.RestppResponse;
+
+/** Write service declaration used for data loading. */
+public interface Write {
+ @RequestLine("POST /restpp/ddl/{graph}")
+ @Headers({"Content-Type: text/plain"})
+ @Body("{data}")
+ LoadingResponse ddl(
+ @Param("graph") String graph,
+ @Param("data") String data,
+ @QueryMap Map Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the
+ * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+ * express or implied. See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.tigergraph.spark.client.common;
+
+import com.tigergraph.spark.util.Utils;
+import feign.RequestInterceptor;
+import feign.RequestTemplate;
+
+/**
+ * The request interceptor which is responsible for: Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the
+ * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+ * express or implied. See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.tigergraph.spark.client.common;
+
+import com.fasterxml.jackson.core.json.JsonReadFeature;
+import com.fasterxml.jackson.databind.DeserializationFeature;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import feign.codec.Decoder;
+import feign.jackson.JacksonDecoder;
+
+public class RestppDecoder {
+ public static final Decoder INSTANCE =
+ new JacksonDecoder(
+ new ObjectMapper()
+ // restpp can respond newline-delimited json
+ .configure(JsonReadFeature.ALLOW_UNESCAPED_CONTROL_CHARS.mappedFeature(), true)
+ .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false));
+}
diff --git a/tools/etl/tg-spark-connector/src/main/java/com/tigergraph/spark/client/common/RestppEncoder.java b/tools/etl/tg-spark-connector/src/main/java/com/tigergraph/spark/client/common/RestppEncoder.java
new file mode 100644
index 00000000..3e20d023
--- /dev/null
+++ b/tools/etl/tg-spark-connector/src/main/java/com/tigergraph/spark/client/common/RestppEncoder.java
@@ -0,0 +1,21 @@
+/**
+ * Copyright (c) 2023 TigerGraph Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the
+ * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+ * express or implied. See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.tigergraph.spark.client.common;
+
+import feign.codec.Encoder;
+import feign.jackson.JacksonEncoder;
+
+public class RestppEncoder { // holder of the shared Feign Encoder instance
+ public static final Encoder INSTANCE = new JacksonEncoder(); // built with the no-arg constructor
+}
diff --git a/tools/etl/tg-spark-connector/src/main/java/com/tigergraph/spark/client/common/RestppErrorDecoder.java b/tools/etl/tg-spark-connector/src/main/java/com/tigergraph/spark/client/common/RestppErrorDecoder.java
new file mode 100644
index 00000000..1588335d
--- /dev/null
+++ b/tools/etl/tg-spark-connector/src/main/java/com/tigergraph/spark/client/common/RestppErrorDecoder.java
@@ -0,0 +1,95 @@
+/**
+ * Copyright (c) 2023 TigerGraph Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the
+ * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+ * express or implied. See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.tigergraph.spark.client.common;
+
+import java.util.Arrays;
+import java.util.List;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import com.tigergraph.spark.constant.ErrorCode;
+import feign.Response;
+import feign.RetryableException;
+import feign.codec.Decoder;
+import feign.codec.ErrorDecoder;
+import org.apache.hc.core5.http.HttpStatus;
+
+/**
+ * Responsible for checking the HTTP status code to determine whether the request is retryable,
+ * i.e. whether or not to throw a {@link RetryableException}.
+ */
+public class RestppErrorDecoder implements ErrorDecoder {
+ private static final Logger logger = LoggerFactory.getLogger(RestppErrorDecoder.class);
+
+ static final List Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the
+ * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+ * express or implied. See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.tigergraph.spark.client.common;
+
+import com.fasterxml.jackson.databind.JsonNode;
+
+/** Standard TG RESTPP response POJO */
+public class RestppResponse {
+ public String code;
+ public boolean error;
+ public String message;
+ public JsonNode results;
+
+ /** Throw exception when HTTP status code is 200 but RESTPP error=true */
+ public void panicOnFail() {
+ if (error) {
+ throw new RestppErrorException(code, message);
+ }
+ }
+}
diff --git a/tools/etl/tg-spark-connector/src/main/java/com/tigergraph/spark/client/common/RestppRetryer.java b/tools/etl/tg-spark-connector/src/main/java/com/tigergraph/spark/client/common/RestppRetryer.java
new file mode 100644
index 00000000..af8b88da
--- /dev/null
+++ b/tools/etl/tg-spark-connector/src/main/java/com/tigergraph/spark/client/common/RestppRetryer.java
@@ -0,0 +1,203 @@
+/**
+ * Copyright (c) 2023 TigerGraph Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the
+ * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+ * express or implied. See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.tigergraph.spark.client.common;
+
+import java.io.IOException;
+import java.util.Random;
+import com.tigergraph.spark.client.Auth;
+import com.tigergraph.spark.util.Utils;
+import feign.RetryableException;
+import feign.Retryer;
+import org.apache.hc.core5.http.HttpStatus;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * A mixed retryer for 3 types of errors (token expiration, transport errors, server errors).
+ * Each type has its own retry interval and max attempts settings.
+ */
+public class RestppRetryer implements Retryer {
+  private static final Logger logger = LoggerFactory.getLogger(RestppRetryer.class);
+
+  // Shared source of randomness for jitter().
+  private static final Random rand = new Random();
+  // Retry types. Each value is the index of that type in the per-type arrays below, so they
+  // are declared final: reassigning one would silently corrupt the bookkeeping.
+  private static final int TYPE_AUTH = 0; // token expiration
+  private static final int TYPE_IO = 1; // transport exception, e.g. read timeout, connect timeout
+  private static final int TYPE_SERVER = 2; // server timeout/busy, e.g. 502, 503, 504
+
+  // refresh token
+  private static final int REFRESH_MAX_ATTEMPTS = 1;
+  private static final int REFRESH_PERIOD_MS = 3000; // 3s
+  // Client used to refresh the token; null when token refresh is not supported.
+  private final Auth auth;
+  // Credentials for refreshing: basicAuth is tried first, then secret.
+  private final String basicAuth;
+  private final String secret;
+  // The token to be refreshed when it expires.
+  private final String token;
+  // arrays to record the retry status for different retry types, indexed by TYPE_*
+  private final int[] period = new int[3];
+  private final int[] maxPeriod = new int[3];
+  private final int[] maxAttempts = new int[3];
+  private final int[] attempts = new int[3];
+  private final int[] sleptForMillis = new int[3];
+
+  /**
+   * Creates a retryer that, besides retrying transport and server errors, can refresh an expired
+   * token. The settings of the token-refresh retry type are fixed by {@code REFRESH_PERIOD_MS}
+   * and {@code REFRESH_MAX_ATTEMPTS}.
+   *
+   * @param auth client used to refresh the token; {@code null} disables refreshing
+   * @param basicAuth credential for refreshing with user/password; tried first when not empty
+   * @param secret credential for refreshing with a secret; used when {@code basicAuth} is empty
+   * @param token the token to refresh
+   * @param ioPeriod retry period (ms) for transport errors
+   * @param ioMaxPeriod upper bound (ms) of the retry interval for transport errors
+   * @param ioMaxAttempts max retry attempts for transport errors
+   * @param serverPeriod retry period (ms) for server errors
+   * @param serverMaxPeriod upper bound (ms) of the retry interval for server errors
+   * @param serverMaxAttempts max retry attempts for server errors
+   */
+  public RestppRetryer(
+      Auth auth,
+      String basicAuth,
+      String secret,
+      String token,
+      int ioPeriod,
+      int ioMaxPeriod,
+      int ioMaxAttempts,
+      int serverPeriod,
+      int serverMaxPeriod,
+      int serverMaxAttempts) {
+    this.auth = auth;
+    this.basicAuth = basicAuth;
+    this.secret = secret;
+    this.token = token;
+
+    // retry period of each retry type
+    this.period[TYPE_AUTH] = REFRESH_PERIOD_MS;
+    this.period[TYPE_IO] = ioPeriod;
+    this.period[TYPE_SERVER] = serverPeriod;
+
+    // upper bound of the retry interval of each retry type
+    this.maxPeriod[TYPE_AUTH] = REFRESH_PERIOD_MS;
+    this.maxPeriod[TYPE_IO] = ioMaxPeriod;
+    this.maxPeriod[TYPE_SERVER] = serverMaxPeriod;
+
+    // attempt budget of each retry type
+    this.maxAttempts[TYPE_AUTH] = REFRESH_MAX_ATTEMPTS;
+    this.maxAttempts[TYPE_IO] = ioMaxAttempts;
+    this.maxAttempts[TYPE_SERVER] = serverMaxAttempts;
+  }
+
+ /**
+ * Shortcut for creating a retryer that doesn't support refreshing the token: it delegates to
+ * the full constructor with {@code null} for auth, basicAuth, secret and token. E.g., we don't
+ * need token refresh when creating the {@link Auth} client.
+ */
+ public RestppRetryer(
+ int ioPeriod,
+ int ioMaxPeriod,
+ int ioMaxAttempts,
+ int serverPeriod,
+ int serverMaxPeriod,
+ int serverMaxAttempts) {
+ this(
+ null,
+ null,
+ null,
+ null,
+ ioPeriod,
+ ioMaxPeriod,
+ ioMaxAttempts,
+ serverPeriod,
+ serverMaxPeriod,
+ serverMaxAttempts);
+ }
+
+  /**
+   * Decides whether a failed request is retried. Returns normally, after waiting and (for an
+   * expired token) refreshing the token, when the request should be retried; rethrows {@code e}
+   * when it should not.
+   *
+   * <p>The retry type is inferred from the exception: an {@link IOException} cause is a
+   * transport error, HTTP 403 is treated as token expiration, and everything else as a server
+   * error. Each type consumes its own attempt budget.
+   *
+   * @param e the retryable failure
+   * @throws RetryableException {@code e} itself, when the attempt budget of its type is used up,
+   *     the thread is interrupted while waiting, or no credential is available to refresh the
+   *     token
+   */
+  @Override
+  public void continueOrPropagate(RetryableException e) {
+    // infer the retry type
+    final int retryType;
+    final String reason;
+    if (e.getCause() instanceof IOException) {
+      retryType = TYPE_IO;
+      reason = e.getMessage();
+    } else if (e.status() == HttpStatus.SC_FORBIDDEN) {
+      retryType = TYPE_AUTH;
+      reason =
+          String.format(
+              "Token %s expired, attempt to retry after refresh", Utils.maskString(token, 2));
+    } else {
+      retryType = TYPE_SERVER;
+      reason = e.getMessage();
+    }
+    if (attempts[retryType]++ >= maxAttempts[retryType]) {
+      throw e;
+    }
+
+    long interval;
+    if (e.retryAfter() != null) {
+      // Set the interval according to HTTP `Retry-After` header, capped by the max period
+      interval = Math.min(e.retryAfter().getTime() - currentTimeMillis(), maxPeriod[retryType]);
+      // A `Retry-After` moment that already passed means "retry now". Do not return early
+      // here: an expired token still has to be refreshed below, otherwise the retried request
+      // would fail again with the same stale token.
+      interval = Math.max(interval, 0);
+    } else {
+      interval = jitter(nextMaxInterval(retryType));
+    }
+    // Logged for both branches so that retries driven by `Retry-After` are visible too
+    logger.info("{}, retry in {} ms, attempt {}", reason, interval, attempts[retryType]);
+    try {
+      Thread.sleep(interval);
+    } catch (InterruptedException ignored) {
+      // restore the interrupt flag and give up retrying
+      Thread.currentThread().interrupt();
+      throw e;
+    }
+    sleptForMillis[retryType] += interval;
+
+    if (retryType == TYPE_AUTH && auth != null) {
+      if (!Utils.isEmpty(basicAuth)) {
+        auth.refreshTokenWithUserPass(token, basicAuth, Auth.TOKEN_LIFETIME_SEC).panicOnFail();
+      } else if (!Utils.isEmpty(secret)) {
+        auth.refreshTokenWithSecrect(token, secret, Auth.TOKEN_LIFETIME_SEC).panicOnFail();
+      } else {
+        // Don't support refresh token, throw it directly
+        throw e;
+      }
+      logger.info(
+          "Successfully refreshed token {} for {} seconds",
+          Utils.maskString(token, 2),
+          Auth.TOKEN_LIFETIME_SEC);
+    }
+  }
+
+  /**
+   * Returns a new retryer configured with the same credentials and the same transport/server
+   * retry settings as this one. The copy starts with its own zeroed attempt counters.
+   */
+  @Override
+  public RestppRetryer clone() {
+    final RestppRetryer copy =
+        new RestppRetryer(
+            auth,
+            basicAuth,
+            secret,
+            token,
+            period[TYPE_IO],
+            maxPeriod[TYPE_IO],
+            maxAttempts[TYPE_IO],
+            period[TYPE_SERVER],
+            maxPeriod[TYPE_SERVER],
+            maxAttempts[TYPE_SERVER]);
+    return copy;
+  }
+
+ protected long currentTimeMillis() {
+ return System.currentTimeMillis();
+ }
+
+ // visible for testing;
+ // 0.75 * interval ~ 1.25 * interval
+ protected static long jitter(long interval) {
+ return (long) (0.75 * interval + 0.5 * interval * rand.nextDouble());
+ }
+
+ /**
+ * Calculates the time interval to a retry attempt. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the
+ * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+ * express or implied. See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.tigergraph.spark.constant;
+
+/** RESTPP Error Codes */
+public class ErrorCode {
+ // Error code for an expired token.
+ public static final String TOKEN_EXPIRATION = "REST-10019";
+}
diff --git a/tools/etl/tg-spark-connector/src/main/java/com/tigergraph/spark/util/OptionDef.java b/tools/etl/tg-spark-connector/src/main/java/com/tigergraph/spark/util/OptionDef.java
new file mode 100644
index 00000000..2739a032
--- /dev/null
+++ b/tools/etl/tg-spark-connector/src/main/java/com/tigergraph/spark/util/OptionDef.java
@@ -0,0 +1,144 @@
+/**
+ * Copyright (c) 2023 TigerGraph Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the
+ * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+ * express or implied. See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.tigergraph.spark.util;
+
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.io.Serializable;
+import java.util.UUID;
+
+public class OptionDef implements Serializable {
+ // A unique Java object which represents the lack of a default value.
+ public static final Serializable NO_DEFAULT_VALUE = UUID.randomUUID();
+
+ // Options' definitions
+ private final Map Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the
+ * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+ * express or implied. See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.tigergraph.spark.util;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/*
+ * The result of validating one option. It holds multiple messages when the option has multiple
+ * errors.
+ */
+public class OptionError {
+ private String key;
+ private Object originalValue;
+ private List Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the
+ * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+ * express or implied. See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.tigergraph.spark.util;
+
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import com.tigergraph.spark.util.OptionDef.OptionKey;
+import com.tigergraph.spark.util.OptionDef.Type;
+
+/** Validate and transform Spark DataFrame options(configurations) */
+public class Options implements Serializable {
+
+  /** The kind of operation a set of options is created for. */
+  public enum OptionType {
+    WRITE,
+    READ
+  }
+
+ // The kind of operation (see OptionType) this instance was created for.
+ private final OptionType optionType;
+
+ // Option keys. Constants ending in _DEFAULT hold the default value of the key with the same
+ // name prefix; a `.ms` suffix in a key means the value is in milliseconds.
+ public static final String GRAPH = "graph";
+ public static final String URL = "url";
+ public static final String VERSION = "version";
+ public static final String USERNAME = "username";
+ public static final String PASSWORD = "password";
+ public static final String SECRET = "secret";
+ public static final String TOKEN = "token";
+ // loading
+ public static final String LOADING_JOB = "loading.job";
+ public static final String LOADING_FILENAME = "loading.filename";
+ public static final String LOADING_SEPARATOR = "loading.separator";
+ public static final String LOADING_EOL = "loading.eol";
+ public static final String LOADING_BATCH_SIZE_BYTES = "loading.batch.size.bytes";
+ public static final String LOADING_TIMEOUT_MS = "loading.timeout.ms";
+ public static final String LOADING_MAX_PERCENT_ERROR = "loading.max.percent.error";
+ public static final String LOADING_MAX_NUM_ERROR = "loading.max.num.error";
+ public static final String LOADING_RETRY_INTERVAL_MS = "loading.retry.interval.ms";
+ public static final String LOADING_MAX_RETRY_INTERVAL_MS = "loading.max.retry.interval.ms";
+ public static final String LOADING_MAX_RETRY_ATTEMPTS = "loading.max.retry.attempts";
+ // loading - default
+ public static final String LOADING_SEPARATOR_DEFAULT = ",";
+ public static final String LOADING_EOL_DEFAULT = "\n";
+ public static final int LOADING_BATCH_SIZE_BYTES_DEFAULT = 2 * 1024 * 1024; // 2mb
+ public static final int LOADING_TIMEOUT_MS_DEFAULT = 0; // restpp default
+ public static final int LOADING_RETRY_INTERVAL_MS_DEFAULT = 5 * 1000; // 5s
+ public static final int LOADING_MAX_RETRY_INTERVAL_MS_DEFAULT = 5 * 60 * 1000; // 5min
+ public static final int LOADING_MAX_RETRY_ATTEMPTS_DEFAULT = 10;
+ // http transport
+ public static final String IO_CONNECT_TIMEOUT_MS = "io.connect.timeout.ms";
+ public static final String IO_READ_TIMEOUT_MS = "io.read.timeout.ms";
+ public static final String IO_RETRY_INTERVAL_MS = "io.retry.interval.ms";
+ public static final String IO_MAX_RETRY_INTERVAL_MS = "io.max.retry.interval.ms";
+ public static final String IO_MAX_RETRY_ATTEMPTS = "io.max.retry.attempts";
+ // http transport - default
+ public static final int IO_CONNECT_TIMEOUT_MS_DEFAULT = 30 * 1000; // 30s
+ public static final int IO_READ_TIMEOUT_MS_DEFAULT = 60 * 1000; // 1min
+ public static final int IO_RETRY_INTERVAL_MS_DEFAULT = 5 * 1000; // 5s
+ public static final int IO_MAX_RETRY_INTERVAL_MS_DEFAULT = 10 * 1000; // 10s
+ public static final int IO_MAX_RETRY_ATTEMPTS_DEFAULT = 5;
+ // SSL: the option key, followed by the SSL_MODE_* string constants
+ public static final String SSL_MODE = "ssl.mode";
+ public static final String SSL_MODE_BASIC = "basic";
+ public static final String SSL_MODE_VERIFY_CA = "verifyCA";
+ public static final String SSL_MODE_VERIFY_HOSTNAME = "verifyHostname";
+ public static final String SSL_TRUSTSTORE = "ssl.truststore";
+ public static final String SSL_TRUSTSTORE_TYPE = "ssl.truststore.type";
+ public static final String SSL_TRUSTSTORE_PASSWORD = "ssl.truststore.password";
+ public static final String SSL_TRUSTSTORE_TYPE_DEFAULT = "JKS";
+
+ // Options' group name
+ public static final String GROUP_GENERAL = "general";
+ public static final String GROUP_AUTH = "auth";
+ // Note: same literal as the LOADING_JOB option key.
+ public static final String GROUP_LOADING_JOB = "loading.job";
+ public static final String GROUP_TRANSPORT_TIMEOUT = "transport.timeout";
+ public static final String GROUP_SSL = "ssl";
+
+ private final Map Visible for testing
+ *
+ * @return the errors of validation, If the returned List's size is 0, there is no validation
+ * error.
+ */
+ protected List Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the
+ * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+ * express or implied. See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.tigergraph.spark.util;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+import java.util.stream.Collectors;
+import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.node.ObjectNode;
+
+/** Utilities */
+public class Utils {
+ // Returned by extractVersion when the input contains no version number.
+ public static final String DEFAULT_VERSION = "999.999.999";
+ // Captures a three-part dotted version number such as 3.10.1.
+ // NOTE(review): the name misspells "PATTERN"; renaming would break users of this public constant.
+ public static final Pattern VERSION_PARTTERN = Pattern.compile("(\\d+\\.\\d+\\.\\d+)");
+
+ /***************** VERSION *****************/
+
+  /**
+   * Extract the TG version from the response msg of /version endpoint.
+   *
+   * @param input the text to search
+   * @return the first {@code x.y.z} number found in {@code input}, or {@link #DEFAULT_VERSION}
+   *     when there is none
+   */
+  public static String extractVersion(String input) {
+    final Matcher versionMatcher = VERSION_PARTTERN.matcher(input);
+    return versionMatcher.find() ? versionMatcher.group(1) : DEFAULT_VERSION;
+  }
+
+  /**
+   * Compares two versions by comparing their fixed-length formatted forms.
+   *
+   * @param v1 the first version
+   * @param v2 the second version
+   * @return positive if {@code v1 > v2}; 0 if {@code v1 == v2}; negative if {@code v1 < v2}
+   */
+  public static int versionCmp(String v1, String v2) {
+    final String left = fmtVersion(v1);
+    final String right = fmtVersion(v2);
+    return left.compareTo(right);
+  }
+
+ /** Format version string to fixed length: 3.10.1 => 003010001 */
+ private static String fmtVersion(String version) {
+ final List Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the
+ * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+ * express or implied. See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.tigergraph.spark.write;
+
+import org.apache.spark.sql.connector.write.BatchWrite;
+import org.apache.spark.sql.connector.write.DataWriter;
+import org.apache.spark.sql.connector.write.PhysicalWriteInfo;
+import org.apache.spark.sql.connector.write.WriterCommitMessage;
+import org.apache.spark.sql.types.StructType;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import com.tigergraph.spark.TigerGraphConnection;
+import com.tigergraph.spark.client.common.RestppResponse;
+import com.tigergraph.spark.util.Utils;
+
+/**
+ * Define how to write the data to TG for batch processing.
+ *
+ * The writing procedure is:
+ *
+ *
+ */
+public class TigerGraphBatchWrite extends TigerGraphWriteBase implements BatchWrite {
+  private static final Logger logger = LoggerFactory.getLogger(TigerGraphBatchWrite.class);
+
+  /**
+   * @param schema schema of the data to write
+   * @param conn connection the writers are created with
+   */
+  TigerGraphBatchWrite(StructType schema, TigerGraphConnection conn) {
+    super(schema, conn);
+  }
+
+  /** Creates the factory that builds the data writers of this batch write. */
+  @Override
+  public TigerGraphBatchWriterFactory createBatchWriterFactory(PhysicalWriteInfo info) {
+    return new TigerGraphBatchWriterFactory(schema, conn);
+  }
+
+  /**
+   * Logs the end of the batch loading job together with the loading summary.
+   *
+   * @param messages the commit messages of the data writers
+   */
+  @Override
+  public void commit(WriterCommitMessage[] messages) {
+    logger.info(
+        "Finished batch loading job {}",
+        conn.getLoadingJobId() == null ? "" : conn.getLoadingJobId());
+    logLoadingSummary(messages);
+  }
+
+  /**
+   * Logs that the batch loading job was aborted together with the loading summary.
+   *
+   * @param messages the commit messages of the data writers
+   */
+  @Override
+  public void abort(WriterCommitMessage[] messages) {
+    logger.error(
+        "Aborted batch loading job {}",
+        conn.getLoadingJobId() == null ? "" : conn.getLoadingJobId());
+    logLoadingSummary(messages);
+  }
+
+  /**
+   * Logs the total processed rows, the per-task summary and, when available, the overall
+   * loading statistics. Shared by {@link #commit} and {@link #abort}.
+   */
+  private void logLoadingSummary(WriterCommitMessage[] messages) {
+    logger.info("Total processed rows: {}", getTotalProcessedRows(messages));
+    logger.info("Processed rows of each task:\n{}", getTaskSummury(messages));
+    RestppResponse resp = getLoadingStatistics();
+    // `results` is a plain public field and may be absent from the response; the statistics
+    // are informational only, so a missing value must not fail the commit/abort.
+    if (resp != null && resp.results != null) {
+      Utils.removeUserData(resp.results);
+      logger.info("Overall loading statistics: {}", resp.results.toPrettyString());
+    }
+  }
+}
diff --git a/tools/etl/tg-spark-connector/src/main/java/com/tigergraph/spark/write/TigerGraphBatchWriterFactory.java b/tools/etl/tg-spark-connector/src/main/java/com/tigergraph/spark/write/TigerGraphBatchWriterFactory.java
new file mode 100644
index 00000000..638b7579
--- /dev/null
+++ b/tools/etl/tg-spark-connector/src/main/java/com/tigergraph/spark/write/TigerGraphBatchWriterFactory.java
@@ -0,0 +1,44 @@
+/**
+ * Copyright (c) 2023 TigerGraph Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the
+ * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+ * express or implied. See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.tigergraph.spark.write;
+
+import org.apache.spark.sql.connector.write.DataWriterFactory;
+import org.apache.spark.sql.types.StructType;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import com.tigergraph.spark.TigerGraphConnection;
+
+/**
+ * A factory of {@link TigerGraphDataWriter} for batch write, which is responsible for creating and
+ * initializing the actual data writer at executor side.
+ */
+public class TigerGraphBatchWriterFactory implements DataWriterFactory {
+  private static final Logger logger = LoggerFactory.getLogger(TigerGraphBatchWriterFactory.class);
+
+  // Handed unchanged to every writer this factory creates.
+  private final StructType schema;
+  private final TigerGraphConnection conn;
+
+  /**
+   * @param schema schema of the data to write
+   * @param conn connection passed on to the created writers
+   */
+  TigerGraphBatchWriterFactory(StructType schema, TigerGraphConnection conn) {
+    this.conn = conn;
+    this.schema = schema;
+    logger.info("Created {} for executor", TigerGraphBatchWriterFactory.class);
+  }
+
+  /**
+   * Creates the data writer for one partition/task.
+   *
+   * @param partitionId id of the partition the writer is created for
+   * @param taskId id of the task the writer is created for
+   * @return a new {@link TigerGraphDataWriter}
+   */
+  @Override
+  public TigerGraphDataWriter createWriter(int partitionId, long taskId) {
+    logger.info(
+        "Creating TigerGraph batch writer for partitionId {}, taskId {}.", partitionId, taskId);
+    final TigerGraphDataWriter writer = new TigerGraphDataWriter(schema, conn, partitionId, taskId);
+    return writer;
+  }
+}
diff --git a/tools/etl/tg-spark-connector/src/main/java/com/tigergraph/spark/write/TigerGraphDataWriter.java b/tools/etl/tg-spark-connector/src/main/java/com/tigergraph/spark/write/TigerGraphDataWriter.java
new file mode 100644
index 00000000..b6906f2e
--- /dev/null
+++ b/tools/etl/tg-spark-connector/src/main/java/com/tigergraph/spark/write/TigerGraphDataWriter.java
@@ -0,0 +1,154 @@
+/**
+ * Copyright (c) 2023 TigerGraph Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the
+ * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+ * express or implied. See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.tigergraph.spark.write;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.stream.Collectors;
+import java.util.stream.IntStream;
+import org.apache.spark.sql.catalyst.InternalRow;
+import org.apache.spark.sql.connector.write.DataWriter;
+import org.apache.spark.sql.types.StructType;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import com.tigergraph.spark.TigerGraphConnection;
+import com.tigergraph.spark.client.Write;
+import com.tigergraph.spark.client.Write.LoadingResponse;
+import com.tigergraph.spark.util.Options;
+import com.tigergraph.spark.util.Utils;
+
+/** The data writer of an executor responsible for writing data for an input RDD partition. */
+public class TigerGraphDataWriter implements DataWriter Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the
+ * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+ * express or implied. See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.tigergraph.spark.write;
+
+import org.apache.spark.sql.connector.write.streaming.StreamingDataWriterFactory;
+import org.apache.spark.sql.types.StructType;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import com.tigergraph.spark.TigerGraphConnection;
+
+/**
+ * A factory of {@link TigerGraphDataWriter} for streaming write, which is responsible for creating
+ * and initializing the actual data writer at executor side.
+ */
+public class TigerGraphStreamWriterFactory implements StreamingDataWriterFactory {
+  private static final Logger logger = LoggerFactory.getLogger(TigerGraphStreamWriterFactory.class);
+
+  // Handed unchanged to every writer this factory creates.
+  private final StructType schema;
+  private final TigerGraphConnection conn;
+
+  /**
+   * @param schema schema of the data to write
+   * @param conn connection passed on to the created writers
+   */
+  TigerGraphStreamWriterFactory(StructType schema, TigerGraphConnection conn) {
+    this.schema = schema;
+    this.conn = conn;
+    // Log this class, not the batch factory, so the log shows which factory was really created
+    logger.info("Created {} for executor", TigerGraphStreamWriterFactory.class);
+  }
+
+  /**
+   * Creates the data writer for one partition/task of a streaming epoch.
+   *
+   * @param partitionId id of the partition the writer is created for
+   * @param taskId id of the task the writer is created for
+   * @param epochId id of the streaming epoch the writer is created for
+   * @return a new {@link TigerGraphDataWriter}
+   */
+  @Override
+  public TigerGraphDataWriter createWriter(int partitionId, long taskId, long epochId) {
+    logger.info(
+        "Create TigerGraph streaming writer for partitionId {}, taskId {}, epochId {}.",
+        partitionId,
+        taskId,
+        epochId);
+    return new TigerGraphDataWriter(schema, conn, partitionId, taskId, epochId);
+  }
+}
diff --git a/tools/etl/tg-spark-connector/src/main/java/com/tigergraph/spark/write/TigerGraphStreamingWrite.java b/tools/etl/tg-spark-connector/src/main/java/com/tigergraph/spark/write/TigerGraphStreamingWrite.java
new file mode 100644
index 00000000..5723f32d
--- /dev/null
+++ b/tools/etl/tg-spark-connector/src/main/java/com/tigergraph/spark/write/TigerGraphStreamingWrite.java
@@ -0,0 +1,85 @@
+/**
+ * Copyright (c) 2023 TigerGraph Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the
+ * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+ * express or implied. See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.tigergraph.spark.write;
+
+import org.apache.spark.sql.connector.write.DataWriter;
+import org.apache.spark.sql.connector.write.PhysicalWriteInfo;
+import org.apache.spark.sql.connector.write.WriterCommitMessage;
+import org.apache.spark.sql.connector.write.streaming.StreamingWrite;
+import org.apache.spark.sql.types.StructType;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import com.tigergraph.spark.TigerGraphConnection;
+import com.tigergraph.spark.client.common.RestppResponse;
+import com.tigergraph.spark.util.Utils;
+
+/**
+ * Defines how to write the data to TG in streaming queries.
+ *
+ * The writing procedure is:
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the
+ * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+ * express or implied. See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.tigergraph.spark.write;
+
+import org.apache.spark.sql.connector.write.WriterCommitMessage;
+import org.apache.spark.sql.types.StructType;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import com.tigergraph.spark.TigerGraphConnection;
+import com.tigergraph.spark.client.common.RestppResponse;
+import com.tigergraph.spark.util.Utils;
+import java.util.Arrays;
+import java.util.stream.Collectors;
+
+/** Base class for {@link TigerGraphBatchWrite} and {@link TigerGraphStreamingWrite}. */
+public class TigerGraphWriteBase {
+ private static final Logger logger = LoggerFactory.getLogger(TigerGraphWriteBase.class);
+
+ // Action name passed to loadingAction() when querying the loading statistics of a job.
+ // NOTE(review): used as a constant but not declared final — consider making it final.
+ protected static String GSQL_GET_PROGRESS = "getprogress";
+
+ // Schema and connection received at construction time; shared with subclasses.
+ protected final StructType schema;
+ protected final TigerGraphConnection conn;
+
+  /**
+   * Stores the schema and the connection for use by the subclasses.
+   *
+   * @param schema schema of the data to write
+   * @param conn connection to TigerGraph
+   */
+  public TigerGraphWriteBase(StructType schema, TigerGraphConnection conn) {
+    this.conn = conn;
+    this.schema = schema;
+  }
+
+ protected RestppResponse getLoadingStatistics() {
+ if (Utils.versionCmp(conn.getVersion(), "3.9.4") >= 0) {
+ try {
+ RestppResponse resp =
+ conn.getMisc()
+ .loadingAction(GSQL_GET_PROGRESS, conn.getGraph(), conn.getLoadingJobId());
+ resp.panicOnFail();
+ return resp;
+ } catch (Exception e) {
+ logger.info(
+ "Failed to query loading statistics of job {}: {}, it won't block the loading"
+ + " and you can manually query it via `curl -X GET -u Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the
+ * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+ * express or implied. See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.tigergraph.spark.write;
+
+import java.time.Instant;
+import org.apache.spark.sql.connector.write.LogicalWriteInfo;
+import org.apache.spark.sql.connector.write.WriteBuilder;
+import org.apache.spark.sql.types.StructType;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import com.tigergraph.spark.TigerGraphConnection;
+import com.tigergraph.spark.util.Options;
+
+/** Builder for Batch Write or Streaming Write */
+public class TigerGraphWriteBuilder implements WriteBuilder {
+  private static final Logger logger = LoggerFactory.getLogger(TigerGraphWriteBuilder.class);
+  private final StructType schema;
+  private final TigerGraphConnection conn;
+
+  /**
+   * Creates a builder whose connection uses the current time as its creation time.
+   *
+   * @param info the logical write information provided by Spark
+   */
+  public TigerGraphWriteBuilder(LogicalWriteInfo info) {
+    this(info, Instant.now().toEpochMilli());
+  }
+
+  /**
+   * Creates a builder: takes the schema from {@code info}, builds write options from the options
+   * of {@code info}, runs {@code validate()} on them and creates the connection.
+   *
+   * @param info the logical write information provided by Spark
+   * @param creationTime creation time (epoch milliseconds) passed to the connection
+   */
+  public TigerGraphWriteBuilder(LogicalWriteInfo info, long creationTime) {
+    logger.info("Start to build TigerGraph data writer with queryId {}", info.queryId());
+    this.schema = info.schema();
+    final Options writeOptions =
+        new Options(info.options().asCaseSensitiveMap(), Options.OptionType.WRITE);
+    writeOptions.validate();
+    this.conn = new TigerGraphConnection(writeOptions, creationTime);
+    final boolean hasLoadingJobId = this.conn.getLoadingJobId() != null;
+    if (hasLoadingJobId) {
+      logger.info("Loading job ID: {}", this.conn.getLoadingJobId());
+    }
+  }
+
+  /** Builds the write used for batch processing. */
+  public TigerGraphBatchWrite buildForBatch() {
+    final TigerGraphBatchWrite batchWrite = new TigerGraphBatchWrite(schema, conn);
+    return batchWrite;
+  }
+
+  /** Builds the write used for streaming queries. */
+  public TigerGraphStreamingWrite buildForStreaming() {
+    final TigerGraphStreamingWrite streamingWrite = new TigerGraphStreamingWrite(schema, conn);
+    return streamingWrite;
+  }
+}
diff --git a/tools/etl/tg-spark-connector/src/main/java/com/tigergraph/spark/write/TigerGraphWriterCommitMessage.java b/tools/etl/tg-spark-connector/src/main/java/com/tigergraph/spark/write/TigerGraphWriterCommitMessage.java
new file mode 100644
index 00000000..b0161e41
--- /dev/null
+++ b/tools/etl/tg-spark-connector/src/main/java/com/tigergraph/spark/write/TigerGraphWriterCommitMessage.java
@@ -0,0 +1,50 @@
+/**
+ * Copyright (c) 2023 TigerGraph Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the
+ * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+ * express or implied. See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.tigergraph.spark.write;
+
+import org.apache.spark.sql.connector.write.WriterCommitMessage;
+
+/**
+ * Commit message returned by TigerGraphDataWriter.commit(). It is sent back to the driver side,
+ * where it becomes an element of the input of TigerGraphBatchWrite.commit(WriterCommitMessage[])
+ * or TigerGraphStreamingWrite.commit(long, WriterCommitMessage[]).
+ */
+public class TigerGraphWriterCommitMessage implements WriterCommitMessage {
+  private final long loadedRows;
+  private final int partitionId;
+  private final long taskId;
+
+  /**
+   * @param loadedRows number of rows reported as loaded by the writer
+   * @param partitionId id of the partition the writer handled
+   * @param taskId id of the task the writer ran in
+   */
+  TigerGraphWriterCommitMessage(long loadedRows, int partitionId, long taskId) {
+    this.loadedRows = loadedRows;
+    this.partitionId = partitionId;
+    this.taskId = taskId;
+  }
+
+  /** Returns a one-line summary; numbers are rendered with grouping separators. */
+  @Override
+  public String toString() {
+    return String.format(
+        "PartitionId: %,d, taskId: %,d, loaded rows: %,d", partitionId, taskId, loadedRows);
+  }
+
+  /** Returns the number of loaded rows carried by this message. */
+  public long getLoadedRows() {
+    return this.loadedRows;
+  }
+
+  /** Returns the partition id carried by this message. */
+  public int getPartitionId() {
+    return this.partitionId;
+  }
+
+  /** Returns the task id carried by this message. */
+  public long getTaskId() {
+    return this.taskId;
+  }
+}
diff --git a/tools/etl/tg-spark-connector/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister b/tools/etl/tg-spark-connector/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister
new file mode 100644
index 00000000..2fca6f96
--- /dev/null
+++ b/tools/etl/tg-spark-connector/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister
@@ -0,0 +1 @@
+com.tigergraph.spark.TigerGraphTableProvider
\ No newline at end of file
diff --git a/tools/etl/tg-spark-connector/src/test/java/com/tigergraph/spark/TigerGraphConnectionTest.java b/tools/etl/tg-spark-connector/src/test/java/com/tigergraph/spark/TigerGraphConnectionTest.java
new file mode 100644
index 00000000..d4171230
--- /dev/null
+++ b/tools/etl/tg-spark-connector/src/test/java/com/tigergraph/spark/TigerGraphConnectionTest.java
@@ -0,0 +1,13 @@
+package com.tigergraph.spark;
+
+import static org.junit.jupiter.api.Assertions.assertTrue;
+import org.junit.jupiter.api.Test;
+
+public class TigerGraphConnectionTest {
+  /** Checks the job id produced for a fixed graph, loading job name and timestamp. */
+  @Test
+  public void testGenerateJobId() {
+    final String expected = "graph.load_social.spark.all.1234567";
+    final String actual = TigerGraphConnection.generateJobId("graph", "load_social", 1234567);
+    assertTrue(actual.equals(expected));
+  }
+}
diff --git a/tools/etl/tg-spark-connector/src/test/java/com/tigergraph/spark/client/BuilderTest.java b/tools/etl/tg-spark-connector/src/test/java/com/tigergraph/spark/client/BuilderTest.java
new file mode 100644
index 00000000..e6e7f047
--- /dev/null
+++ b/tools/etl/tg-spark-connector/src/test/java/com/tigergraph/spark/client/BuilderTest.java
@@ -0,0 +1,32 @@
+package com.tigergraph.spark.client;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import org.junit.jupiter.api.Test;
+import com.tigergraph.spark.client.Builder.LoadBalanceTarget;
+
+public class BuilderTest {
+
+ @Test
+ public void LoadBalanceTest() {
+ List
+ * 1. build http client, set SSLSocketFactory if SSL enabled
+ * 2. based on 1, build {@link Auth} client
+ * 3. based on 2, detect if auth is enabled and request token if not given
+ * 4. based on 3, we can build requestInterceptor(add auth header) and retryer(refresh token) for
+ * other clients
+ * 5. init for specific operations, e.g., loading job id
+ *
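+ * @param opts connector options (graph, URL, credentials, version, ...)
+ * @param creationTime timestamp used when generating the loading job id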
+ */
+ public TigerGraphConnection(Options opts, long creationTime) {
+   this.opts = opts;
+   this.creationTime = creationTime;
+   graph = opts.getString(Options.GRAPH);
+   url = opts.getString(Options.URL);
+   // Authentication must be ready before any other client (e.g. Misc below) is built.
+   initAuth();
+   // Prefer the version supplied in the options; ask the server only when it is absent.
+   version = opts.getString(Options.VERSION);
+   if (Utils.isEmpty(version)) {
+     RestppResponse verResp = getMisc().version();
+     verResp.panicOnFail();
+     version = Utils.extractVersion(verResp.message);
+   }
+   // Reject versions strictly below 3.6.0 only. The previous `<= 0` comparison also rejected
+   // 3.6.0 itself, contradicting the error message ("under 3.6.0").
+   if (Utils.versionCmp(version, "3.6.0") < 0) {
+     throw new UnsupportedOperationException(
+         "TigerGraph version under 3.6.0 is unsupported, current version: " + version);
+   }
+   logger.info("TigerGraph version: {}", version);
+
+   // A loading job id is generated only for write connections against 3.9.4 or newer.
+   if (Options.OptionType.WRITE.equals(opts.getOptionType())
+       && Utils.versionCmp(version, "3.9.4") >= 0) {
+     loadingJobId = generateJobId(graph, opts.getString(Options.LOADING_JOB), creationTime);
+   }
+ }
+
+ /**
+  * Creates a connection using the current time (epoch milliseconds) as the creation time.
+  *
+  * @param opts connector options, forwarded unchanged to the two-argument constructor
+  */
+ public TigerGraphConnection(Options opts) {
+ this(opts, Instant.now().toEpochMilli());
+ }
+
+ /**
+  * Initializes RESTPP authentication state once; later calls return immediately.
+  *
+  * <p>Steps: read secret/token from the options, encode username:password for basic auth when
+  * both are given, build the {@link Auth} client, probe whether RESTPP authentication is
+  * enabled (an HTTP 404 from the probe is treated as "disabled"), and finally request a token
+  * when authentication is enabled but no token was supplied.
+  *
+  * @throws IllegalArgumentException if authentication is enabled and neither a token, a secret
+  *     nor a username/password pair is available
+  * @throws FeignException if the probe fails with any status other than 404
+  */
+ private void initAuth() {
+   if (restAuthInited) {
+     return;
+   }
+   this.secret = opts.getString(Options.SECRET);
+   this.token = opts.getString(Options.TOKEN);
+   // 1. encode username:password to basic auth
+   if (!Utils.isEmpty(opts.getString(Options.USERNAME))
+       && !Utils.isEmpty(opts.getString(Options.PASSWORD))) {
+     String userPass = opts.getString(Options.USERNAME) + ":" + opts.getString(Options.PASSWORD);
+     // Use an explicit charset: the no-arg getBytes()/new String(byte[]) depend on the
+     // platform default, which would corrupt non-ASCII credentials on non-UTF-8 hosts.
+     this.basicAuth =
+         Base64.getEncoder()
+             .encodeToString(userPass.getBytes(java.nio.charset.StandardCharsets.UTF_8));
+   }
+   // 2. init Auth client
+   getAuth();
+   // 3. check if restpp auth is enabled
+   restAuthEnabled = true;
+   try {
+     auth.checkAuthEnabled();
+   } catch (FeignException e) {
+     if (e.status() == 404) {
+       restAuthEnabled = false;
+       logger.warn(
+           "RESTPP authentication is not enabled, you can enable it via `gadmin config set"
+               + " RESTPP.Factory.EnableAuth true`");
+     } else {
+       throw e;
+     }
+   }
+   // 4. request token if username/password or secret is provided but token is empty
+   if (restAuthEnabled && Utils.isEmpty(token)) {
+     AuthResponse resp;
+     if (!Utils.isEmpty(basicAuth)) {
+       resp = auth.requestTokenWithUserPass(graph, basicAuth, Auth.TOKEN_LIFETIME_SEC);
+       resp.panicOnFail();
+       token = resp.results.get("token").asText();
+     } else if (!Utils.isEmpty(secret)) {
+       resp = auth.requestTokenWithSecret(secret, Auth.TOKEN_LIFETIME_SEC);
+       resp.panicOnFail();
+       token = resp.token;
+     } else {
+       throw new IllegalArgumentException(
+           "Restpp authentication is enabled, please provide at least one of the 'token',"
+               + " 'secret' or 'username/password' pair.");
+     }
+     // Only a masked form of the token is logged.
+     logger.info(
+         "Requested new token {} for RESTPP authentication, expiration: {}",
+         Utils.maskString(token, 2),
+         resp.expiration);
+   }
+   restAuthInited = true;
+ }
+
+ /** Get auth client for requesting/refreshing token */
+ private Auth getAuth() {
+ if (auth == null) {
+ Builder builder =
+ new Builder()
+ .setRequestOptions(
+ opts.getInt(Options.IO_CONNECT_TIMEOUT_MS),
+ opts.getInt(Options.IO_READ_TIMEOUT_MS))
+ .setRetryerWithoutAuth(
+ opts.getInt(Options.IO_RETRY_INTERVAL_MS),
+ opts.getInt(Options.IO_MAX_RETRY_INTERVAL_MS),
+ opts.getInt(Options.IO_MAX_RETRY_ATTEMPTS),
+ opts.getInt(Options.IO_RETRY_INTERVAL_MS),
+ opts.getInt(Options.IO_MAX_RETRY_INTERVAL_MS),
+ opts.getInt(Options.IO_MAX_RETRY_ATTEMPTS));
+ if (url.trim().toLowerCase().startsWith("https://")) {
+ builder.setSSL(
+ opts.getString(Options.SSL_MODE),
+ opts.getString(Options.SSL_TRUSTSTORE),
+ opts.getString(Options.SSL_TRUSTSTORE_TYPE),
+ opts.getString(Options.SSL_TRUSTSTORE_PASSWORD));
+ }
+ auth = builder.build(Auth.class, url);
+ }
+ return auth;
+ }
+
+ /**
+  * Returns the {@link Misc} client, building it on first use.
+  *
+  * <p>Authentication is initialized first if it has not been yet, because the retryer and the
+  * request interceptor are configured from the resulting credentials.
+  */
+ public Misc getMisc() {
+   if (!restAuthInited) {
+     initAuth();
+   }
+
+   // Already built: reuse the cached client.
+   if (misc != null) {
+     return misc;
+   }
+
+   Builder miscBuilder =
+       new Builder()
+           .setRequestOptions(
+               opts.getInt(Options.IO_CONNECT_TIMEOUT_MS),
+               opts.getInt(Options.IO_READ_TIMEOUT_MS))
+           .setRetryer(
+               getAuth(),
+               basicAuth,
+               secret,
+               token,
+               opts.getInt(Options.IO_RETRY_INTERVAL_MS),
+               opts.getInt(Options.IO_MAX_RETRY_INTERVAL_MS),
+               opts.getInt(Options.IO_MAX_RETRY_ATTEMPTS),
+               opts.getInt(Options.IO_RETRY_INTERVAL_MS),
+               opts.getInt(Options.IO_MAX_RETRY_INTERVAL_MS),
+               opts.getInt(Options.IO_MAX_RETRY_ATTEMPTS))
+           .setRequestInterceptor(basicAuth, token, restAuthEnabled);
+   final boolean overHttps = url.trim().toLowerCase().startsWith("https://");
+   if (overHttps) {
+     miscBuilder.setSSL(
+         opts.getString(Options.SSL_MODE),
+         opts.getString(Options.SSL_TRUSTSTORE),
+         opts.getString(Options.SSL_TRUSTSTORE_TYPE),
+         opts.getString(Options.SSL_TRUSTSTORE_PASSWORD));
+   }
+   misc = miscBuilder.build(Misc.class, url);
+   return misc;
+ }
+
+ /**
+  * Get write client (/restpp/ddl), building it on first use.
+  *
+  * <p>Unlike the other clients, the second group of retry settings comes from the LOADING_*
+  * options rather than the IO_* options.
+  *
+  * @throws UnsupportedOperationException if the options are not of type WRITE
+  */
+ public Write getWrite() {
+   if (!Options.OptionType.WRITE.equals(opts.getOptionType())) {
+     throw new UnsupportedOperationException(
+         "Can't build write client for OptionType " + opts.getOptionType());
+   }
+
+   if (!restAuthInited) {
+     initAuth();
+   }
+
+   // Already built: reuse the cached client.
+   if (write != null) {
+     return write;
+   }
+
+   Builder writeBuilder =
+       new Builder()
+           .setRequestOptions(
+               opts.getInt(Options.IO_CONNECT_TIMEOUT_MS),
+               opts.getInt(Options.IO_READ_TIMEOUT_MS))
+           .setRetryer(
+               getAuth(),
+               basicAuth,
+               secret,
+               token,
+               opts.getInt(Options.IO_RETRY_INTERVAL_MS),
+               opts.getInt(Options.IO_MAX_RETRY_INTERVAL_MS),
+               opts.getInt(Options.IO_MAX_RETRY_ATTEMPTS),
+               opts.getInt(Options.LOADING_RETRY_INTERVAL_MS),
+               opts.getInt(Options.LOADING_MAX_RETRY_INTERVAL_MS),
+               opts.getInt(Options.LOADING_MAX_RETRY_ATTEMPTS))
+           .setRequestInterceptor(basicAuth, token, restAuthEnabled);
+   final boolean overHttps = url.trim().toLowerCase().startsWith("https://");
+   if (overHttps) {
+     writeBuilder.setSSL(
+         opts.getString(Options.SSL_MODE),
+         opts.getString(Options.SSL_TRUSTSTORE),
+         opts.getString(Options.SSL_TRUSTSTORE_TYPE),
+         opts.getString(Options.SSL_TRUSTSTORE_PASSWORD));
+   }
+   write = writeBuilder.build(Write.class, url);
+   return write;
+ }
+
+ /**
+ * Generate loading job id:
+ *
+ * 1. attach the basic auth header to the /restpp request 2. attach the bearer auth header to the
+ * /gsqlserver request
+ */
+public class RestppAuthInterceptor implements RequestInterceptor {
+
+  // Requests whose path contains this fragment need credentials even when RESTPP auth is off.
+  static final String GSQL_ENDPOINT = "/gsqlserver";
+
+  private final String basicAuth;
+  private final String token;
+  private final boolean restAuthEnabled;
+
+  /**
+   * @param basicAuth value placed after "Basic " in the Authorization header; may be empty
+   * @param token value placed after "Bearer " in the Authorization header; may be empty
+   * @param restAuthEnabled whether every request must carry the bearer token
+   */
+  public RestppAuthInterceptor(String basicAuth, String token, boolean restAuthEnabled) {
+    this.basicAuth = basicAuth;
+    this.token = token;
+    this.restAuthEnabled = restAuthEnabled;
+  }
+
+  /**
+   * Adds the Authorization header where one is required.
+   *
+   * @throws IllegalArgumentException if a /gsqlserver request is made while RESTPP auth is
+   *     disabled and neither a token nor basic auth credentials are available
+   */
+  @Override
+  public void apply(RequestTemplate template) {
+    // With rest auth enabled a token has been provided or requested, so every request is
+    // sent with the bearer header.
+    if (restAuthEnabled) {
+      template.header("Authorization", "Bearer " + token);
+      return;
+    }
+
+    // With rest auth disabled only the /gsqlserver endpoint still needs authentication.
+    if (!template.path().contains(GSQL_ENDPOINT)) {
+      return;
+    }
+
+    // A token (requested by user/pass, not a system token) and the user/pass pair are
+    // equivalent here; the token takes precedence when both exist.
+    final boolean tokenAvailable = !Utils.isEmpty(token);
+    if (!tokenAvailable && Utils.isEmpty(basicAuth)) {
+      throw new IllegalArgumentException(
+          "Failed to send request to "
+              + template.path()
+              + ", no username/password or token provided.");
+    }
+    template.header("Authorization", tokenAvailable ? "Bearer " + token : "Basic " + basicAuth);
+  }
+}
diff --git a/tools/etl/tg-spark-connector/src/main/java/com/tigergraph/spark/client/common/RestppDecoder.java b/tools/etl/tg-spark-connector/src/main/java/com/tigergraph/spark/client/common/RestppDecoder.java
new file mode 100644
index 00000000..4db9dc40
--- /dev/null
+++ b/tools/etl/tg-spark-connector/src/main/java/com/tigergraph/spark/client/common/RestppDecoder.java
@@ -0,0 +1,29 @@
+/**
+ * Copyright (c) 2023 TigerGraph Inc.
+ *
+ *
+ * 1. token expiration
+ * 2. transport exception, e.g. read timeout, connect timeout
+ * 3. server timeout/busy, e.g. 502, 503, 504
+ *
+ *
+ * The interval increases exponentially with each attempt, at a rate of nextInterval *= 1.5 (where
+ * 1.5 is the backoff factor), to the maximum interval.
+ *
+ * @return time in milliseconds from now until the next attempt.
+ */
+ private long nextMaxInterval(int retryType) {
+ long interval = (long) (period[retryType] * Math.pow(1.5, attempts[retryType] - 1));
+ return interval > maxPeriod[retryType] ? maxPeriod[retryType] : interval;
+ }
+}
diff --git a/tools/etl/tg-spark-connector/src/main/java/com/tigergraph/spark/constant/ErrorCode.java b/tools/etl/tg-spark-connector/src/main/java/com/tigergraph/spark/constant/ErrorCode.java
new file mode 100644
index 00000000..cebfcabb
--- /dev/null
+++ b/tools/etl/tg-spark-connector/src/main/java/com/tigergraph/spark/constant/ErrorCode.java
@@ -0,0 +1,19 @@
+/**
+ * Copyright (c) 2023 TigerGraph Inc.
+ *
+ *
+ *
+ *
+ *
+ *
+ */
+public class TigerGraphStreamingWrite extends TigerGraphWriteBase implements StreamingWrite {
+  private static final Logger logger = LoggerFactory.getLogger(TigerGraphStreamingWrite.class);
+
+  TigerGraphStreamingWrite(StructType schema, TigerGraphConnection conn) {
+    super(schema, conn);
+  }
+
+  /** Creates the writer factory from this write's schema and connection. */
+  @Override
+  public TigerGraphStreamWriterFactory createStreamingWriterFactory(PhysicalWriteInfo info) {
+    return new TigerGraphStreamWriterFactory(schema, conn);
+  }
+
+  /** Logs the outcome of a successfully written epoch; performs no other commit action. */
+  @Override
+  public void commit(long epochId, WriterCommitMessage[] messages) {
+    logger.info(
+        "Finished writing streaming updates({}) to TigerGraph {}", epochId, describeJobId());
+    logEpochSummary(messages, "The up-to-date overall loading statistics: {}");
+  }
+
+  /** Logs the outcome of an aborted epoch; performs no other abort action. */
+  @Override
+  public void abort(long epochId, WriterCommitMessage[] messages) {
+    logger.error(
+        "Aborted when writing streaming updates({}) to TigerGraph {}", epochId, describeJobId());
+    logEpochSummary(messages, "The overall loading statistics: {}");
+  }
+
+  /** Returns ", Job ID: <id>" when the connection has a loading job id, otherwise "". */
+  private String describeJobId() {
+    return conn.getLoadingJobId() == null ? "" : ", Job ID: " + conn.getLoadingJobId();
+  }
+
+  /**
+   * Logs the row totals and per-task summary for one epoch, then the loading statistics when
+   * they are available.
+   *
+   * @param messages commit messages of the epoch
+   * @param statsFormat log format (with one placeholder) used for the statistics line
+   */
+  private void logEpochSummary(WriterCommitMessage[] messages, String statsFormat) {
+    logger.info("Total processed rows by this update: {}", getTotalProcessedRows(messages));
+    logger.info("Processed rows of each task by this update:\n{}", getTaskSummury(messages));
+    RestppResponse stats = getLoadingStatistics();
+    if (stats == null) {
+      return;
+    }
+    // Strip user data from the statistics before they are written to the log.
+    Utils.removeUserData(stats.results);
+    logger.info(statsFormat, stats.results.toPrettyString());
+  }
+}
diff --git a/tools/etl/tg-spark-connector/src/main/java/com/tigergraph/spark/write/TigerGraphWriteBase.java b/tools/etl/tg-spark-connector/src/main/java/com/tigergraph/spark/write/TigerGraphWriteBase.java
new file mode 100644
index 00000000..fe41ca75
--- /dev/null
+++ b/tools/etl/tg-spark-connector/src/main/java/com/tigergraph/spark/write/TigerGraphWriteBase.java
@@ -0,0 +1,75 @@
+/**
+ * Copyright (c) 2023 TigerGraph Inc.
+ *
+ * > urlsList =
+ Arrays.asList(
+ new ArrayList<>(Arrays.asList("a", "b", "c")),
+ new ArrayList<>(Arrays.asList("a")),
+ new ArrayList<>(Arrays.asList("a a", "a")));
+ for (int i = 0; i < urlList.size(); i++) {
+ LoadBalanceTarget