-
Notifications
You must be signed in to change notification settings - Fork 52
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Replication job to trigger setup and carbon flow for replica tables
- Loading branch information
1 parent
3e8d387
commit 4ceabdd
Showing
9 changed files
with
234 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
97 changes: 97 additions & 0 deletions
97
...spark/src/main/java/com/linkedin/openhouse/jobs/scheduler/tasks/TableReplicationTask.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,97 @@ | ||
package com.linkedin.openhouse.jobs.scheduler.tasks; | ||
|
||
import com.linkedin.openhouse.common.JobState; | ||
import com.linkedin.openhouse.jobs.client.JobsClient; | ||
import com.linkedin.openhouse.jobs.client.TablesClient; | ||
import com.linkedin.openhouse.jobs.client.model.JobConf; | ||
import com.linkedin.openhouse.jobs.util.AppConstants; | ||
import com.linkedin.openhouse.jobs.util.OtelConfig; | ||
import com.linkedin.openhouse.jobs.util.ReplicationConfig; | ||
import com.linkedin.openhouse.jobs.util.TableMetadata; | ||
import io.opentelemetry.api.common.AttributeKey; | ||
import io.opentelemetry.api.common.Attributes; | ||
import io.opentelemetry.api.metrics.Meter; | ||
import java.util.List; | ||
import java.util.Optional; | ||
import lombok.extern.slf4j.Slf4j; | ||
|
||
/** A task to apply replication to a table. */ | ||
@Slf4j | ||
public class TableReplicationTask extends TableOperationTask<TableMetadata> { | ||
public static final JobConf.JobTypeEnum OPERATION_TYPE = JobConf.JobTypeEnum.REPLICATION; | ||
private static final Meter METER = OtelConfig.getMeter(OperationTask.class.getName()); | ||
|
||
protected TableReplicationTask( | ||
JobsClient jobsClient, TablesClient tablesClient, TableMetadata tableMetadata) { | ||
super(jobsClient, tablesClient, tableMetadata); | ||
} | ||
|
||
@Override | ||
public JobConf.JobTypeEnum getType() { | ||
return OPERATION_TYPE; | ||
} | ||
|
||
@Override | ||
protected List<String> getArgs() { | ||
return null; | ||
} | ||
|
||
/* Returns empty value iff the callable was interrupted by future cancel. */ | ||
@Override | ||
public Optional<JobState> call() { | ||
if (!shouldRun()) { | ||
log.info("Skipping job for {}, since the operation doesn't need to be run", metadata); | ||
return Optional.empty(); | ||
} | ||
List<ReplicationConfig> replicationConfigs = metadata.getReplicationConfig(); | ||
for (ReplicationConfig config : replicationConfigs) { | ||
log.info("Launching job for {}", metadata); | ||
Attributes typeAttributes = | ||
Attributes.of( | ||
AttributeKey.stringKey(AppConstants.TYPE), | ||
getType().getValue(), | ||
(metadata.getClass().equals(TableMetadata.class) | ||
? AttributeKey.stringKey(AppConstants.TABLE_NAME) | ||
: AttributeKey.stringKey(AppConstants.DATABASE_NAME)), | ||
metadata.getEntityName()); | ||
try { | ||
OtelConfig.executeWithStats( | ||
() -> { | ||
// this is a wrapper to convert boolean false to an exception | ||
if (!launchJob(config)) { | ||
throw new Exception(); | ||
} | ||
return null; | ||
}, | ||
METER, | ||
"submit", | ||
typeAttributes); | ||
} catch (Exception e) { | ||
log.error( | ||
"Could not launch job {} for {}. Exception {}", getType(), metadata, e.getMessage()); | ||
return Optional.empty(); | ||
} | ||
log.info("Launched a job for {}", metadata); | ||
// TODO: implement wait loop for job to finish and update metrics and job state | ||
// TODO: update the jobState with returned value from Airflow client | ||
} | ||
return Optional.of(Enum.valueOf(JobState.class, JobState.FAILED.name())); | ||
} | ||
|
||
protected boolean launchJob(ReplicationConfig config) { | ||
String jobName = | ||
String.format( | ||
"%s_%s_%s_%s", | ||
getType(), config.getCluster(), metadata.getDbName(), metadata.getTableName()); | ||
// TODO: Trigger Airflow job using airflow job client. Config can be used to create airflow | ||
// client params | ||
// TODO: Poll for job ID | ||
log.info("Triggering Replication job: {} via airflow client", jobName); | ||
return false; | ||
} | ||
|
||
@Override | ||
protected boolean shouldRun() { | ||
return metadata.isPrimary() && metadata.getReplicationConfig() != null; | ||
} | ||
} |
65 changes: 65 additions & 0 deletions
65
apps/spark/src/main/java/com/linkedin/openhouse/jobs/spark/ReplicationSparkApp.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
package com.linkedin.openhouse.jobs.spark; | ||
|
||
import com.linkedin.openhouse.jobs.spark.state.StateManager; | ||
import java.util.ArrayList; | ||
import java.util.List; | ||
import lombok.extern.slf4j.Slf4j; | ||
import org.apache.commons.cli.CommandLine; | ||
import org.apache.commons.cli.Option; | ||
|
||
/** | ||
* Class with main entry point to replication job to trigger airflow run to setup replication for a | ||
* table with defined replication config | ||
* | ||
* <p>Example of invocation: com.linkedin.openhouse.jobs.spark.ReplicationSparkApp --tableName | ||
* db.testTable | ||
*/ | ||
@Slf4j | ||
public class ReplicationSparkApp extends BaseTableSparkApp { | ||
private final String schedule; | ||
private final String cluster; | ||
private final String proxyUser; | ||
|
||
public ReplicationSparkApp( | ||
String jobId, | ||
StateManager stateManager, | ||
String fqtn, | ||
String schedule, | ||
String cluster, | ||
String proxyUser) { | ||
super(jobId, stateManager, fqtn); | ||
this.schedule = schedule; | ||
this.cluster = cluster; | ||
this.proxyUser = proxyUser; | ||
} | ||
|
||
@Override | ||
protected void runInner(Operations ops) { | ||
log.info( | ||
"Running ReplicationSparkApp for table {}, with parameters schedule: {}, cluster: {}, proxyUser: {}", | ||
fqtn, | ||
schedule, | ||
cluster, | ||
proxyUser); | ||
} | ||
|
||
public static void main(String[] args) { | ||
List<Option> extraOptions = new ArrayList<>(); | ||
extraOptions.add(new Option("t", "tableName", true, "Fully-qualified table name")); | ||
extraOptions.add(new Option("s", "schedule", true, "Replication job schedule in cron format")); | ||
extraOptions.add( | ||
new Option("p", "proxyUser", true, "Proxy user to run carbon replication job")); | ||
extraOptions.add(new Option("p", "cluster", true, "Destination cluster for replication")); | ||
|
||
CommandLine cmdLine = createCommandLine(args, extraOptions); | ||
ReplicationSparkApp app = | ||
new ReplicationSparkApp( | ||
getJobId(cmdLine), | ||
createStateManager(cmdLine), | ||
cmdLine.getOptionValue("tableName"), | ||
cmdLine.getOptionValue("schedule"), | ||
cmdLine.getOptionValue("cluster"), | ||
cmdLine.getOptionValue("proxyUser")); | ||
app.run(); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
17 changes: 17 additions & 0 deletions
17
apps/spark/src/main/java/com/linkedin/openhouse/jobs/util/ReplicationConfig.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
package com.linkedin.openhouse.jobs.util; | ||
|
||
import lombok.Builder; | ||
import lombok.EqualsAndHashCode; | ||
import lombok.Getter; | ||
import lombok.ToString; | ||
|
||
/**
 * Table replication config class. This is app side representation of /tables
 * policies->replication (the original comment said "retention", a copy-paste error).
 */
@Builder
@Getter
@EqualsAndHashCode
@ToString
public class ReplicationConfig {
  // Replication job schedule in cron format.
  private final String schedule;
  // Proxy user to run the carbon replication job as.
  private final String proxyUser;
  // Destination cluster for replication.
  private final String cluster;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters