From b86e2a716bf9a07e159db131f7557e77ef5c7f11 Mon Sep 17 00:00:00 2001 From: ksrinath Date: Tue, 19 Nov 2024 16:40:04 +0530 Subject: [PATCH 1/3] Initial iceberg catalog implementation --- .../graphql/resolvers/search/SearchUtils.java | 3 +- .../com/datahub/authorization/AuthUtil.java | 3 +- .../dataplatforminstance/IcebergWarehouse.pdl | 37 +++ .../com/linkedin/dataset/IcebergMetadata.pdl | 21 ++ .../src/main/resources/entity-registry.yml | 2 + metadata-service/iceberg-catalog/build.gradle | 50 ++++ .../iceberg/catalog/CredentialProvider.java | 50 ++++ .../iceberg/catalog/DataHubRestCatalog.java | 275 ++++++++++++++++++ .../iceberg/catalog/DataHubTableOps.java | 216 ++++++++++++++ .../iceberg/catalog/DataHubViewOps.java | 243 ++++++++++++++++ .../iceberg/catalog/DataOperation.java | 29 ++ .../iceberg/catalog/FileIOFactory.java | 14 + .../iceberg/catalog/IcebergApiController.java | 1 + .../iceberg/catalog/S3CredentialProvider.java | 146 ++++++++++ .../com/datahub/iceberg/catalog/Utils.java | 121 ++++++++ .../rest/AbstractIcebergController.java | 167 +++++++++++ .../catalog/rest/DataHubIcebergWarehouse.java | 80 +++++ .../catalog/rest/IcebergApiController.java | 23 ++ .../rest/IcebergConfigApiController.java | 21 ++ .../rest/IcebergExceptionHandlerAdvice.java | 53 ++++ .../rest/IcebergNamespaceApiController.java | 76 +++++ .../catalog/rest/IcebergSpringWebConfig.java | 28 ++ .../rest/IcebergTableApiController.java | 235 +++++++++++++++ .../rest/IcebergViewApiController.java | 136 +++++++++ .../tests/resources/iceberg-warehouse1.yaml | 0 metadata-service/war/build.gradle | 1 + .../servlet/IcebergCatalogServletConfig.java | 10 + .../authorization/PoliciesConfig.java | 41 ++- settings.gradle | 5 +- 29 files changed, 2081 insertions(+), 6 deletions(-) create mode 100644 metadata-models/src/main/pegasus/com/linkedin/dataplatforminstance/IcebergWarehouse.pdl create mode 100644 metadata-models/src/main/pegasus/com/linkedin/dataset/IcebergMetadata.pdl create mode 100644 metadata-service/iceberg-catalog/build.gradle create mode 100644 metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/CredentialProvider.java create mode 100644 metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/DataHubRestCatalog.java create mode 100644 metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/DataHubTableOps.java create mode 100644 metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/DataHubViewOps.java create mode 100644 metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/DataOperation.java create mode 100644 metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/FileIOFactory.java create mode 100644 metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/IcebergApiController.java create mode 100644 metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/S3CredentialProvider.java create mode 100644 metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/Utils.java create mode 100644 metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/rest/AbstractIcebergController.java create mode 100644 metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/rest/DataHubIcebergWarehouse.java create mode 100644 metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/rest/IcebergApiController.java create mode 100644 
metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/rest/IcebergConfigApiController.java create mode 100644 metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/rest/IcebergExceptionHandlerAdvice.java create mode 100644 metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/rest/IcebergNamespaceApiController.java create mode 100644 metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/rest/IcebergSpringWebConfig.java create mode 100644 metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/rest/IcebergTableApiController.java create mode 100644 metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/rest/IcebergViewApiController.java create mode 100644 metadata-service/iceberg-catalog/src/tests/resources/iceberg-warehouse1.yaml create mode 100644 metadata-service/war/src/main/java/com/linkedin/gms/servlet/IcebergCatalogServletConfig.java diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchUtils.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchUtils.java index fbcce1a5e6b065..1591b206d99b90 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchUtils.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchUtils.java @@ -91,7 +91,8 @@ private SearchUtils() {} EntityType.DATA_PRODUCT, EntityType.NOTEBOOK, EntityType.BUSINESS_ATTRIBUTE, - EntityType.SCHEMA_FIELD); + EntityType.SCHEMA_FIELD, + EntityType.DATA_PLATFORM_INSTANCE); /** Entities that are part of autocomplete by default in Auto Complete Across Entities */ public static final List AUTO_COMPLETE_ENTITY_TYPES = diff --git a/metadata-auth/auth-api/src/main/java/com/datahub/authorization/AuthUtil.java b/metadata-auth/auth-api/src/main/java/com/datahub/authorization/AuthUtil.java index 4f1f0686e76869..e1e2c46e9030ef 100644 --- a/metadata-auth/auth-api/src/main/java/com/datahub/authorization/AuthUtil.java +++ b/metadata-auth/auth-api/src/main/java/com/datahub/authorization/AuthUtil.java @@ -561,8 +561,7 @@ public static DisjunctivePrivilegeGroup buildDisjunctivePrivilegeGroup( return buildDisjunctivePrivilegeGroup(lookupAPIPrivilege(apiGroup, apiOperation, entityType)); } - @VisibleForTesting - static DisjunctivePrivilegeGroup buildDisjunctivePrivilegeGroup( + public static DisjunctivePrivilegeGroup buildDisjunctivePrivilegeGroup( final Disjunctive> privileges) { return new DisjunctivePrivilegeGroup( privileges.stream() diff --git a/metadata-models/src/main/pegasus/com/linkedin/dataplatforminstance/IcebergWarehouse.pdl b/metadata-models/src/main/pegasus/com/linkedin/dataplatforminstance/IcebergWarehouse.pdl new file mode 100644 index 00000000000000..c17252e6bb3e7c --- /dev/null +++ b/metadata-models/src/main/pegasus/com/linkedin/dataplatforminstance/IcebergWarehouse.pdl @@ -0,0 +1,37 @@ +namespace com.linkedin.dataplatforminstance + +import com.linkedin.common.Urn + +/** + * An Iceberg warehouse location and credentials whose reads and writes are governed by the DataHub catalog. + */ +@Aspect = { + "name": "icebergWarehouse" +} +record IcebergWarehouse { + + /** + * Path of the root for the backing store of the tables in the warehouse. 
+ */ + dataRoot: string + + /** + * clientId to be used to authenticate with storage hosting this warehouse + */ + clientId: Urn + + /** + * client secret to authenticate with storage hosting this warehouse + */ + clientSecret: Urn + + /** + * region where the warehouse is located. + */ + region: string + + /* + * Role to be used when vending credentials to writers. + */ + role: optional string +} \ No newline at end of file diff --git a/metadata-models/src/main/pegasus/com/linkedin/dataset/IcebergMetadata.pdl b/metadata-models/src/main/pegasus/com/linkedin/dataset/IcebergMetadata.pdl new file mode 100644 index 00000000000000..01307e785cf5ce --- /dev/null +++ b/metadata-models/src/main/pegasus/com/linkedin/dataset/IcebergMetadata.pdl @@ -0,0 +1,21 @@ +namespace com.linkedin.dataset + +import com.linkedin.common.Uri +import com.linkedin.common.CustomProperties +import com.linkedin.common.ExternalReference +import com.linkedin.common.TimeStamp + +/** + * Iceberg metadata associated with an Iceberg table/view + */ +@Aspect = { + "name": "icebergMetadata" +} +record IcebergMetadata { + + metadataPointer: string + + view: boolean + + // tableProperties: map[string, string] = { } +} diff --git a/metadata-models/src/main/resources/entity-registry.yml b/metadata-models/src/main/resources/entity-registry.yml index 96a1c4791f2102..8c35f4e8ddf56d 100644 --- a/metadata-models/src/main/resources/entity-registry.yml +++ b/metadata-models/src/main/resources/entity-registry.yml @@ -47,6 +47,7 @@ entities: - forms - partitionsSummary - versionProperties + - icebergMetadata - name: dataHubPolicy doc: DataHub Policies represent access policies granted to users or groups on metadata operations like edit, view etc. category: internal @@ -337,6 +338,7 @@ entities: - institutionalMemory - deprecation - status + - icebergWarehouse - name: mlModel category: core keyAspect: mlModelKey diff --git a/metadata-service/iceberg-catalog/build.gradle b/metadata-service/iceberg-catalog/build.gradle new file mode 100644 index 00000000000000..f94c88a8ce3000 --- /dev/null +++ b/metadata-service/iceberg-catalog/build.gradle @@ -0,0 +1,50 @@ +plugins { + id 'java' +} + +dependencies { + implementation project(':metadata-service:services') + implementation project(':metadata-models') + implementation project(':metadata-utils') + implementation project(':metadata-operation-context') + implementation project(':metadata-integration:java:datahub-schematron:lib') + implementation 'org.apache.iceberg:iceberg-core:1.6.1' + implementation 'org.apache.iceberg:iceberg-aws:1.6.1' + implementation 'software.amazon.awssdk:sts:2.29.29' + implementation 'software.amazon.awssdk:iam-policy-builder:2.29.29' + implementation 'software.amazon.awssdk:s3:2.29.29' + + implementation externalDependency.reflections + implementation externalDependency.springBoot + implementation externalDependency.springCore + implementation(externalDependency.springDocUI) { + exclude group: 'org.springframework.boot' + } + implementation externalDependency.springWeb + implementation externalDependency.springWebMVC + implementation externalDependency.springBeans + implementation externalDependency.springContext + implementation externalDependency.springBootAutoconfigure + implementation externalDependency.servletApi + implementation externalDependency.slf4jApi + compileOnly externalDependency.lombok + implementation externalDependency.antlr4Runtime + implementation externalDependency.antlr4 + implementation externalDependency.javaxInject + implementation 
externalDependency.avro + + annotationProcessor externalDependency.lombok + + testImplementation externalDependency.springBootTest + testImplementation project(':mock-entity-registry') + testImplementation externalDependency.springBoot + testImplementation externalDependency.testContainers + testImplementation externalDependency.testContainersKafka + testImplementation externalDependency.springKafka + testImplementation externalDependency.testng + testImplementation externalDependency.mockito + testImplementation externalDependency.logbackClassic + testImplementation externalDependency.jacksonCore + testImplementation externalDependency.jacksonDataBind + testImplementation externalDependency.springBootStarterWeb +} \ No newline at end of file diff --git a/metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/CredentialProvider.java b/metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/CredentialProvider.java new file mode 100644 index 00000000000000..3e9505f03e15a0 --- /dev/null +++ b/metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/CredentialProvider.java @@ -0,0 +1,50 @@ +package com.datahub.iceberg.catalog; + +import com.google.common.cache.Cache; +import com.google.common.cache.CacheBuilder; +import com.linkedin.metadata.authorization.PoliciesConfig; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.TimeUnit; +import lombok.AllArgsConstructor; +import lombok.EqualsAndHashCode; + +public abstract class CredentialProvider { + private static final int CREDS_DURATION_SECS = 15 * 60; + + @EqualsAndHashCode + @AllArgsConstructor + public static class CredentialsCacheKey { + public final String platformInstance; + public final PoliciesConfig.Privilege privilege; + public final Set<String> locations; + } + + @AllArgsConstructor + public static class StorageProviderCredentials { + public final String clientId; + public final String clientSecret; + public final String role; + public final String region; + } + + private final Cache<CredentialsCacheKey, Map<String, String>> credentialCache; + + public CredentialProvider() { + this.credentialCache = + CacheBuilder.newBuilder().expireAfterWrite(CREDS_DURATION_SECS, TimeUnit.SECONDS).build(); + } + + public Map<String, String> get( + CredentialsCacheKey key, StorageProviderCredentials storageProviderCredentials) { + try { + return credentialCache.get(key, () -> loadItem(key, storageProviderCredentials)); + } catch (ExecutionException e) { + throw new RuntimeException(e); + } + } + + protected abstract Map<String, String> loadItem( + CredentialsCacheKey key, StorageProviderCredentials storageProviderCredentials); +} diff --git a/metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/DataHubRestCatalog.java b/metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/DataHubRestCatalog.java new file mode 100644 index 00000000000000..efab3dd823e787 --- /dev/null +++ b/metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/DataHubRestCatalog.java @@ -0,0 +1,275 @@ +package com.datahub.iceberg.catalog; + +import static com.datahub.iceberg.catalog.Utils.*; +import static com.linkedin.metadata.Constants.*; +import static com.linkedin.metadata.utils.GenericRecordUtils.serializeAspect; + +import com.datahub.iceberg.catalog.rest.DataHubIcebergWarehouse; +import com.google.common.base.Joiner; +import com.linkedin.common.AuditStamp; +import com.linkedin.common.SubTypes; +import com.linkedin.common.urn.DatasetUrn; +import 
com.linkedin.common.urn.Urn; +import com.linkedin.container.Container; +import com.linkedin.container.ContainerProperties; +import com.linkedin.data.template.RecordTemplate; +import com.linkedin.data.template.StringArray; +import com.linkedin.events.metadata.ChangeType; +import com.linkedin.metadata.authorization.PoliciesConfig; +import com.linkedin.metadata.entity.EntityService; +import com.linkedin.mxe.MetadataChangeProposal; +import io.datahubproject.metadata.context.OperationContext; +import java.io.IOException; +import java.util.Arrays; +import java.util.List; +import java.util.Map; +import java.util.Set; +import org.apache.iceberg.*; +import org.apache.iceberg.aws.s3.S3FileIO; +import org.apache.iceberg.catalog.Namespace; +import org.apache.iceberg.catalog.SupportsNamespaces; +import org.apache.iceberg.catalog.TableIdentifier; +import org.apache.iceberg.exceptions.AlreadyExistsException; +import org.apache.iceberg.exceptions.NamespaceNotEmptyException; +import org.apache.iceberg.exceptions.NoSuchNamespaceException; +import org.apache.iceberg.io.CloseableGroup; +import org.apache.iceberg.io.FileIO; +import org.apache.iceberg.io.InputFile; +import org.apache.iceberg.view.BaseMetastoreViewCatalog; +import org.apache.iceberg.view.ViewOperations; + +public class DataHubRestCatalog extends BaseMetastoreViewCatalog implements SupportsNamespaces { + private final CredentialProvider credentialProvider; + + private final EntityService entityService; + + private final OperationContext operationContext; + + private final CloseableGroup closeableGroup; + + private final String CATALOG_POINTER_ROOT_DIR = "s3://srinath-dev/icebreaker/"; + + private final DataHubIcebergWarehouse warehouse; + + public DataHubRestCatalog( + EntityService entityService, + OperationContext operationContext, + DataHubIcebergWarehouse warehouse, + CredentialProvider credentialProvider) { + this.entityService = entityService; + this.operationContext = operationContext; + this.credentialProvider = credentialProvider; + this.warehouse = warehouse; + this.closeableGroup = new CloseableGroup(); + this.closeableGroup.setSuppressCloseFailure(true); + } + + @Override + public void renameView(TableIdentifier tableIdentifier, TableIdentifier tableIdentifier1) {} + + @Override + public void initialize(String name, Map properties) {} + + @Override + protected TableOperations newTableOps(TableIdentifier tableIdentifier) { + return new DataHubTableOps( + platformInstance(), + tableIdentifier, + entityService, + operationContext, + new S3FileIOFactory()); + } + + @Override + protected String defaultWarehouseLocation(TableIdentifier tableIdentifier) { + String warehouseRoot = warehouse.getDataRoot(); + return warehouseRoot + + CatalogUtil.fullTableName(platformInstance(), tableIdentifier).replaceAll("\\.", "/"); + } + + @Override + public List listTables(Namespace namespace) { + throw new UnsupportedOperationException(); + } + + @Override + public boolean dropTable(TableIdentifier tableIdentifier, boolean purge) { + if (purge) { + throw new UnsupportedOperationException(); + } + + return deletaDataset(tableIdentifier); + } + + private boolean deletaDataset(TableIdentifier tableIdentifier) { + DatasetUrn urn = datasetUrn(platformInstance(), tableIdentifier); + if (!entityService.exists(operationContext, urn)) { + return false; + } + entityService.deleteUrn(operationContext, urn); + return true; + } + + @Override + public Table registerTable(TableIdentifier identifier, String metadataFileLocation) { + if (tableExists(identifier)) { + throw 
new AlreadyExistsException("Table already exists: %s", identifier); + } + + FileIO io = + new S3FileIOFactory() + .createIO( + platformInstance(), + PoliciesConfig.DATA_READ_ONLY_PRIVILEGE, + Set.of(parentDir(metadataFileLocation))); + InputFile metadataFile = io.newInputFile(metadataFileLocation); + TableMetadata metadata = TableMetadataParser.read(io, metadataFile); + + TableOperations ops = newTableOps(identifier); + ops.commit(null, metadata); + + return new BaseTable(ops, fullTableName(name(), identifier), metricsReporter()); + } + + @Override + public void renameTable(TableIdentifier tableIdentifier, TableIdentifier tableIdentifier1) { + // TODO + } + + @Override + public void createNamespace(Namespace namespace, Map map) { + AuditStamp auditStamp = auditStamp(); + Urn containerUrn = containerUrn(platformInstance(), namespace); + + int nLevels = namespace.length(); + if (nLevels > 1) { + String[] parentLevels = Arrays.copyOfRange(namespace.levels(), 0, nLevels - 1); + Urn parentContainerUrn = containerUrn(platformInstance(), parentLevels); + if (!entityService.exists(operationContext, parentContainerUrn)) { + throw new NoSuchNamespaceException( + "Parent namespace %s does not exist in platformInstance-catalog %s", + Joiner.on(".").join(parentLevels), platformInstance()); + } + ingestContainerAspect( + containerUrn, + CONTAINER_ASPECT_NAME, + new Container().setContainer(parentContainerUrn), + auditStamp); + } + + ingestContainerAspect( + containerUrn, + CONTAINER_PROPERTIES_ASPECT_NAME, + new ContainerProperties().setName(namespace.levels()[nLevels - 1]), + auditStamp); + + ingestContainerAspect( + containerUrn, + SUB_TYPES_ASPECT_NAME, + new SubTypes().setTypeNames(new StringArray("IcebergNamespace")), + auditStamp); + + MetadataChangeProposal platformInstanceMcp = + platformInstanceMcp(platformInstance(), containerUrn, CONTAINER_ENTITY_NAME); + ingestMcp(platformInstanceMcp, auditStamp); + } + + @Override + public List listNamespaces(Namespace namespace) throws NoSuchNamespaceException { + return List.of(); + } + + @Override + public Map loadNamespaceMetadata(Namespace namespace) + throws NoSuchNamespaceException { + if (entityService.exists(operationContext, containerUrn(platformInstance(), namespace))) { + return Map.of(); + } else { + throw new NoSuchNamespaceException("Namespace does not exist: " + namespace); + } + } + + @Override + public boolean dropNamespace(Namespace namespace) throws NamespaceNotEmptyException { + return false; + } + + @Override + public boolean setProperties(Namespace namespace, Map map) + throws NoSuchNamespaceException { + return false; + } + + @Override + public boolean removeProperties(Namespace namespace, Set set) + throws NoSuchNamespaceException { + return false; + } + + @Override + public void close() throws IOException { + super.close(); + this.closeableGroup.close(); + } + + private void ingestContainerAspect( + Urn containerUrn, String aspectName, RecordTemplate aspect, AuditStamp auditStamp) { + MetadataChangeProposal mcp = new MetadataChangeProposal(); + mcp.setEntityUrn(containerUrn); + mcp.setEntityType(CONTAINER_ENTITY_NAME); + mcp.setAspectName(aspectName); + mcp.setAspect(serializeAspect(aspect)); + mcp.setChangeType(ChangeType.UPSERT); + ingestMcp(mcp, auditStamp); + } + + private void ingestMcp(MetadataChangeProposal mcp, AuditStamp auditStamp) { + entityService.ingestProposal(operationContext, mcp, auditStamp, false); + } + + private class S3FileIOFactory implements FileIOFactory { + @Override + public FileIO createIO( + String 
platformInstance, PoliciesConfig.Privilege privilege, Set locations) { + + FileIO io = new S3FileIO(); + Map creds = + credentialProvider.get( + new S3CredentialProvider.CredentialsCacheKey(platformInstance, privilege, locations), + warehouse.getStorageProviderCredentials()); + io.initialize(creds); + closeableGroup.addCloseable(io); + return io; + } + + @Override + public FileIO createIO( + String platformInstance, PoliciesConfig.Privilege privilege, TableMetadata tableMetadata) { + return createIO(platformInstance, privilege, locations(tableMetadata)); + } + } + + @Override + protected ViewOperations newViewOps(TableIdentifier tableIdentifier) { + return new DataHubViewOps( + platformInstance(), + tableIdentifier, + entityService, + operationContext, + new S3FileIOFactory()); + } + + @Override + public List listViews(Namespace namespace) { + return List.of(); + } + + @Override + public boolean dropView(TableIdentifier tableIdentifier) { + return deletaDataset(tableIdentifier); + } + + private String platformInstance() { + return warehouse.getPlatformInstance(); + } +} diff --git a/metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/DataHubTableOps.java b/metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/DataHubTableOps.java new file mode 100644 index 00000000000000..a2b6cc0965addb --- /dev/null +++ b/metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/DataHubTableOps.java @@ -0,0 +1,216 @@ +package com.datahub.iceberg.catalog; + +import static com.datahub.iceberg.catalog.Utils.*; +import static com.linkedin.metadata.Constants.*; +import static com.linkedin.metadata.aspect.validation.ConditionalWriteValidator.HTTP_HEADER_IF_VERSION_MATCH; +import static com.linkedin.metadata.utils.GenericRecordUtils.serializeAspect; + +import com.linkedin.common.AuditStamp; +import com.linkedin.common.urn.DatasetUrn; +import com.linkedin.container.Container; +import com.linkedin.data.template.StringMap; +import com.linkedin.dataset.DatasetProperties; +import com.linkedin.dataset.IcebergMetadata; +import com.linkedin.entity.EnvelopedAspect; +import com.linkedin.events.metadata.ChangeType; +import com.linkedin.metadata.authorization.PoliciesConfig; +import com.linkedin.metadata.entity.EntityService; +import com.linkedin.mxe.MetadataChangeProposal; +import com.linkedin.schema.SchemaMetadata; +import io.datahubproject.metadata.context.OperationContext; +import io.datahubproject.schematron.converters.avro.AvroSchemaConverter; +import java.util.Collections; +import java.util.Set; +import lombok.SneakyThrows; +import org.apache.avro.Schema; +import org.apache.iceberg.BaseMetastoreTableOperations; +import org.apache.iceberg.TableMetadata; +import org.apache.iceberg.TableMetadataParser; +import org.apache.iceberg.avro.AvroSchemaUtil; +import org.apache.iceberg.catalog.TableIdentifier; +import org.apache.iceberg.exceptions.AlreadyExistsException; +import org.apache.iceberg.exceptions.CommitFailedException; +import org.apache.iceberg.exceptions.NoSuchTableException; +import org.apache.iceberg.exceptions.ValidationException; +import org.apache.iceberg.io.FileIO; + +public class DataHubTableOps extends BaseMetastoreTableOperations { + + private static final String DATASET_ICEBERG_METADATA_ASPECT_NAME = "icebergMetadata"; + + private final String platformInstance; + private FileIO io; + private final TableIdentifier tableIdentifier; + private final DatasetUrn urn; + private final EntityService entityService; + private final OperationContext 
operationContext; + private final FileIOFactory fileIOFactory; + private volatile TableMetadata currentMetadata = null; + private volatile boolean shouldRefresh = true; + + public DataHubTableOps( + String platformInstance, + TableIdentifier tableIdentifier, + EntityService entityService, + OperationContext operationContext, + FileIOFactory fileIOFactory) { + this.platformInstance = platformInstance; + this.tableIdentifier = tableIdentifier; + this.entityService = entityService; + this.operationContext = operationContext; + this.fileIOFactory = fileIOFactory; + this.urn = datasetUrn(platformInstance, tableIdentifier); + } + + @Override + public TableMetadata refresh() { + IcebergMetadata icebergMeta = + (IcebergMetadata) + entityService.getLatestAspect( + operationContext, urn, DATASET_ICEBERG_METADATA_ASPECT_NAME); + if (icebergMeta == null || icebergMeta.isView()) { + return null; + } + String location = icebergMeta.getMetadataPointer(); + if (io == null) { + io = + fileIOFactory.createIO( + platformInstance, + PoliciesConfig.DATA_READ_ONLY_PRIVILEGE, + Set.of(parentDir(location))); + } + // TODO check UUID ala HadoopTableOps? + currentMetadata = TableMetadataParser.read(io(), location); + shouldRefresh = false; + return currentMetadata; + } + + @Override + public TableMetadata current() { + if (shouldRefresh) { + return refresh(); + } + return currentMetadata; + } + + @SneakyThrows + @Override + protected void doCommit(TableMetadata base, TableMetadata metadata) { + + EnvelopedAspect existingEnveloped = + entityService.getLatestEnvelopedAspect( + operationContext, DATASET_ENTITY_NAME, urn, DATASET_ICEBERG_METADATA_ASPECT_NAME); + + boolean creation = base == null; + + if (existingEnveloped != null) { + if (creation) { + throw new AlreadyExistsException("Table already exists: %s", tableName()); + } + IcebergMetadata existingMetadata = new IcebergMetadata(existingEnveloped.getValue().data()); + if (existingMetadata.isView()) { + throw new NoSuchTableException("%s is not a table", tableName()); + } + if (!existingMetadata.getMetadataPointer().equals(base.metadataFileLocation())) { + throw new CommitFailedException( + "Cannot commit to table %s: stale table metadata", tableName()); + } + } + + // attempt to commit + io = + fileIOFactory.createIO( + platformInstance, PoliciesConfig.DATA_READ_WRITE_PRIVILEGE, metadata); + String newMetadataLocation = writeNewMetadataIfRequired(base == null, metadata); + + MetadataChangeProposal icebergMcp = newMcp(DATASET_ICEBERG_METADATA_ASPECT_NAME); + icebergMcp.setAspect( + serializeAspect( + new IcebergMetadata().setMetadataPointer(newMetadataLocation).setView(false))); + + if (creation) { + icebergMcp.setChangeType(ChangeType.CREATE_ENTITY); + } else { + String existingVersion = existingEnveloped.getSystemMetadata().getVersion(); + icebergMcp.setHeaders( + new StringMap(Collections.singletonMap(HTTP_HEADER_IF_VERSION_MATCH, existingVersion))); + icebergMcp.setChangeType( + ChangeType.UPSERT); // ideally should be UPDATE, but seems not supported yet. + } + AuditStamp auditStamp = auditStamp(); + try { + ingestMcp(icebergMcp, auditStamp); + } catch (ValidationException e) { + if (creation) { + // this is likely because table already exists i.e. 
created concurrently in a race condition + throw new AlreadyExistsException("Table already exists: %s", tableName()); + } else { + throw new CommitFailedException( + "Cannot commit to table %s: stale table metadata", tableName()); + } + } + + if (base == null || (base.currentSchemaId() != metadata.currentSchemaId())) { + // schema changed + Schema avroSchema = AvroSchemaUtil.convert(metadata.schema(), tableName()); + AvroSchemaConverter converter = AvroSchemaConverter.builder().build(); + SchemaMetadata schemaMetadata = + converter.toDataHubSchema(avroSchema, false, false, platformUrn(), null); + MetadataChangeProposal schemaMcp = newMcp(SCHEMA_METADATA_ASPECT_NAME); + schemaMcp.setAspect(serializeAspect(schemaMetadata)); + schemaMcp.setChangeType(ChangeType.UPSERT); + ingestMcp(schemaMcp, auditStamp); + } + + if (creation) { + DatasetProperties datasetProperties = new DatasetProperties(); + datasetProperties.setName(tableIdentifier.name()); + datasetProperties.setQualifiedName(tableName()); + + MetadataChangeProposal datasetPropertiesMcp = newMcp(DATASET_PROPERTIES_ASPECT_NAME); + datasetPropertiesMcp.setAspect(serializeAspect(datasetProperties)); + datasetPropertiesMcp.setChangeType(ChangeType.UPSERT); + + ingestMcp(datasetPropertiesMcp, auditStamp); + + MetadataChangeProposal platformInstanceMcp = + platformInstanceMcp(platformInstance, urn, DATASET_ENTITY_NAME); + ingestMcp(platformInstanceMcp, auditStamp); + + Container container = new Container(); + container.setContainer(containerUrn(platformInstance, tableIdentifier.namespace())); + + MetadataChangeProposal containerMcp = newMcp(CONTAINER_ASPECT_NAME); + containerMcp.setAspect(serializeAspect(container)); + containerMcp.setChangeType(ChangeType.UPSERT); + ingestMcp(containerMcp, auditStamp); + } + } + + @Override + public void commit(TableMetadata base, TableMetadata metadata) { + super.commit(base, metadata); + } + + @Override + protected String tableName() { + return fullTableName(platformInstance, tableIdentifier); + } + + @Override + public FileIO io() { + return io; + } + + private MetadataChangeProposal newMcp(String aspectName) { + MetadataChangeProposal mcp = new MetadataChangeProposal(); + mcp.setEntityUrn(urn); + mcp.setEntityType(DATASET_ENTITY_NAME); + mcp.setAspectName(aspectName); + return mcp; + } + + private void ingestMcp(MetadataChangeProposal mcp, AuditStamp auditStamp) { + entityService.ingestProposal(operationContext, mcp, auditStamp, false); + } +} diff --git a/metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/DataHubViewOps.java b/metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/DataHubViewOps.java new file mode 100644 index 00000000000000..8fdd61b863600e --- /dev/null +++ b/metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/DataHubViewOps.java @@ -0,0 +1,243 @@ +package com.datahub.iceberg.catalog; + +import static com.datahub.iceberg.catalog.Utils.*; +import static com.linkedin.metadata.Constants.*; +import static com.linkedin.metadata.aspect.validation.ConditionalWriteValidator.HTTP_HEADER_IF_VERSION_MATCH; +import static com.linkedin.metadata.utils.GenericRecordUtils.serializeAspect; + +import com.linkedin.common.AuditStamp; +import com.linkedin.common.urn.DatasetUrn; +import com.linkedin.container.Container; +import com.linkedin.data.template.StringMap; +import com.linkedin.dataset.DatasetProperties; +import com.linkedin.dataset.IcebergMetadata; +import com.linkedin.dataset.ViewProperties; +import 
com.linkedin.entity.EnvelopedAspect; +import com.linkedin.events.metadata.ChangeType; +import com.linkedin.metadata.authorization.PoliciesConfig; +import com.linkedin.metadata.entity.EntityService; +import com.linkedin.mxe.MetadataChangeProposal; +import com.linkedin.schema.SchemaMetadata; +import io.datahubproject.metadata.context.OperationContext; +import io.datahubproject.schematron.converters.avro.AvroSchemaConverter; +import java.util.Collections; +import java.util.Set; +import lombok.SneakyThrows; +import lombok.extern.slf4j.Slf4j; +import org.apache.avro.Schema; +import org.apache.iceberg.avro.AvroSchemaUtil; +import org.apache.iceberg.catalog.TableIdentifier; +import org.apache.iceberg.exceptions.AlreadyExistsException; +import org.apache.iceberg.exceptions.CommitFailedException; +import org.apache.iceberg.exceptions.NoSuchTableException; +import org.apache.iceberg.exceptions.ValidationException; +import org.apache.iceberg.io.FileIO; +import org.apache.iceberg.view.*; + +@Slf4j +public class DataHubViewOps extends BaseViewOperations { + + private static final String DATASET_ICEBERG_METADATA_ASPECT_NAME = "icebergMetadata"; + + private final String platformInstance; + private FileIO io; + private final TableIdentifier tableIdentifier; + private final DatasetUrn urn; + private final EntityService entityService; + private final OperationContext operationContext; + private final FileIOFactory fileIOFactory; + private volatile ViewMetadata currentMetadata = null; + private volatile boolean shouldRefresh = true; + + public DataHubViewOps( + String platformInstance, + TableIdentifier tableIdentifier, + EntityService entityService, + OperationContext operationContext, + FileIOFactory fileIOFactory) { + this.platformInstance = platformInstance; + this.tableIdentifier = tableIdentifier; + this.entityService = entityService; + this.operationContext = operationContext; + this.fileIOFactory = fileIOFactory; + this.urn = datasetUrn(platformInstance, tableIdentifier); + } + + @Override + public ViewMetadata refresh() { + IcebergMetadata icebergMeta = + (IcebergMetadata) + entityService.getLatestAspect( + operationContext, urn, DATASET_ICEBERG_METADATA_ASPECT_NAME); + if (icebergMeta == null || !icebergMeta.isView()) { + return null; + } + String location = icebergMeta.getMetadataPointer(); + if (io == null) { + String locationDir = location.substring(0, location.lastIndexOf("/")); + io = + fileIOFactory.createIO( + platformInstance, PoliciesConfig.DATA_READ_ONLY_PRIVILEGE, Set.of(locationDir)); + } + // TODO check UUID ala HadoopTableOps? 
+ currentMetadata = ViewMetadataParser.read(io().newInputFile(location)); + shouldRefresh = false; + return currentMetadata; + } + + @Override + public ViewMetadata current() { + if (shouldRefresh) { + return refresh(); + } + return currentMetadata; + } + + @Override + protected void doRefresh() { + throw new UnsupportedOperationException(); + } + + @SneakyThrows + @Override + protected void doCommit(ViewMetadata base, ViewMetadata metadata) { + + EnvelopedAspect existingEnveloped = + entityService.getLatestEnvelopedAspect( + operationContext, DATASET_ENTITY_NAME, urn, DATASET_ICEBERG_METADATA_ASPECT_NAME); + + boolean creation = base == null; + + if (existingEnveloped != null) { + if (creation) { + throw new AlreadyExistsException("Table already exists: %s", viewName()); + } + IcebergMetadata existingMetadata = new IcebergMetadata(existingEnveloped.getValue().data()); + if (!existingMetadata.isView()) { + throw new NoSuchTableException("%s is not a view", viewName()); + } + if (!existingMetadata.getMetadataPointer().equals(base.metadataFileLocation())) { + throw new CommitFailedException( + "Cannot commit to table %s: stale table metadata", viewName()); + } + } + + // attempt to commit + io = + fileIOFactory.createIO( + platformInstance, + PoliciesConfig.DATA_READ_WRITE_PRIVILEGE, + Set.of(metadata.location())); + String newMetadataLocation = writeNewMetadataIfRequired(metadata); + + MetadataChangeProposal icebergMcp = newMcp(DATASET_ICEBERG_METADATA_ASPECT_NAME); + icebergMcp.setAspect( + serializeAspect( + new IcebergMetadata().setMetadataPointer(newMetadataLocation).setView(true))); + + if (creation) { + icebergMcp.setChangeType(ChangeType.CREATE_ENTITY); + } else { + String existingVersion = existingEnveloped.getSystemMetadata().getVersion(); + icebergMcp.setHeaders( + new StringMap(Collections.singletonMap(HTTP_HEADER_IF_VERSION_MATCH, existingVersion))); + icebergMcp.setChangeType( + ChangeType.UPSERT); // ideally should be UPDATE, but seems not supported yet. + } + AuditStamp auditStamp = auditStamp(); + try { + ingestMcp(icebergMcp, auditStamp); + } catch (ValidationException e) { + if (creation) { + // this is likely because table already exists i.e. 
created concurrently in a race condition + throw new AlreadyExistsException("View already exists: %s", viewName()); + } else { + throw new CommitFailedException( + "Cannot commit to table %s: stale table metadata", viewName()); + } + } + + if (base == null || (base.currentSchemaId() != metadata.currentSchemaId())) { + // schema changed + Schema avroSchema = AvroSchemaUtil.convert(metadata.schema(), viewName()); + AvroSchemaConverter converter = AvroSchemaConverter.builder().build(); + SchemaMetadata schemaMetadata = + converter.toDataHubSchema(avroSchema, false, false, platformUrn(), null); + MetadataChangeProposal schemaMcp = newMcp(SCHEMA_METADATA_ASPECT_NAME); + schemaMcp.setAspect(serializeAspect(schemaMetadata)); + schemaMcp.setChangeType(ChangeType.UPSERT); + ingestMcp(schemaMcp, auditStamp); + } + + if (creation) { + DatasetProperties datasetProperties = new DatasetProperties(); + datasetProperties.setName(tableIdentifier.name()); + datasetProperties.setQualifiedName(viewName()); + + MetadataChangeProposal datasetPropertiesMcp = newMcp(DATASET_PROPERTIES_ASPECT_NAME); + datasetPropertiesMcp.setAspect(serializeAspect(datasetProperties)); + datasetPropertiesMcp.setChangeType(ChangeType.UPSERT); + + ingestMcp(datasetPropertiesMcp, auditStamp); + + MetadataChangeProposal platformInstanceMcp = + platformInstanceMcp(platformInstance, urn, DATASET_ENTITY_NAME); + ingestMcp(platformInstanceMcp, auditStamp); + + Container container = new Container(); + container.setContainer(containerUrn(platformInstance, tableIdentifier.namespace())); + + MetadataChangeProposal containerMcp = newMcp(CONTAINER_ASPECT_NAME); + containerMcp.setAspect(serializeAspect(container)); + containerMcp.setChangeType(ChangeType.UPSERT); + ingestMcp(containerMcp, auditStamp); + } + + SQLViewRepresentation sqlViewRepresentation = null; + for (ViewRepresentation representation : metadata.currentVersion().representations()) { + if (representation instanceof SQLViewRepresentation) { + sqlViewRepresentation = (SQLViewRepresentation) representation; + // use only first representation, as DataHub model currently supports one SQL. + break; + } + } + if (sqlViewRepresentation == null) { + // base class is ensuring that a representation has been specified in case of replace-view. + // so, this shouldn't occur. 
+ log.warn("No SQL representation for view {}", viewName()); + } else { + ViewProperties viewProperties = + new ViewProperties() + .setViewLogic(sqlViewRepresentation.sql()) + .setMaterialized(false) + .setViewLanguage(sqlViewRepresentation.dialect()); + MetadataChangeProposal viewPropertiesMcp = newMcp(VIEW_PROPERTIES_ASPECT_NAME); + viewPropertiesMcp.setAspect(serializeAspect(viewProperties)); + viewPropertiesMcp.setChangeType(ChangeType.UPSERT); + + ingestMcp(viewPropertiesMcp, auditStamp); + } + } + + @Override + protected String viewName() { + return fullTableName(platformInstance, tableIdentifier); + } + + @Override + public FileIO io() { + return io; + } + + private MetadataChangeProposal newMcp(String aspectName) { + MetadataChangeProposal mcp = new MetadataChangeProposal(); + mcp.setEntityUrn(urn); + mcp.setEntityType(DATASET_ENTITY_NAME); + mcp.setAspectName(aspectName); + return mcp; + } + + private void ingestMcp(MetadataChangeProposal mcp, AuditStamp auditStamp) { + entityService.ingestProposal(operationContext, mcp, auditStamp, false); + } +} diff --git a/metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/DataOperation.java b/metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/DataOperation.java new file mode 100644 index 00000000000000..67941b05f4e2c4 --- /dev/null +++ b/metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/DataOperation.java @@ -0,0 +1,29 @@ +package com.datahub.iceberg.catalog; + +import static com.linkedin.metadata.authorization.PoliciesConfig.*; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.Lists; +import com.linkedin.metadata.authorization.PoliciesConfig; +import java.util.List; + +public enum DataOperation { + READ_ONLY( + DATA_READ_ONLY_PRIVILEGE, + DATA_MANAGE_VIEWS_PRIVILEGE, + DATA_READ_WRITE_PRIVILEGE, + DATA_MANAGE_TABLES_PRIVILEGE), + + READ_WRITE(DATA_READ_WRITE_PRIVILEGE, DATA_MANAGE_TABLES_PRIVILEGE), + MANAGE_VIEWS(DATA_MANAGE_VIEWS_PRIVILEGE, DATA_MANAGE_TABLES_PRIVILEGE), + MANAGE_TABLES(DATA_MANAGE_TABLES_PRIVILEGE), + MANAGE_NAMESPACES(DATA_MANAGE_NAMESPACES_PRIVILEGE); + + public final List ascendingPrivileges; + public final List descendingPrivileges; + + DataOperation(PoliciesConfig.Privilege... 
ascendingPrivileges) { + this.ascendingPrivileges = ImmutableList.copyOf(ascendingPrivileges); + this.descendingPrivileges = Lists.reverse(this.ascendingPrivileges); + } +} diff --git a/metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/FileIOFactory.java b/metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/FileIOFactory.java new file mode 100644 index 00000000000000..8726bce1477486 --- /dev/null +++ b/metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/FileIOFactory.java @@ -0,0 +1,14 @@ +package com.datahub.iceberg.catalog; + +import com.linkedin.metadata.authorization.PoliciesConfig; +import java.util.Set; +import org.apache.iceberg.TableMetadata; +import org.apache.iceberg.io.FileIO; + +interface FileIOFactory { + FileIO createIO( + String platformInstance, PoliciesConfig.Privilege privilege, Set<String> locations); + + FileIO createIO( + String platformInstance, PoliciesConfig.Privilege privilege, TableMetadata tableMetadata); +} diff --git a/metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/IcebergApiController.java b/metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/IcebergApiController.java new file mode 100644 index 00000000000000..8b137891791fe9 --- /dev/null +++ b/metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/IcebergApiController.java @@ -0,0 +1 @@ + diff --git a/metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/S3CredentialProvider.java b/metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/S3CredentialProvider.java new file mode 100644 index 00000000000000..64b2e4f873b26a --- /dev/null +++ b/metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/S3CredentialProvider.java @@ -0,0 +1,146 @@ +package com.datahub.iceberg.catalog; + +import static com.linkedin.metadata.authorization.PoliciesConfig.*; + +import java.net.URI; +import java.util.HashMap; +import java.util.Map; +import java.util.Set; +import lombok.EqualsAndHashCode; +import org.apache.iceberg.exceptions.BadRequestException; +import org.springframework.stereotype.Component; +import software.amazon.awssdk.auth.credentials.AwsBasicCredentials; +import software.amazon.awssdk.auth.credentials.StaticCredentialsProvider; +import software.amazon.awssdk.policybuilder.iam.IamConditionOperator; +import software.amazon.awssdk.policybuilder.iam.IamEffect; +import software.amazon.awssdk.policybuilder.iam.IamPolicy; +import software.amazon.awssdk.policybuilder.iam.IamStatement; +import software.amazon.awssdk.regions.Region; +import software.amazon.awssdk.services.sts.StsClient; +import software.amazon.awssdk.services.sts.model.AssumeRoleRequest; +import software.amazon.awssdk.services.sts.model.AssumeRoleResponse; + +@Component +class S3CredentialProvider extends CredentialProvider { + private static final int CREDS_DURATION_SECS = 15 * 60; + + private static final Region REGION = Region.US_EAST_1; + + protected Map<String, String> loadItem( + CredentialsCacheKey key, StorageProviderCredentials storageProviderCredentials) { + StsClient stsClient = stsClient(storageProviderCredentials); + String sessionPolicy = policyString(key); + AssumeRoleResponse response = + stsClient.assumeRole( + AssumeRoleRequest.builder() + .roleArn(storageProviderCredentials.role) + .roleSessionName("IcebreakerSession") // TODO: name suggests this should per + .durationSeconds(CREDS_DURATION_SECS) + .policy(sessionPolicy) + .build()); + + return Map.of( 
"client.region", + REGION.id(), + "s3.access-key-id", + response.credentials().accessKeyId(), + "s3.secret-access-key", + response.credentials().secretAccessKey(), + "s3.session-token", + response.credentials().sessionToken()); + } + + private StsClient stsClient(StorageProviderCredentials storageProviderCredentials) { + AwsBasicCredentials credentials = + AwsBasicCredentials.create( + storageProviderCredentials.clientId, storageProviderCredentials.clientSecret); + return StsClient.builder() + .region(Region.of(storageProviderCredentials.region)) + .credentialsProvider(StaticCredentialsProvider.create(credentials)) + .region(Region.of(storageProviderCredentials.region)) + .build(); + } + + private String policyString(CredentialsCacheKey key) { + if (key.locations == null || key.locations.isEmpty()) { + throw new BadRequestException("Unspecified locations for credential vending."); + } + if (!Set.of(DATA_READ_WRITE_PRIVILEGE, DATA_READ_ONLY_PRIVILEGE).contains(key.privilege)) { + throw new IllegalStateException("Unsupported credential vending privilege " + key.privilege); + } + + Map bucketListPolicy = new HashMap<>(); + IamStatement.Builder objectsPolicy = + IamStatement.builder() + .effect(IamEffect.ALLOW) + .addAction("s3:GetObject") + .addAction("s3:GetObjectVersion"); + + if (DATA_READ_WRITE_PRIVILEGE.equals(key.privilege)) { + objectsPolicy.addAction("s3:PutObject").addAction("s3:DeleteObject"); + } + + key.locations.forEach( + location -> { + S3Location s3Location = new S3Location(location); + objectsPolicy.addResource(s3Location.objectsArn()); + bucketListPolicy + .computeIfAbsent( + s3Location.bucketArn(), + bucketArn -> + IamStatement.builder() + .effect(IamEffect.ALLOW) + .addAction("s3:ListBucket") + .addResource(bucketArn)) + .addCondition( + IamConditionOperator.STRING_LIKE, "s3:prefix", s3Location.objectsPathPrefix()); + }); + + IamPolicy.Builder sessionPolicyBuilder = IamPolicy.builder(); + sessionPolicyBuilder.addStatement(objectsPolicy.build()); + + for (Map.Entry bucketListStatement : + bucketListPolicy.entrySet()) { + sessionPolicyBuilder.addStatement(bucketListStatement.getValue().build()); + + String bucketArn = bucketListStatement.getKey(); + sessionPolicyBuilder.addStatement( + IamStatement.builder() + .effect(IamEffect.ALLOW) + .addAction("s3:GetBucketLocation") + .addResource(bucketArn) + .build()); + } + return sessionPolicyBuilder.build().toJson(); + } + + @EqualsAndHashCode + private static class S3Location { + private final String bucket; + private final String path; + private final String s3ArnPrefix; + + S3Location(String location) { + URI uri = URI.create(location); + this.bucket = uri.getAuthority(); + String path = uri.getPath(); + if (path.startsWith("/")) { + path = path.substring(1); + } + this.path = path; + this.s3ArnPrefix = "arn:aws:s3:::"; + } + + String objectsArn() { + return bucketArn() + "/" + objectsPathPrefix(); + } + + String bucketArn() { + return s3ArnPrefix + bucket; + } + + String objectsPathPrefix() { + return path + "/*"; + } + } +} diff --git a/metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/Utils.java b/metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/Utils.java new file mode 100644 index 00000000000000..2c0212308cc5a3 --- /dev/null +++ b/metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/Utils.java @@ -0,0 +1,121 @@ +package com.datahub.iceberg.catalog; + +import static com.linkedin.metadata.Constants.DATA_PLATFORM_INSTANCE_ASPECT_NAME; +import static 
com.linkedin.metadata.Constants.DATA_PLATFORM_INSTANCE_ENTITY_NAME; +import static com.linkedin.metadata.utils.GenericRecordUtils.serializeAspect; + +import com.linkedin.common.AuditStamp; +import com.linkedin.common.DataPlatformInstance; +import com.linkedin.common.FabricType; +import com.linkedin.common.urn.DataPlatformUrn; +import com.linkedin.common.urn.DatasetUrn; +import com.linkedin.common.urn.Urn; +import com.linkedin.events.metadata.ChangeType; +import com.linkedin.metadata.Constants; +import com.linkedin.metadata.key.DataPlatformInstanceKey; +import com.linkedin.metadata.utils.EntityKeyUtils; +import com.linkedin.mxe.MetadataChangeProposal; +import java.net.URISyntaxException; +import java.net.URLEncoder; +import java.nio.charset.Charset; +import java.util.HashSet; +import java.util.Set; +import lombok.SneakyThrows; +import org.apache.iceberg.CatalogUtil; +import org.apache.iceberg.TableMetadata; +import org.apache.iceberg.TableProperties; +import org.apache.iceberg.catalog.Namespace; +import org.apache.iceberg.catalog.TableIdentifier; +import org.apache.iceberg.rest.RESTUtil; + +public class Utils { + private static final String PLATFORM_NAME = "nativeIceberg"; + + public static AuditStamp auditStamp() { + try { + return new AuditStamp() + .setActor(Urn.createFromString(Constants.SYSTEM_ACTOR)) + .setTime(System.currentTimeMillis()); + } catch (URISyntaxException e) { + throw new RuntimeException(e); + } + } + + public static MetadataChangeProposal platformInstanceMcp( + String platformInstanceName, Urn urn, String entityType) { + DataPlatformInstance platformInstance = new DataPlatformInstance(); + platformInstance.setPlatform(platformUrn()); + platformInstance.setInstance(platformInstanceUrn(platformInstanceName)); + + MetadataChangeProposal mcp = new MetadataChangeProposal(); + mcp.setEntityUrn(urn); + mcp.setEntityType(entityType); + mcp.setAspectName(DATA_PLATFORM_INSTANCE_ASPECT_NAME); + mcp.setAspect(serializeAspect(platformInstance)); + mcp.setChangeType(ChangeType.UPSERT); + + return mcp; + } + + public static DataPlatformUrn platformUrn() { + return new DataPlatformUrn(PLATFORM_NAME); + } + + public static Urn platformInstanceUrn(String platformInstance) { + DataPlatformInstanceKey platformInstanceKey = + new DataPlatformInstanceKey().setInstance(platformInstance).setPlatform(platformUrn()); + return EntityKeyUtils.convertEntityKeyToUrn( + platformInstanceKey, DATA_PLATFORM_INSTANCE_ENTITY_NAME); + } + + public static FabricType fabricType() { + // TODO configurable fabricType + return FabricType.DEV; + } + + public static Urn containerUrn(String platformInstance, Namespace ns) { + return containerUrn(platformInstance, ns.levels()); + } + + @SneakyThrows + public static Urn containerUrn(String platformInstance, String[] levels) { + StringBuilder containerFullName = new StringBuilder(platformInstance); + for (String level : levels) { + containerFullName.append(".").append(level); + } + return Urn.createFromString("urn:li:container:nativeIceberg__" + containerFullName); + } + + public static DatasetUrn datasetUrn(String platformInstance, TableIdentifier tableIdentifier) { + return new DatasetUrn( + platformUrn(), CatalogUtil.fullTableName(platformInstance, tableIdentifier), fabricType()); + } + + public static String fullTableName(String platformInstance, TableIdentifier tableIdentifier) { + return CatalogUtil.fullTableName(platformInstance, tableIdentifier); + } + + public static Set locations(TableMetadata tableMetadata) { + Set locations = new HashSet<>(); + 
locations.add(tableMetadata.location()); + if (tableMetadata.properties().containsKey(TableProperties.WRITE_DATA_LOCATION)) { + locations.add(tableMetadata.properties().get(TableProperties.WRITE_DATA_LOCATION)); + } + if (tableMetadata.properties().containsKey(TableProperties.WRITE_METADATA_LOCATION)) { + locations.add(tableMetadata.properties().get(TableProperties.WRITE_METADATA_LOCATION)); + } + return locations; + } + + public static Namespace namespaceFromString(String namespace) { + return RESTUtil.decodeNamespace(URLEncoder.encode(namespace, Charset.defaultCharset())); + } + + public static TableIdentifier tableIdFromString(String namespace, String table) { + return TableIdentifier.of(namespaceFromString(namespace), RESTUtil.decodeString(table)); + } + + public static String parentDir(String fileLocation) { + return fileLocation.substring(0, fileLocation.lastIndexOf("/")); + } +} diff --git a/metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/rest/AbstractIcebergController.java b/metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/rest/AbstractIcebergController.java new file mode 100644 index 00000000000000..72b64e9d30ad4b --- /dev/null +++ b/metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/rest/AbstractIcebergController.java @@ -0,0 +1,167 @@ +package com.datahub.iceberg.catalog.rest; + +import static com.datahub.iceberg.catalog.Utils.*; +import static com.linkedin.metadata.Constants.DATASET_ENTITY_NAME; +import static com.linkedin.metadata.Constants.DATA_PLATFORM_INSTANCE_ENTITY_NAME; + +import com.datahub.authentication.Authentication; +import com.datahub.authentication.AuthenticationContext; +import com.datahub.authorization.AuthUtil; +import com.datahub.authorization.EntitySpec; +import com.datahub.iceberg.catalog.CredentialProvider; +import com.datahub.iceberg.catalog.DataHubRestCatalog; +import com.datahub.iceberg.catalog.DataOperation; +import com.datahub.plugins.auth.authorization.Authorizer; +import com.linkedin.common.urn.DatasetUrn; +import com.linkedin.common.urn.Urn; +import com.linkedin.metadata.authorization.PoliciesConfig; +import com.linkedin.metadata.entity.EntityService; +import io.datahubproject.metadata.context.OperationContext; +import io.datahubproject.metadata.context.RequestContext; +import jakarta.servlet.http.HttpServletRequest; +import java.io.IOException; +import java.util.Collections; +import java.util.List; +import java.util.function.Function; +import javax.inject.Inject; +import javax.inject.Named; +import lombok.AllArgsConstructor; +import lombok.Data; +import lombok.extern.slf4j.Slf4j; +import org.apache.iceberg.catalog.TableIdentifier; +import org.apache.iceberg.exceptions.ForbiddenException; +import org.springframework.beans.factory.annotation.Autowired; + +@Slf4j +public class AbstractIcebergController { + @Autowired private EntityService entityService; + @Autowired protected CredentialProvider credentialProvider; + + @Inject + @Named("authorizerChain") + private Authorizer authorizer; + + @Inject + @Named("systemOperationContext") + private OperationContext systemOperationContext; + + protected PoliciesConfig.Privilege authorize( + OperationContext operationContext, + String platformInstance, + TableIdentifier tableIdentifier, + DataOperation operation, + boolean returnHighestPrivilege) { + DatasetUrn urn = datasetUrn(platformInstance, tableIdentifier); + EntitySpec entitySpec = new EntitySpec(DATASET_ENTITY_NAME, urn.toString()); + return authorize( + operationContext, + 
entitySpec, + platformInstanceEntitySpec(platformInstance), + operation, + returnHighestPrivilege); + } + + protected PoliciesConfig.Privilege authorize( + OperationContext operationContext, + String platformInstance, + DataOperation operation, + boolean returnHighestPrivilege) { + EntitySpec entitySpec = platformInstanceEntitySpec(platformInstance); + return authorize(operationContext, entitySpec, entitySpec, operation, returnHighestPrivilege); + } + + private EntitySpec platformInstanceEntitySpec(String platformInstance) { + Urn urn = platformInstanceUrn(platformInstance); + return new EntitySpec(DATA_PLATFORM_INSTANCE_ENTITY_NAME, urn.toString()); + } + + private PoliciesConfig.Privilege authorize( + OperationContext operationContext, + EntitySpec entitySpec, + EntitySpec platformInstanceEntitySpec, + DataOperation operation, + boolean returnHighestPrivilege) { + List privileges = + returnHighestPrivilege ? operation.descendingPrivileges : operation.ascendingPrivileges; + + for (PoliciesConfig.Privilege privilege : privileges) { + if ((entitySpec.getType().equals(DATASET_ENTITY_NAME) + && PoliciesConfig.DATASET_PRIVILEGES.getPrivileges().contains(privilege) + || (entitySpec.getType().equals(DATA_PLATFORM_INSTANCE_ENTITY_NAME) + && PoliciesConfig.PLATFORM_INSTANCE_PRIVILEGES + .getPrivileges() + .contains(privilege)))) { + if (AuthUtil.isAuthorized(operationContext, privilege, entitySpec)) { + return privilege; + } + } else if (entitySpec.getType().equals(DATASET_ENTITY_NAME) + && PoliciesConfig.PLATFORM_INSTANCE_PRIVILEGES.getPrivileges().contains(privilege)) { + if (AuthUtil.isAuthorized(operationContext, privilege, platformInstanceEntitySpec)) { + return privilege; + } + } + } + + throw new ForbiddenException("Data operation %s not authorized on %s", operation, entitySpec); + } + + @Data + @AllArgsConstructor + protected static class CatalogOperationResult { + private R response; + private PoliciesConfig.Privilege privilege; + private CredentialProvider.StorageProviderCredentials storageProviderCredentials; + } + + protected R catalogOperation( + String platformInstance, + HttpServletRequest request, + Function authorizer, + Function function, + Function, R> includeCreds) { + OperationContext operationContext = opContext(request); + PoliciesConfig.Privilege privilege = authorizer.apply(operationContext); + + DataHubIcebergWarehouse warehouse = + DataHubIcebergWarehouse.of(platformInstance, entityService, operationContext); + DataHubRestCatalog catalog = catalog(operationContext, warehouse, platformInstance); + try { + R response = function.apply(catalog); + if (includeCreds == null) { + return response; + } else { + CatalogOperationResult operationResult = + new CatalogOperationResult<>( + response, privilege, warehouse.getStorageProviderCredentials()); + return includeCreds.apply(operationResult); + } + } finally { + try { + catalog.close(); + } catch (IOException e) { + log.error("Error while closing catalog", e); + } + } + } + + protected OperationContext opContext(HttpServletRequest request) { + Authentication auth = AuthenticationContext.getAuthentication(); + return OperationContext.asSession( + systemOperationContext, + RequestContext.builder() + .buildOpenapi(auth.getActor().toUrnStr(), request, "icebergDataAction", "dataset"), + authorizer, + auth, + true); + } + + protected DataHubRestCatalog catalog( + OperationContext operationContext, + DataHubIcebergWarehouse warehouse, + String platformInstance) { + DataHubRestCatalog catalog = + new DataHubRestCatalog(entityService, 
operationContext, warehouse, credentialProvider); + catalog.initialize(platformInstance, Collections.emptyMap()); + return catalog; + } +} diff --git a/metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/rest/DataHubIcebergWarehouse.java b/metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/rest/DataHubIcebergWarehouse.java new file mode 100644 index 00000000000000..d904cc7912923f --- /dev/null +++ b/metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/rest/DataHubIcebergWarehouse.java @@ -0,0 +1,80 @@ +package com.datahub.iceberg.catalog.rest; + +import com.datahub.iceberg.catalog.CredentialProvider; +import com.datahub.iceberg.catalog.Utils; +import com.linkedin.common.urn.Urn; +import com.linkedin.data.template.RecordTemplate; +import com.linkedin.dataplatforminstance.IcebergWarehouse; +import com.linkedin.metadata.entity.EntityService; +import com.linkedin.secret.DataHubSecretValue; +import io.datahubproject.metadata.context.OperationContext; +import java.util.List; +import java.util.Map; +import java.util.Set; +import lombok.Getter; + +public class DataHubIcebergWarehouse { + + private final EntityService entityService; + + private final OperationContext operationContext; + + private final IcebergWarehouse icebergWarehouse; + + @Getter private final String platformInstance; + + private DataHubIcebergWarehouse( + String platformInstance, + IcebergWarehouse icebergWarehouse, + EntityService entityService, + OperationContext operationContext) { + this.platformInstance = platformInstance; + this.icebergWarehouse = icebergWarehouse; + this.entityService = entityService; + this.operationContext = operationContext; + } + + public static DataHubIcebergWarehouse of( + String platformInstance, EntityService entityService, OperationContext operationContext) { + Urn platformInstanceUrn = Utils.platformInstanceUrn(platformInstance); + RecordTemplate warehouseAspect = + entityService.getLatestAspect(operationContext, platformInstanceUrn, "icebergWarehouse"); + + if (warehouseAspect == null) { + throw new RuntimeException("Unknown warehouse"); + } + + IcebergWarehouse icebergWarehouse = new IcebergWarehouse(warehouseAspect.data()); + return new DataHubIcebergWarehouse( + platformInstance, icebergWarehouse, entityService, operationContext); + } + + public CredentialProvider.StorageProviderCredentials getStorageProviderCredentials() { + + Urn clientIdUrn, clientSecretUrn; + String role, region; + + clientIdUrn = icebergWarehouse.getClientId(); + clientSecretUrn = icebergWarehouse.getClientSecret(); + role = icebergWarehouse.getRole(); + region = icebergWarehouse.getRegion(); + + Map<Urn, List<RecordTemplate>> credsMap = + entityService.getLatestAspects( + operationContext, Set.of(clientIdUrn, clientSecretUrn), Set.of("dataHubSecretValue")); + + DataHubSecretValue clientIdValue = + new DataHubSecretValue(credsMap.get(clientIdUrn).get(1).data()); + String clientId = clientIdValue.getValue(); + + DataHubSecretValue clientSecretValue = + new DataHubSecretValue(credsMap.get(clientSecretUrn).get(1).data()); + String clientSecret = clientSecretValue.getValue(); + + return new CredentialProvider.StorageProviderCredentials(clientId, clientSecret, role, region); + } + + public String getDataRoot() { + return icebergWarehouse.getDataRoot(); + } +} diff --git a/metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/rest/IcebergApiController.java b/metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/rest/IcebergApiController.java
new file mode 100644 index 00000000000000..9dd201689185b1 --- /dev/null +++ b/metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/rest/IcebergApiController.java @@ -0,0 +1,23 @@ +package com.datahub.iceberg.catalog.rest; + +import jakarta.servlet.http.HttpServletRequest; +import lombok.extern.slf4j.Slf4j; +import org.apache.iceberg.rest.requests.CommitTransactionRequest; +import org.springframework.http.MediaType; +import org.springframework.web.bind.annotation.*; + +@Slf4j +@RestController +public class IcebergApiController extends AbstractIcebergController { + + @PostMapping( + value = "/v1/{prefix}/transactions/commit", + consumes = MediaType.APPLICATION_JSON_VALUE) + public void commit( + HttpServletRequest request, + @PathVariable("prefix") String platformInstance, + @RequestBody CommitTransactionRequest commitTransactionRequest) { + log.info("COMMIT REQUEST {} ", commitTransactionRequest); + throw new UnsupportedOperationException(); + } +} diff --git a/metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/rest/IcebergConfigApiController.java b/metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/rest/IcebergConfigApiController.java new file mode 100644 index 00000000000000..7b96ef1e83dcb3 --- /dev/null +++ b/metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/rest/IcebergConfigApiController.java @@ -0,0 +1,21 @@ +package com.datahub.iceberg.catalog.rest; + +import jakarta.servlet.http.HttpServletRequest; +import lombok.extern.slf4j.Slf4j; +import org.apache.iceberg.rest.responses.ConfigResponse; +import org.springframework.http.MediaType; +import org.springframework.web.bind.annotation.*; + +@Slf4j +@RestController +public class IcebergConfigApiController extends AbstractIcebergController { + @GetMapping(value = "/v1/config", produces = MediaType.APPLICATION_JSON_VALUE) + public ConfigResponse getConfig( + HttpServletRequest request, + @RequestParam(value = "warehouse", required = false) String warehouse) { + log.info("GET CONFIG for warehouse {}", warehouse); + ConfigResponse response = ConfigResponse.builder().withOverride("prefix", warehouse).build(); + log.info("GET CONFIG response: {}", response); + return response; + } +} diff --git a/metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/rest/IcebergExceptionHandlerAdvice.java b/metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/rest/IcebergExceptionHandlerAdvice.java new file mode 100644 index 00000000000000..2926aa0b4769cf --- /dev/null +++ b/metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/rest/IcebergExceptionHandlerAdvice.java @@ -0,0 +1,53 @@ +package com.datahub.iceberg.catalog.rest; + +import com.fasterxml.jackson.core.JsonProcessingException; +import org.apache.iceberg.exceptions.*; +import org.apache.iceberg.rest.responses.ErrorResponse; +import org.springframework.http.HttpStatus; +import org.springframework.http.ResponseEntity; +import org.springframework.web.bind.annotation.ControllerAdvice; +import org.springframework.web.bind.annotation.ExceptionHandler; + +@ControllerAdvice(basePackageClasses = AbstractIcebergController.class) +public class IcebergExceptionHandlerAdvice { + + @ExceptionHandler(AlreadyExistsException.class) + public ResponseEntity handle(AlreadyExistsException e) throws JsonProcessingException { + return err(e, HttpStatus.CONFLICT); + } + + @ExceptionHandler(NoSuchNamespaceException.class) + public ResponseEntity 
handle(NoSuchNamespaceException e) throws JsonProcessingException { + return err(e, HttpStatus.NOT_FOUND); + } + + @ExceptionHandler(NoSuchTableException.class) + public ResponseEntity handle(NoSuchTableException e) throws JsonProcessingException { + return err(e, HttpStatus.NOT_FOUND); + } + + @ExceptionHandler(NoSuchViewException.class) + public ResponseEntity handle(NoSuchViewException e) throws JsonProcessingException { + return err(e, HttpStatus.NOT_FOUND); + } + + @ExceptionHandler(ForbiddenException.class) + public ResponseEntity handle(ForbiddenException e) throws JsonProcessingException { + return err(e, HttpStatus.FORBIDDEN); + } + + @ExceptionHandler(BadRequestException.class) + public ResponseEntity handle(BadRequestException e) throws JsonProcessingException { + return err(e, HttpStatus.BAD_REQUEST); + } + + private ResponseEntity err(Exception e, HttpStatus errCode) throws JsonProcessingException { + ErrorResponse err = + ErrorResponse.builder() + .responseCode(errCode.value()) + .withMessage(e.getMessage()) + .withType(e.getClass().getSimpleName()) + .build(); + return new ResponseEntity<>(err, errCode); + } +} diff --git a/metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/rest/IcebergNamespaceApiController.java b/metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/rest/IcebergNamespaceApiController.java new file mode 100644 index 00000000000000..127c9e76a3b41c --- /dev/null +++ b/metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/rest/IcebergNamespaceApiController.java @@ -0,0 +1,76 @@ +package com.datahub.iceberg.catalog.rest; + +import static com.datahub.iceberg.catalog.Utils.*; + +import com.datahub.iceberg.catalog.DataOperation; +import jakarta.servlet.http.HttpServletRequest; +import javax.annotation.Nonnull; +import lombok.extern.slf4j.Slf4j; +import org.apache.iceberg.catalog.Namespace; +import org.apache.iceberg.rest.requests.CreateNamespaceRequest; +import org.apache.iceberg.rest.responses.CreateNamespaceResponse; +import org.apache.iceberg.rest.responses.GetNamespaceResponse; +import org.springframework.http.MediaType; +import org.springframework.web.bind.annotation.*; + +@Slf4j +@RestController +public class IcebergNamespaceApiController extends AbstractIcebergController { + + @GetMapping( + value = "/v1/{prefix}/namespaces/{namespace}", + produces = MediaType.APPLICATION_JSON_VALUE) + public GetNamespaceResponse getNamespace( + HttpServletRequest request, + @PathVariable("prefix") String platformInstance, + @PathVariable("namespace") String namespace) { + log.info("GET NAMESPACE REQUEST ns {}", namespace); + + GetNamespaceResponse getNamespaceResponse = + catalogOperation( + platformInstance, + request, + operationContext -> + authorize( + operationContext, platformInstance, DataOperation.MANAGE_NAMESPACES, false), + catalog -> { + // not supporting properties; simply load to ensure existence + Namespace ns = namespaceFromString(namespace); + catalog.loadNamespaceMetadata(ns); + return GetNamespaceResponse.builder().withNamespace(ns).build(); + }, + null); + + log.info("GET NAMESPACE RESPONSE {}", getNamespaceResponse); + return getNamespaceResponse; + } + + @PostMapping( + value = "/v1/{prefix}/namespaces", + consumes = MediaType.APPLICATION_JSON_VALUE, + produces = MediaType.APPLICATION_JSON_VALUE) + public CreateNamespaceResponse createNamespace( + HttpServletRequest request, + @RequestBody @Nonnull CreateNamespaceRequest createNamespaceRequest, + @PathVariable("prefix") String 
platformInstance) { + log.info("CREATE NAMESPACE REQUEST {} ", createNamespaceRequest); + + CreateNamespaceResponse createNamespaceResponse = + catalogOperation( + platformInstance, + request, + operationContext -> + authorize( + operationContext, platformInstance, DataOperation.MANAGE_NAMESPACES, false), + catalog -> { + catalog.createNamespace(createNamespaceRequest.namespace()); + return CreateNamespaceResponse.builder() + .withNamespace(createNamespaceRequest.namespace()) + .build(); + }, + null); + + log.info("CREATE NAMESPACE RESPONSE {}", createNamespaceResponse); + return createNamespaceResponse; + } +} diff --git a/metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/rest/IcebergSpringWebConfig.java b/metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/rest/IcebergSpringWebConfig.java new file mode 100644 index 00000000000000..a839eafe1941f2 --- /dev/null +++ b/metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/rest/IcebergSpringWebConfig.java @@ -0,0 +1,28 @@ +package com.datahub.iceberg.catalog.rest; + +import com.fasterxml.jackson.annotation.JsonAutoDetect; +import com.fasterxml.jackson.annotation.PropertyAccessor; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.PropertyNamingStrategies; +import java.util.List; +import org.apache.iceberg.rest.RESTSerializers; +import org.springframework.context.annotation.Configuration; +import org.springframework.http.converter.*; +import org.springframework.http.converter.json.MappingJackson2HttpMessageConverter; +import org.springframework.web.servlet.config.annotation.WebMvcConfigurer; + +@Configuration +public class IcebergSpringWebConfig implements WebMvcConfigurer { + @Override + public void extendMessageConverters(List<HttpMessageConverter<?>> converters) { + for (HttpMessageConverter<?> converter : converters) { + if (converter instanceof MappingJackson2HttpMessageConverter jsonConverter) { + ObjectMapper objectMapper = jsonConverter.getObjectMapper(); + objectMapper.setVisibility(PropertyAccessor.FIELD, JsonAutoDetect.Visibility.ANY); + objectMapper.setPropertyNamingStrategy(new PropertyNamingStrategies.KebabCaseStrategy()); + RESTSerializers.registerAll(objectMapper); + break; + } + } + } +} diff --git a/metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/rest/IcebergTableApiController.java b/metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/rest/IcebergTableApiController.java new file mode 100644 index 00000000000000..650e054eaeb461 --- /dev/null +++ b/metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/rest/IcebergTableApiController.java @@ -0,0 +1,235 @@ +package com.datahub.iceberg.catalog.rest; + +import static com.datahub.iceberg.catalog.Utils.*; + +import com.datahub.iceberg.catalog.CredentialProvider; +import com.datahub.iceberg.catalog.DataOperation; +import com.linkedin.metadata.authorization.PoliciesConfig; +import jakarta.servlet.http.HttpServletRequest; +import java.util.Map; +import lombok.extern.slf4j.Slf4j; +import org.apache.iceberg.catalog.Namespace; +import org.apache.iceberg.catalog.TableIdentifier; +import org.apache.iceberg.rest.CatalogHandlers; +import org.apache.iceberg.rest.requests.CreateTableRequest; +import org.apache.iceberg.rest.requests.RegisterTableRequest; +import org.apache.iceberg.rest.requests.UpdateTableRequest; +import org.apache.iceberg.rest.responses.LoadTableResponse; +import org.springframework.http.MediaType; +import
org.springframework.web.bind.annotation.*; + +@Slf4j +@RestController +public class IcebergTableApiController extends AbstractIcebergController { + + @PostMapping( + value = "/v1/{prefix}/namespaces/{namespace}/tables", + produces = MediaType.APPLICATION_JSON_VALUE, + consumes = MediaType.APPLICATION_JSON_VALUE) + public LoadTableResponse createTable( + HttpServletRequest request, + @PathVariable("prefix") String platformInstance, + @PathVariable("namespace") String namespace, + @RequestBody CreateTableRequest createTableRequest, + @RequestHeader(value = "X-Iceberg-Access-Delegation") String xIcebergAccessDelegation) { + log.info("CREATE TABLE REQUEST {}", createTableRequest); + + LoadTableResponse createTableResponse = + catalogOperation( + platformInstance, + request, + operationContext -> + authorize(operationContext, platformInstance, DataOperation.MANAGE_TABLES, false), + catalog -> { + // ensure namespace exists + Namespace ns = namespaceFromString(namespace); + catalog.loadNamespaceMetadata(ns); + if (createTableRequest.stageCreate()) { + return CatalogHandlers.stageTableCreate(catalog, ns, createTableRequest); + } else { + return CatalogHandlers.createTable(catalog, ns, createTableRequest); + } + }, + catalogOperationResult -> { + log.info( + "CREATE TABLE RESPONSE, excluding creds, {}", + catalogOperationResult.getResponse()); + return includeCreds( + platformInstance, + xIcebergAccessDelegation, + catalogOperationResult.getResponse(), + PoliciesConfig.DATA_READ_WRITE_PRIVILEGE, + catalogOperationResult.getStorageProviderCredentials()); + }); + + return createTableResponse; + } + + private LoadTableResponse includeCreds( + String platformInstance, + String xIcebergAccessDelegation, + LoadTableResponse loadTableResponse, + PoliciesConfig.Privilege privilege, + CredentialProvider.StorageProviderCredentials storageProviderCredentials) { + if ("vended-credentials".equals(xIcebergAccessDelegation)) { + CredentialProvider.CredentialsCacheKey cacheKey = + new CredentialProvider.CredentialsCacheKey( + platformInstance, privilege, locations(loadTableResponse.tableMetadata())); + Map creds = credentialProvider.get(cacheKey, storageProviderCredentials); + log.info( + "STS creds {} for primary table location {}", + creds, + loadTableResponse.tableMetadata().location()); + + return LoadTableResponse.builder() + .withTableMetadata(loadTableResponse.tableMetadata()) + .addAllConfig(creds) + .build(); + } else { + return loadTableResponse; + } + } + + @GetMapping( + value = "/v1/{prefix}/namespaces/{namespace}/tables/{table}", + produces = MediaType.APPLICATION_JSON_VALUE) + public LoadTableResponse loadTable( + HttpServletRequest request, + @PathVariable("prefix") String platformInstance, + @PathVariable("namespace") String namespace, + @PathVariable("table") String table, + @RequestHeader(value = "X-Iceberg-Access-Delegation", required = false) + String xIcebergAccessDelegation, + @RequestParam(value = "snapshots", required = false) String snapshots) { + log.info( + "GET TABLE REQUEST {} {}.{} ; access-delegation: {}", + platformInstance, + namespace, + table, + xIcebergAccessDelegation); + + LoadTableResponse getTableResponse = + catalogOperation( + platformInstance, + request, + operationContext -> + authorize( + operationContext, + platformInstance, + tableIdFromString(namespace, table), + DataOperation.READ_ONLY, + true), + catalog -> CatalogHandlers.loadTable(catalog, tableIdFromString(namespace, table)), + catalogOperationResult -> { + log.info( + "GET TABLE RESPONSE, excluding creds, 
{}", catalogOperationResult.getResponse()); + PoliciesConfig.Privilege privilege = catalogOperationResult.getPrivilege(); + if (privilege == PoliciesConfig.DATA_MANAGE_TABLES_PRIVILEGE) { + privilege = PoliciesConfig.DATA_READ_WRITE_PRIVILEGE; + } else if (privilege == PoliciesConfig.DATA_MANAGE_VIEWS_PRIVILEGE) { + privilege = PoliciesConfig.DATA_READ_ONLY_PRIVILEGE; + } + return includeCreds( + platformInstance, + xIcebergAccessDelegation, + catalogOperationResult.getResponse(), + privilege, + catalogOperationResult.getStorageProviderCredentials()); + }); + + return getTableResponse; + } + + @PostMapping( + value = "/v1/{prefix}/namespaces/{namespace}/tables/{table}", + produces = MediaType.APPLICATION_JSON_VALUE, + consumes = MediaType.APPLICATION_JSON_VALUE) + public LoadTableResponse updateTable( + HttpServletRequest request, + @PathVariable("prefix") String platformInstance, + @PathVariable("namespace") String namespace, + @PathVariable("table") String table, + @RequestBody UpdateTableRequest updateTableRequest) { + + log.info("UPDATE TABLE REQUEST {}.{}, body {} ", namespace, table, updateTableRequest); + + LoadTableResponse updateTableResponse = + catalogOperation( + platformInstance, + request, + operationContext -> + authorize( + operationContext, + platformInstance, + tableIdFromString(namespace, table), + DataOperation.READ_WRITE, + false), + catalog -> + CatalogHandlers.updateTable( + catalog, tableIdFromString(namespace, table), updateTableRequest), + null); + + // not refreshing credentials here. + log.info("UPDATE TABLE RESPONSE {}", updateTableResponse); + + return updateTableResponse; + } + + @DeleteMapping(value = "/v1/{prefix}/namespaces/{namespace}/tables/{table}") + public void dropTable( + HttpServletRequest request, + @PathVariable("prefix") String platformInstance, + @PathVariable("namespace") String namespace, + @PathVariable("table") String table, + @RequestParam(value = "purgeRequested", defaultValue = "false") Boolean purgeRequested) { + + log.info("DROP TABLE REQUEST ns {} table {}", namespace, table); + + catalogOperation( + platformInstance, + request, + operationContext -> + authorize(operationContext, platformInstance, DataOperation.MANAGE_TABLES, false), + catalog -> { + TableIdentifier tableIdentifier = tableIdFromString(namespace, table); + if (purgeRequested) { + CatalogHandlers.purgeTable(catalog, tableIdentifier); + log.info("PURGED TABLE {}", tableIdentifier); + } else { + CatalogHandlers.dropTable(catalog, tableIdentifier); + log.info("DROPPED TABLE {}", tableIdentifier); + } + return null; + }, + null); + } + + @PostMapping( + value = "/v1/{prefix}/namespaces/{namespace}/register", + produces = MediaType.APPLICATION_JSON_VALUE, + consumes = MediaType.APPLICATION_JSON_VALUE) + public LoadTableResponse registerTable( + HttpServletRequest request, + @PathVariable("prefix") String platformInstance, + @PathVariable("namespace") String namespace, + @RequestBody RegisterTableRequest registerTableRequest) { + log.info("REGISTER TABLE REQUEST {}", registerTableRequest); + + LoadTableResponse registerTableResponse = + catalogOperation( + platformInstance, + request, + operationContext -> + authorize(operationContext, platformInstance, DataOperation.MANAGE_TABLES, false), + catalog -> { + // ensure namespace exists + Namespace ns = namespaceFromString(namespace); + catalog.loadNamespaceMetadata(ns); + return CatalogHandlers.registerTable(catalog, ns, registerTableRequest); + }, + null); + + log.info("REGISTER TABLE RESPONSE {}", registerTableResponse); + 
return registerTableResponse; + } +} diff --git a/metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/rest/IcebergViewApiController.java b/metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/rest/IcebergViewApiController.java new file mode 100644 index 00000000000000..262efc72eba56b --- /dev/null +++ b/metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/rest/IcebergViewApiController.java @@ -0,0 +1,136 @@ +package com.datahub.iceberg.catalog.rest; + +import static com.datahub.iceberg.catalog.Utils.*; + +import com.datahub.iceberg.catalog.DataOperation; +import jakarta.servlet.http.HttpServletRequest; +import java.net.URLEncoder; +import java.nio.charset.Charset; +import lombok.extern.slf4j.Slf4j; +import org.apache.iceberg.catalog.Namespace; +import org.apache.iceberg.catalog.TableIdentifier; +import org.apache.iceberg.rest.CatalogHandlers; +import org.apache.iceberg.rest.RESTUtil; +import org.apache.iceberg.rest.requests.CreateViewRequest; +import org.apache.iceberg.rest.requests.UpdateTableRequest; +import org.apache.iceberg.rest.responses.LoadViewResponse; +import org.springframework.http.MediaType; +import org.springframework.web.bind.annotation.*; + +@Slf4j +@RestController +public class IcebergViewApiController extends AbstractIcebergController { + + @PostMapping( + value = "/v1/{prefix}/namespaces/{namespace}/views", + produces = MediaType.APPLICATION_JSON_VALUE, + consumes = MediaType.APPLICATION_JSON_VALUE) + public LoadViewResponse createView( + HttpServletRequest request, + @PathVariable("prefix") String platformInstance, + @PathVariable("namespace") String namespace, + @RequestBody CreateViewRequest createViewRequest) { + log.info("CREATE VIEW REQUEST {}", createViewRequest); + + LoadViewResponse createViewResponse = + catalogOperation( + platformInstance, + request, + operationContext -> + authorize(operationContext, platformInstance, DataOperation.MANAGE_VIEWS, false), + catalog -> { + // ensure namespace exists + Namespace ns = namespaceFromString(namespace); + catalog.loadNamespaceMetadata(ns); + return CatalogHandlers.createView(catalog, ns, createViewRequest); + }, + null); + + log.info("CREATE VIEW RESPONSE {}", createViewResponse); + return createViewResponse; + } + + @PostMapping( + value = "/v1/{prefix}/namespaces/{namespace}/views/{view}", + produces = MediaType.APPLICATION_JSON_VALUE, + consumes = MediaType.APPLICATION_JSON_VALUE) + public LoadViewResponse updateView( + HttpServletRequest request, + @PathVariable("prefix") String platformInstance, + @PathVariable("namespace") String namespace, + @PathVariable("view") String view, + @RequestBody UpdateTableRequest updateViewRequest) { + log.info("UPDATE VIEW REQUEST {}.{}, body {} ", namespace, view, updateViewRequest); + + LoadViewResponse updateViewResponse = + catalogOperation( + platformInstance, + request, + operationContext -> + authorize( + operationContext, + platformInstance, + tableIdFromString(namespace, view), + DataOperation.MANAGE_VIEWS, + false), + catalog -> + CatalogHandlers.updateView( + catalog, tableIdFromString(namespace, view), updateViewRequest), + null); + + log.info("UPDATE VIEW RESPONSE {}", updateViewResponse); + return updateViewResponse; + } + + @GetMapping( + value = "/v1/{prefix}/namespaces/{namespace}/views/{view}", + produces = MediaType.APPLICATION_JSON_VALUE) + public LoadViewResponse loadView( + HttpServletRequest request, + @PathVariable("prefix") String platformInstance, + @PathVariable("namespace") String 
namespace, + @PathVariable("view") String view) { + log.info("GET VIEW REQUEST {} {}.{}", platformInstance, namespace, view); + + Namespace ns = RESTUtil.decodeNamespace(URLEncoder.encode(namespace, Charset.defaultCharset())); + TableIdentifier tableIdentifier = TableIdentifier.of(ns, RESTUtil.decodeString(view)); + LoadViewResponse getViewResponse = + catalogOperation( + platformInstance, + request, + operationContext -> + authorize( + operationContext, + platformInstance, + tableIdFromString(namespace, view), + DataOperation.READ_ONLY, + false), + catalog -> CatalogHandlers.loadView(catalog, tableIdFromString(namespace, view)), + null); + log.info("LOAD VIEW RESPONSE {}", getViewResponse); + return getViewResponse; + } + + @DeleteMapping(value = "/v1/{prefix}/namespaces/{namespace}/views/{view}") + public void dropView( + HttpServletRequest request, + @PathVariable("prefix") String platformInstance, + @PathVariable("namespace") String namespace, + @PathVariable("view") String view) { + log.info("DROP VIEW REQUEST ns {} table {}", namespace, view); + Namespace ns = RESTUtil.decodeNamespace(URLEncoder.encode(namespace, Charset.defaultCharset())); + TableIdentifier tableIdentifier = TableIdentifier.of(ns, RESTUtil.decodeString(view)); + + catalogOperation( + platformInstance, + request, + operationContext -> + authorize(operationContext, platformInstance, DataOperation.MANAGE_VIEWS, false), + catalog -> { + CatalogHandlers.dropView(catalog, tableIdFromString(namespace, view)); + return null; + }, + null); + log.info("DROPPED VIEW {}", tableIdentifier); + } +} diff --git a/metadata-service/iceberg-catalog/src/tests/resources/iceberg-warehouse1.yaml b/metadata-service/iceberg-catalog/src/tests/resources/iceberg-warehouse1.yaml new file mode 100644 index 00000000000000..e69de29bb2d1d6 diff --git a/metadata-service/war/build.gradle b/metadata-service/war/build.gradle index d83b29c4ad82bc..b173a1dd607f3a 100644 --- a/metadata-service/war/build.gradle +++ b/metadata-service/war/build.gradle @@ -23,6 +23,7 @@ dependencies { implementation project(':metadata-service:openapi-entity-servlet') implementation project(':metadata-service:openapi-analytics-servlet') implementation project(':metadata-service:schema-registry-servlet') + implementation project(':metadata-service:iceberg-catalog') runtimeOnly project(':metadata-jobs:mce-consumer') runtimeOnly project(':metadata-jobs:mae-consumer') runtimeOnly project(':metadata-jobs:pe-consumer') diff --git a/metadata-service/war/src/main/java/com/linkedin/gms/servlet/IcebergCatalogServletConfig.java b/metadata-service/war/src/main/java/com/linkedin/gms/servlet/IcebergCatalogServletConfig.java new file mode 100644 index 00000000000000..8f45395832e368 --- /dev/null +++ b/metadata-service/war/src/main/java/com/linkedin/gms/servlet/IcebergCatalogServletConfig.java @@ -0,0 +1,10 @@ +package com.linkedin.gms.servlet; + +import org.springframework.context.annotation.ComponentScan; +import org.springframework.context.annotation.Configuration; +import org.springframework.web.servlet.config.annotation.EnableWebMvc; + +@EnableWebMvc +@ComponentScan(basePackages = {"com.datahub.iceberg.catalog"}) +@Configuration +public class IcebergCatalogServletConfig {} diff --git a/metadata-utils/src/main/java/com/linkedin/metadata/authorization/PoliciesConfig.java b/metadata-utils/src/main/java/com/linkedin/metadata/authorization/PoliciesConfig.java index d3236180917aa4..1f18bf74ec6029 100644 --- a/metadata-utils/src/main/java/com/linkedin/metadata/authorization/PoliciesConfig.java 
+++ b/metadata-utils/src/main/java/com/linkedin/metadata/authorization/PoliciesConfig.java @@ -406,6 +406,30 @@ public class PoliciesConfig { "Edit Data Contract", "The ability to edit the Data Contract for an entity."); + public static final Privilege DATA_READ_ONLY_PRIVILEGE = + Privilege.of( + "DATA_READ_ONLY", + "Read only data-access", + "The ability to read the data in a dataset."); + + public static final Privilege DATA_READ_WRITE_PRIVILEGE = + Privilege.of( + "DATA_READ_WRITE", + "Read-write data-access", + "The ability to read & write the data in a dataset."); + + public static final Privilege DATA_MANAGE_TABLES_PRIVILEGE = + Privilege.of("DATA_MANAGE_TABLES", "Manage tables", "The ability to create and drop tables."); + + public static final Privilege DATA_MANAGE_VIEWS_PRIVILEGE = + Privilege.of("DATA_MANAGE_VIEWS", "Manage views", "The ability to create and drop views."); + + public static final Privilege DATA_MANAGE_NAMESPACES_PRIVILEGE = + Privilege.of( + "DATA_MANAGE_NAMESPACES", + "Manage namespaces", + "The ability to create and drop namespaces."); + // Tag Privileges public static final Privilege EDIT_TAG_COLOR_PRIVILEGE = Privilege.of("EDIT_TAG_COLOR", "Edit Tag Color", "The ability to change the color of a Tag."); @@ -540,7 +564,9 @@ public class PoliciesConfig { EDIT_LINEAGE_PRIVILEGE, EDIT_ENTITY_EMBED_PRIVILEGE, EDIT_QUERIES_PRIVILEGE, - CREATE_ER_MODEL_RELATIONSHIP_PRIVILEGE)) + CREATE_ER_MODEL_RELATIONSHIP_PRIVILEGE, + DATA_READ_ONLY_PRIVILEGE, + DATA_READ_WRITE_PRIVILEGE)) .flatMap(Collection::stream) .collect(Collectors.toList())); @@ -764,6 +790,16 @@ public class PoliciesConfig { "A logical collection of versioned entities.", COMMON_ENTITY_PRIVILEGES); + public static final ResourcePrivileges PLATFORM_INSTANCE_PRIVILEGES = + ResourcePrivileges.of( + "dataPlatformInstance", + "Data Platform Instance", + "Data Platform Instances on Datahub", + ImmutableList.of( + DATA_MANAGE_VIEWS_PRIVILEGE, + DATA_MANAGE_TABLES_PRIVILEGE, + DATA_MANAGE_NAMESPACES_PRIVILEGE)); + public static final List ENTITY_RESOURCE_PRIVILEGES = ImmutableList.of( DATASET_PRIVILEGES, @@ -783,7 +819,8 @@ public class PoliciesConfig { ER_MODEL_RELATIONSHIP_PRIVILEGES, BUSINESS_ATTRIBUTE_PRIVILEGES, STRUCTURED_PROPERTIES_PRIVILEGES, - VERSION_SET_PRIVILEGES); + VERSION_SET_PRIVILEGES, + PLATFORM_INSTANCE_PRIVILEGES); // Merge all entity specific resource privileges to create a superset of all resource privileges public static final ResourcePrivileges ALL_RESOURCE_PRIVILEGES = diff --git a/settings.gradle b/settings.gradle index 437a353f210ac4..a3c8544d8dbc37 100644 --- a/settings.gradle +++ b/settings.gradle @@ -78,10 +78,13 @@ include ':metadata-operation-context' include ':metadata-service:openapi-servlet:models' include ':metadata-integration:java:datahub-schematron:lib' include ':metadata-integration:java:datahub-schematron:cli' +include ':metadata-service:iceberg-catalog' buildCache { def depotSecret = System.getenv('DEPOT_TOKEN'); + + remote(HttpBuildCache) { url = 'https://cache.depot.dev' enabled = depotSecret != null @@ -120,7 +123,7 @@ def installPreCommitHooks() { println "Install output: ${stdout}" } catch (Exception e) { println "Error installing pre-commit: ${e.message}" - return + return } } From 965633afc71cfe54f6f1a86bc4043b21c23945d5 Mon Sep 17 00:00:00 2001 From: Chakravarthy Racharla Date: Wed, 8 Jan 2025 18:46:56 +0530 Subject: [PATCH 2/3] add iceberg warehouse management and cli --- datahub-web-react/src/graphql/search.graphql | 3 + docs/advanced/mcp-mcl.md | 6 + 
.../java/com/linkedin/metadata/Constants.java | 7 + .../examples/iceberg/constants.py | 3 + .../examples/iceberg/create_table.py | 108 +++ .../examples/iceberg/drop_table.py | 10 + .../examples/iceberg/folder_operations.py | 218 ++++++ .../examples/iceberg/provision_warehouse.py | 48 ++ .../examples/iceberg/read_table.py | 15 + .../examples/iceberg/requirements.txt | 3 + metadata-ingestion/setup.py | 3 + .../src/datahub/cli/iceberg_cli.py | 707 ++++++++++++++++++ metadata-ingestion/src/datahub/entrypoints.py | 2 + .../metadata/entity/EntityServiceImpl.java | 32 +- .../metadata/entity/EntityServiceTest.java | 91 +++ ...Warehouse.pdl => IcebergWarehouseInfo.pdl} | 15 +- .../linkedin/dataset/IcebergCatalogInfo.pdl | 25 + .../com/linkedin/dataset/IcebergMetadata.pdl | 21 - .../src/main/resources/entity-registry.yml | 4 +- .../src/main/resources/application.yaml | 6 +- metadata-service/iceberg-catalog/build.gradle | 42 +- .../iceberg-catalog/pyproject.toml | 46 ++ .../iceberg-catalog/requirements.txt | 16 + .../src/integrationTest/integration_test.py | 243 ++++++ .../iceberg/catalog/CredentialProvider.java | 50 -- .../iceberg/catalog/DataHubRestCatalog.java | 275 ------- .../iceberg/catalog/DataHubTableOps.java | 216 ------ .../iceberg/catalog/DataHubViewOps.java | 243 ------ .../iceberg/catalog/IcebergApiController.java | 1 - .../catalog/rest/DataHubIcebergWarehouse.java | 80 -- .../rest/IcebergExceptionHandlerAdvice.java | 53 -- .../rest/IcebergNamespaceApiController.java | 76 -- .../rest/IcebergTableApiController.java | 235 ------ .../rest/IcebergViewApiController.java | 136 ---- .../catalog/DataHubIcebergWarehouse.java | 275 +++++++ .../iceberg/catalog/DataHubRestCatalog.java | 487 ++++++++++++ .../iceberg/catalog/DataHubTableOps.java | 54 ++ .../iceberg/catalog/DataHubViewOps.java | 61 ++ .../iceberg/catalog/DataOperation.java | 6 +- .../iceberg/catalog/FileIOFactory.java | 2 +- .../catalog/TableOrViewOpsDelegate.java | 478 ++++++++++++ .../iceberg/catalog/Utils.java | 39 +- .../CachingCredentialProvider.java | 32 + .../credentials/CredentialProvider.java | 30 + .../credentials}/S3CredentialProvider.java | 58 +- .../common/IcebergExceptionHandlerAdvice.java | 74 ++ .../rest/common}/IcebergSpringWebConfig.java | 16 +- .../rest/open/PublicIcebergApiController.java | 107 +++ .../secure}/AbstractIcebergController.java | 105 +-- .../rest/secure}/IcebergApiController.java | 3 +- .../secure}/IcebergConfigApiController.java | 8 +- .../secure/IcebergNamespaceApiController.java | 171 +++++ .../secure/IcebergTableApiController.java | 323 ++++++++ .../rest/secure/IcebergViewApiController.java | 213 ++++++ .../CachingCredentialProviderTest.java | 107 +++ .../catalog/DataHubIcebergWarehouseTest.java | 428 +++++++++++ .../catalog/DataHubRestCatalogTest.java | 394 ++++++++++ .../catalog/S3CredentialProviderTest.java | 126 ++++ .../iceberg/catalog/UtilsTest.java | 143 ++++ .../IcebergExceptionHandlerAdviceTest.java | 147 ++++ .../open/PublicIcebergApiControllerTest.java | 141 ++++ .../rest/secure/AbstractControllerTest.java | 161 ++++ .../IcebergConfigApiControllerTest.java | 56 ++ .../IcebergNamespaceApiControllerTest.java | 92 +++ .../tests/resources/iceberg-warehouse1.yaml | 0 .../GlobalControllerExceptionHandler.java | 3 +- .../servlet/IcebergCatalogServletConfig.java | 2 +- .../authorization/PoliciesConfig.java | 13 +- 68 files changed, 5881 insertions(+), 1513 deletions(-) create mode 100644 metadata-ingestion/examples/iceberg/constants.py create mode 100644 
metadata-ingestion/examples/iceberg/create_table.py create mode 100644 metadata-ingestion/examples/iceberg/drop_table.py create mode 100644 metadata-ingestion/examples/iceberg/folder_operations.py create mode 100644 metadata-ingestion/examples/iceberg/provision_warehouse.py create mode 100644 metadata-ingestion/examples/iceberg/read_table.py create mode 100644 metadata-ingestion/examples/iceberg/requirements.txt create mode 100644 metadata-ingestion/src/datahub/cli/iceberg_cli.py rename metadata-models/src/main/pegasus/com/linkedin/dataplatforminstance/{IcebergWarehouse.pdl => IcebergWarehouseInfo.pdl} (67%) create mode 100644 metadata-models/src/main/pegasus/com/linkedin/dataset/IcebergCatalogInfo.pdl delete mode 100644 metadata-models/src/main/pegasus/com/linkedin/dataset/IcebergMetadata.pdl create mode 100644 metadata-service/iceberg-catalog/pyproject.toml create mode 100644 metadata-service/iceberg-catalog/requirements.txt create mode 100644 metadata-service/iceberg-catalog/src/integrationTest/integration_test.py delete mode 100644 metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/CredentialProvider.java delete mode 100644 metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/DataHubRestCatalog.java delete mode 100644 metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/DataHubTableOps.java delete mode 100644 metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/DataHubViewOps.java delete mode 100644 metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/IcebergApiController.java delete mode 100644 metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/rest/DataHubIcebergWarehouse.java delete mode 100644 metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/rest/IcebergExceptionHandlerAdvice.java delete mode 100644 metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/rest/IcebergNamespaceApiController.java delete mode 100644 metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/rest/IcebergTableApiController.java delete mode 100644 metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/rest/IcebergViewApiController.java create mode 100644 metadata-service/iceberg-catalog/src/main/java/io/datahubproject/iceberg/catalog/DataHubIcebergWarehouse.java create mode 100644 metadata-service/iceberg-catalog/src/main/java/io/datahubproject/iceberg/catalog/DataHubRestCatalog.java create mode 100644 metadata-service/iceberg-catalog/src/main/java/io/datahubproject/iceberg/catalog/DataHubTableOps.java create mode 100644 metadata-service/iceberg-catalog/src/main/java/io/datahubproject/iceberg/catalog/DataHubViewOps.java rename metadata-service/iceberg-catalog/src/main/java/{com/datahub => io/datahubproject}/iceberg/catalog/DataOperation.java (87%) rename metadata-service/iceberg-catalog/src/main/java/{com/datahub => io/datahubproject}/iceberg/catalog/FileIOFactory.java (90%) create mode 100644 metadata-service/iceberg-catalog/src/main/java/io/datahubproject/iceberg/catalog/TableOrViewOpsDelegate.java rename metadata-service/iceberg-catalog/src/main/java/{com/datahub => io/datahubproject}/iceberg/catalog/Utils.java (77%) create mode 100644 metadata-service/iceberg-catalog/src/main/java/io/datahubproject/iceberg/catalog/credentials/CachingCredentialProvider.java create mode 100644 
metadata-service/iceberg-catalog/src/main/java/io/datahubproject/iceberg/catalog/credentials/CredentialProvider.java rename metadata-service/iceberg-catalog/src/main/java/{com/datahub/iceberg/catalog => io/datahubproject/iceberg/catalog/credentials}/S3CredentialProvider.java (77%) create mode 100644 metadata-service/iceberg-catalog/src/main/java/io/datahubproject/iceberg/catalog/rest/common/IcebergExceptionHandlerAdvice.java rename metadata-service/iceberg-catalog/src/main/java/{com/datahub/iceberg/catalog/rest => io/datahubproject/iceberg/catalog/rest/common}/IcebergSpringWebConfig.java (66%) create mode 100644 metadata-service/iceberg-catalog/src/main/java/io/datahubproject/iceberg/catalog/rest/open/PublicIcebergApiController.java rename metadata-service/iceberg-catalog/src/main/java/{com/datahub/iceberg/catalog/rest => io/datahubproject/iceberg/catalog/rest/secure}/AbstractIcebergController.java (64%) rename metadata-service/iceberg-catalog/src/main/java/{com/datahub/iceberg/catalog/rest => io/datahubproject/iceberg/catalog/rest/secure}/IcebergApiController.java (90%) rename metadata-service/iceberg-catalog/src/main/java/{com/datahub/iceberg/catalog/rest => io/datahubproject/iceberg/catalog/rest/secure}/IcebergConfigApiController.java (74%) create mode 100644 metadata-service/iceberg-catalog/src/main/java/io/datahubproject/iceberg/catalog/rest/secure/IcebergNamespaceApiController.java create mode 100644 metadata-service/iceberg-catalog/src/main/java/io/datahubproject/iceberg/catalog/rest/secure/IcebergTableApiController.java create mode 100644 metadata-service/iceberg-catalog/src/main/java/io/datahubproject/iceberg/catalog/rest/secure/IcebergViewApiController.java create mode 100644 metadata-service/iceberg-catalog/src/test/java/io/datahubproject/iceberg/catalog/CachingCredentialProviderTest.java create mode 100644 metadata-service/iceberg-catalog/src/test/java/io/datahubproject/iceberg/catalog/DataHubIcebergWarehouseTest.java create mode 100644 metadata-service/iceberg-catalog/src/test/java/io/datahubproject/iceberg/catalog/DataHubRestCatalogTest.java create mode 100644 metadata-service/iceberg-catalog/src/test/java/io/datahubproject/iceberg/catalog/S3CredentialProviderTest.java create mode 100644 metadata-service/iceberg-catalog/src/test/java/io/datahubproject/iceberg/catalog/UtilsTest.java create mode 100644 metadata-service/iceberg-catalog/src/test/java/io/datahubproject/iceberg/catalog/rest/common/IcebergExceptionHandlerAdviceTest.java create mode 100644 metadata-service/iceberg-catalog/src/test/java/io/datahubproject/iceberg/catalog/rest/open/PublicIcebergApiControllerTest.java create mode 100644 metadata-service/iceberg-catalog/src/test/java/io/datahubproject/iceberg/catalog/rest/secure/AbstractControllerTest.java create mode 100644 metadata-service/iceberg-catalog/src/test/java/io/datahubproject/iceberg/catalog/rest/secure/IcebergConfigApiControllerTest.java create mode 100644 metadata-service/iceberg-catalog/src/test/java/io/datahubproject/iceberg/catalog/rest/secure/IcebergNamespaceApiControllerTest.java delete mode 100644 metadata-service/iceberg-catalog/src/tests/resources/iceberg-warehouse1.yaml diff --git a/datahub-web-react/src/graphql/search.graphql b/datahub-web-react/src/graphql/search.graphql index 7c8d670b535e8c..c36e3038adca55 100644 --- a/datahub-web-react/src/graphql/search.graphql +++ b/datahub-web-react/src/graphql/search.graphql @@ -482,6 +482,9 @@ fragment searchResultsWithoutSchemaField on Entity { } } } + ... 
on DataPlatformInstance { + ...dataPlatformInstanceFields + } ... on Role { id properties { diff --git a/docs/advanced/mcp-mcl.md b/docs/advanced/mcp-mcl.md index 3a06b2abadc115..df8305b2f494fd 100644 --- a/docs/advanced/mcp-mcl.md +++ b/docs/advanced/mcp-mcl.md @@ -210,6 +210,7 @@ A writer can specify that the aspect must NOT have been modified after a specifi `If-Modified-Since` A writer can specify that the aspect must have been modified after a specific time, following [If-Modified-Since](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/If-Modified-Since) http headers. + #### Change Types: [`CREATE`, `CREATE_ENTITY`] Another form of conditional writes which considers the existence of an aspect or entity uses the following Change Types. @@ -221,3 +222,8 @@ Another form of conditional writes which considers the existence of an aspect or entity uses the following Change Types. By default, a validation exception is thrown if the `CREATE`/`CREATE_ENTITY` constraint is violated. If the write operation should be dropped without considering it an exception, then add the following header: `If-None-Match: *` to the MCP. +### Synchronous Elasticsearch Updates + +Writes to Elasticsearch are asynchronous by default. A writer can add the custom header +`X-DataHub-Sync-Index-Update` to the MCP `headers`, with the value set to `true`, to request a synchronous +Elasticsearch index update for specific MCPs that benefit from it. diff --git a/li-utils/src/main/java/com/linkedin/metadata/Constants.java b/li-utils/src/main/java/com/linkedin/metadata/Constants.java index 463376edcdf259..80012bc24724cb 100644 --- a/li-utils/src/main/java/com/linkedin/metadata/Constants.java +++ b/li-utils/src/main/java/com/linkedin/metadata/Constants.java @@ -10,6 +10,9 @@ public class Constants { public static final String INTERNAL_DELEGATED_FOR_ACTOR_HEADER_NAME = "X-DataHub-Delegated-For"; public static final String INTERNAL_DELEGATED_FOR_ACTOR_TYPE = "X-DataHub-Delegated-For-"; + // Use on a specific MCP to request a synchronous index update, to avoid Kafka lag. + public static final String SYNC_INDEX_UPDATE_HEADER_NAME = "X-DataHub-Sync-Index-Update"; + public static final String URN_LI_PREFIX = "urn:li:"; public static final String DATAHUB_ACTOR = "urn:li:corpuser:datahub"; // Super user.
public static final String SYSTEM_ACTOR = @@ -103,6 +106,7 @@ public class Constants { public static final String FORM_ENTITY_NAME = "form"; public static final String RESTRICTED_ENTITY_NAME = "restricted"; public static final String BUSINESS_ATTRIBUTE_ENTITY_NAME = "businessAttribute"; + public static final String PLATFORM_RESOURCE_ENTITY_NAME = "platformResource"; /** Aspects */ // Common @@ -211,6 +215,9 @@ public class Constants { public static final String DATA_PLATFORM_INSTANCE_PROPERTIES_ASPECT_NAME = "dataPlatformInstanceProperties"; + // PlatformResource + public static final String PLATFORM_RESOURCE_INFO_ASPECT_NAME = "platformResourceInfo"; + // ML Feature public static final String ML_FEATURE_KEY_ASPECT_NAME = "mlFeatureKey"; public static final String ML_FEATURE_PROPERTIES_ASPECT_NAME = "mlFeatureProperties"; diff --git a/metadata-ingestion/examples/iceberg/constants.py b/metadata-ingestion/examples/iceberg/constants.py new file mode 100644 index 00000000000000..183291a84fec2b --- /dev/null +++ b/metadata-ingestion/examples/iceberg/constants.py @@ -0,0 +1,3 @@ +warehouse = "arctic_warehouse" +namespace = "alpine_db" +table_name = "resort_metrics" diff --git a/metadata-ingestion/examples/iceberg/create_table.py b/metadata-ingestion/examples/iceberg/create_table.py new file mode 100644 index 00000000000000..c2cf823c724158 --- /dev/null +++ b/metadata-ingestion/examples/iceberg/create_table.py @@ -0,0 +1,108 @@ +from datetime import datetime + +import pyarrow as pa +import pyiceberg +from constants import namespace, table_name, warehouse +from pyiceberg.catalog import load_catalog +from pyiceberg.schema import Schema +from pyiceberg.types import LongType, NestedField, StringType, TimestampType + +from datahub.ingestion.graph.client import get_default_graph + +# Define a more comprehensive schema for ski resort data +schema = Schema( + NestedField( + field_id=1, + name="resort_id", + field_type=LongType(), + required=True, + doc="Unique identifier for each ski resort", + initial_default=None, + ), + NestedField( + field_id=2, + name="resort_name", + field_type=StringType(), + required=True, + doc="Official name of the ski resort", + initial_default=None, + ), + NestedField( + field_id=3, + name="daily_snowfall", + field_type=LongType(), + required=False, + doc="Amount of new snow in inches during the last 24 hours. Null if no measurement available", + initial_default=0, + ), + NestedField( + field_id=4, + name="conditions", + field_type=StringType(), + required=False, + doc="Current snow conditions description (e.g., 'Powder', 'Packed Powder', 'Groomed'). 
Null if not reported", + initial_default=None, + ), + NestedField( + field_id=5, + name="last_updated", + field_type=TimestampType(), + required=False, + doc="Timestamp of when the snow report was last updated", + initial_default=None, + ), +) + +# Load the catalog with new warehouse name +graph = get_default_graph() +catalog = load_catalog("local_datahub", warehouse=warehouse, token=graph.config.token) + +# Create namespace (database) +try: + catalog.create_namespace(namespace) +except Exception as e: + print(f"Namespace creation error (might already exist): {e}") + +full_table_name = f"{namespace}.{table_name}" +try: + catalog.create_table(full_table_name, schema) +except pyiceberg.exceptions.TableAlreadyExistsError: + print(f"Table {full_table_name} already exists") + +# Create sample data with explicit PyArrow schema to match required fields +pa_schema = pa.schema( + [ + ("resort_id", pa.int64(), False), # False means not nullable + ("resort_name", pa.string(), False), # False means not nullable + ("daily_snowfall", pa.int64(), True), + ("conditions", pa.string(), True), + ("last_updated", pa.timestamp("us"), True), + ] +) +# Create sample data +sample_data = pa.Table.from_pydict( + { + "resort_id": [1, 2, 3], + "resort_name": ["Snowpeak Resort", "Alpine Valley", "Glacier Heights"], + "daily_snowfall": [12, 8, 15], + "conditions": ["Powder", "Packed", "Fresh Powder"], + "last_updated": [ + pa.scalar(datetime.now()), + pa.scalar(datetime.now()), + pa.scalar(datetime.now()), + ], + }, + schema=pa_schema, +) + +# Write data to table +table = catalog.load_table(full_table_name) +table.overwrite(sample_data) + +table.refresh() +# Read and verify data +con = table.scan().to_duckdb(table_name=f"{table_name}") +print("\nResort Metrics Data:") +print("-" * 50) +for row in con.execute(f"SELECT * FROM {table_name}").fetchall(): + print(row) diff --git a/metadata-ingestion/examples/iceberg/drop_table.py b/metadata-ingestion/examples/iceberg/drop_table.py new file mode 100644 index 00000000000000..a4a60dd94aca48 --- /dev/null +++ b/metadata-ingestion/examples/iceberg/drop_table.py @@ -0,0 +1,10 @@ +from constants import namespace, table_name, warehouse +from pyiceberg.catalog import load_catalog + +# Load the catalog +from datahub.ingestion.graph.client import get_default_graph + +graph = get_default_graph() +catalog = load_catalog("local_datahub", warehouse=warehouse, token=graph.config.token) +# Drop the Iceberg table +catalog.drop_table(f"{namespace}.{table_name}") diff --git a/metadata-ingestion/examples/iceberg/folder_operations.py b/metadata-ingestion/examples/iceberg/folder_operations.py new file mode 100644 index 00000000000000..306fb3ff4d205f --- /dev/null +++ b/metadata-ingestion/examples/iceberg/folder_operations.py @@ -0,0 +1,218 @@ +""" +This script is designed to manage and clean up contents in an S3 bucket, specifically targeting orphaned files and folders. +It provides functionality to list, delete, or simulate deletion of all objects under a specified S3 prefix using AWS assumed role credentials. + +The script supports the following operations: +- Listing all files and folders under a specified S3 path. +- Deleting all contents under a specified S3 path. +- Performing a dry run to show what would be deleted without actually deleting the objects. + +Environment variables required: +- DH_ICEBERG_AWS_ROLE: The ARN of the AWS role to assume. +- DH_ICEBERG_CLIENT_ID: The AWS client ID. +- DH_ICEBERG_CLIENT_SECRET: The AWS client secret.
+ +Usage: + python folder_operations.py s3://bucket/prefix --list + python folder_operations.py s3://bucket/prefix --nuke + python folder_operations.py s3://bucket/prefix --dry-run + +Arguments: +- s3_path: The S3 path to operate on (e.g., s3://bucket/prefix). +- --list: List all folders and files. +- --nuke: Delete all contents. +- --dry-run: Show what would be deleted without actually deleting. +- --region: AWS region (default: us-east-1). + +Note: Only one action (--list, --nuke, or --dry-run) can be specified at a time. + +""" + +import argparse +import os +from datetime import datetime +from typing import Optional, Tuple + +import boto3 +from mypy_boto3_s3 import S3Client + + +def get_s3_client_with_role( + client_id: str, + client_secret: str, + role_arn: str, + region: str = "us-east-1", + session_name: str = "IcebergSession", +) -> Tuple[S3Client, datetime]: # type: ignore + """ + Create an S3 client with assumed role credentials. + """ + session = boto3.Session( + aws_access_key_id=client_id, + aws_secret_access_key=client_secret, + region_name=region, + ) + + sts_client = session.client("sts") + + assumed_role_object = sts_client.assume_role( + RoleArn=role_arn, RoleSessionName=session_name + ) + + credentials = assumed_role_object["Credentials"] + + s3_client: S3Client = boto3.client( + "s3", + region_name=region, + aws_access_key_id=credentials["AccessKeyId"], + aws_secret_access_key=credentials["SecretAccessKey"], + aws_session_token=credentials["SessionToken"], + ) + + return s3_client, credentials["Expiration"] + + +def delete_s3_objects( + s3_client: S3Client, bucket_name: str, prefix: str, dry_run: bool = False +) -> None: + """ + Delete all objects under the specified prefix. + """ + paginator = s3_client.get_paginator("list_objects_v2") + + for page in paginator.paginate(Bucket=bucket_name, Prefix=prefix): + objects_to_delete = [] + for obj in page.get("Contents", []): + objects_to_delete.append({"Key": obj["Key"]}) + if dry_run: + print(f"Would delete: {obj['Key']}") + print(f" Size: {obj['Size'] / (1024 * 1024):.2f} MB") + print(f" Last Modified: {obj['LastModified']}") + + if objects_to_delete and not dry_run: + s3_client.delete_objects( + Bucket=bucket_name, + Delete={"Objects": objects_to_delete}, # type: ignore + ) + print(f"Deleted {len(objects_to_delete)} objects") + + +def list_s3_contents( + s3_path: str, + client_id: str, + client_secret: str, + role_arn: str, + region: str = "us-east-1", + delimiter: Optional[str] = None, + nuke: bool = False, + dry_run: bool = False, +) -> None: + """ + List or delete contents of an S3 path using assumed role credentials. 
+ """ + if not s3_path.startswith("s3://"): + raise ValueError("S3 path must start with 's3://'") + + bucket_name = s3_path.split("/")[2] + prefix = "/".join(s3_path.split("/")[3:]) + if prefix and not prefix.endswith("/"): + prefix += "/" + + s3_client, expiration = get_s3_client_with_role( + client_id=client_id, + client_secret=client_secret, + role_arn=role_arn, + region=region, + ) + + operation = "Deleting" if nuke else "Would delete" if dry_run else "Listing" + print(f"\n{operation} contents of {s3_path}") + print(f"Using role: {role_arn}") + print(f"Credentials expire at: {expiration}") + print("-" * 60) + + if nuke or dry_run: + delete_s3_objects(s3_client, bucket_name, prefix, dry_run) + return + + paginator = s3_client.get_paginator("list_objects_v2") + + list_params = {"Bucket": bucket_name, "Prefix": prefix} + if delimiter: + list_params["Delimiter"] = delimiter + + try: + pages = paginator.paginate(**list_params) # type: ignore + found_contents = False + + for page in pages: + if delimiter and "CommonPrefixes" in page: + for common_prefix in page.get("CommonPrefixes", []): + found_contents = True + folder_name = common_prefix["Prefix"][len(prefix) :].rstrip("/") + print(f"📁 {folder_name}/") + + for obj in page.get("Contents", []): + found_contents = True + file_path = obj["Key"][len(prefix) :] + if file_path: + size_mb = obj["Size"] / (1024 * 1024) + print(f"📄 {file_path}") + print(f" Size: {size_mb:.2f} MB") + print(f" Last Modified: {obj['LastModified']}") + + if not found_contents: + print("No contents found in the specified path.") + + except Exception as e: + print(f"Error accessing contents: {str(e)}") + + +def main(): + parser = argparse.ArgumentParser(description="S3 Content Manager") + parser.add_argument( + "s3_path", help="S3 path to operate on (e.g., s3://bucket/prefix)" + ) + parser.add_argument( + "--list", action="store_true", help="List all folders and files" + ) + parser.add_argument("--nuke", action="store_true", help="Delete all contents") + parser.add_argument( + "--dry-run", + action="store_true", + help="Show what would be deleted without actually deleting", + ) + parser.add_argument( + "--region", default="us-east-1", help="AWS region (default: us-east-1)" + ) + + args = parser.parse_args() + + # Get environment variables + role_arn = os.environ.get("DH_ICEBERG_AWS_ROLE") + client_id = os.environ.get("DH_ICEBERG_CLIENT_ID") + client_secret = os.environ.get("DH_ICEBERG_CLIENT_SECRET") + + if not all([role_arn, client_id, client_secret]): + raise ValueError( + "Missing required environment variables. Please set DH_ICEBERG_AWS_ROLE, DH_ICEBERG_CLIENT_ID, and DH_ICEBERG_CLIENT_SECRET" + ) + + # Validate arguments + if sum([args.list, args.nuke, args.dry_run]) != 1: + parser.error("Please specify exactly one action: --list, --nuke, or --dry-run") + + list_s3_contents( + args.s3_path, + client_id=client_id, # type: ignore + client_secret=client_secret, # type: ignore + role_arn=role_arn, # type: ignore + region=args.region, + # delimiter='/' if args.list else None, + nuke=args.nuke, + dry_run=args.dry_run, + ) + + +if __name__ == "__main__": + main() diff --git a/metadata-ingestion/examples/iceberg/provision_warehouse.py b/metadata-ingestion/examples/iceberg/provision_warehouse.py new file mode 100644 index 00000000000000..58cc1e850680e7 --- /dev/null +++ b/metadata-ingestion/examples/iceberg/provision_warehouse.py @@ -0,0 +1,48 @@ +""" +A script to provision a warehouse on DataHub (and Iceberg). 
+ +This script uses environment variables to configure the Iceberg client and +provision a warehouse on DataHub. The required environment variables are: +- DH_ICEBERG_CLIENT_ID: The client ID for the Icebreaker service. +- DH_ICEBERG_CLIENT_SECRET: The client secret for the Icebreaker service. +- DH_ICEBERG_AWS_ROLE: The test role for the Icebreaker service. +- DH_ICEBERG_DATA_ROOT: The root directory for Icebreaker data. + +The script asserts the presence of these environment variables and then +executes a system command to create the warehouse using the DataHub Iceberg CLI. + +Usage: + Ensure the required environment variables are set, then run the script. + +Example: + $ export DH_ICEBERG_CLIENT_ID="your_client_id" + $ export DH_ICEBERG_CLIENT_SECRET="your_client_secret" + $ export DH_ICEBERG_AWS_ROLE="your_test_role" + $ export DH_ICEBERG_DATA_ROOT="your_data_root" + $ python provision_warehouse.py +""" + +import os + +from constants import warehouse + +# Assert that env variables are present + +assert os.environ.get("DH_ICEBERG_CLIENT_ID"), ( + "DH_ICEBERG_CLIENT_ID variable is not present" +) +assert os.environ.get("DH_ICEBERG_CLIENT_SECRET"), ( + "DH_ICEBERG_CLIENT_SECRET variable is not present" +) +assert os.environ.get("DH_ICEBERG_AWS_ROLE"), ( + "DH_ICEBERG_AWS_ROLE variable is not present" +) +assert os.environ.get("DH_ICEBERG_DATA_ROOT"), ( + "DH_ICEBERG_DATA_ROOT variable is not present" +) + +assert os.environ.get("DH_ICEBERG_DATA_ROOT", "").startswith("s3://") + +os.system( + f"datahub iceberg create --warehouse {warehouse} --data_root $DH_ICEBERG_DATA_ROOT/{warehouse} --client_id $DH_ICEBERG_CLIENT_ID --client_secret $DH_ICEBERG_CLIENT_SECRET --region 'us-east-1' --role $DH_ICEBERG_AWS_ROLE" +) diff --git a/metadata-ingestion/examples/iceberg/read_table.py b/metadata-ingestion/examples/iceberg/read_table.py new file mode 100644 index 00000000000000..3678951cd5710f --- /dev/null +++ b/metadata-ingestion/examples/iceberg/read_table.py @@ -0,0 +1,15 @@ +from constants import namespace, table_name, warehouse +from pyiceberg.catalog import load_catalog + +# Load the catalog +from datahub.ingestion.graph.client import get_default_graph + +graph = get_default_graph() + +catalog = load_catalog("local_datahub", warehouse=warehouse, token=graph.config.token) +# Append the data to the Iceberg table +table = catalog.load_table(f"{namespace}.{table_name}") +con = table.scan().to_duckdb(table_name=f"{table_name}") + +for row in con.execute(f"SELECT * FROM {table_name}").fetchall(): + print(row) diff --git a/metadata-ingestion/examples/iceberg/requirements.txt b/metadata-ingestion/examples/iceberg/requirements.txt new file mode 100644 index 00000000000000..be79cbc80c647b --- /dev/null +++ b/metadata-ingestion/examples/iceberg/requirements.txt @@ -0,0 +1,3 @@ +# The code in this directory requires the latest pyiceberg +pyiceberg >= 0.8.1 +pyarrow >= 19.0.0 \ No newline at end of file diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py index f7e6482fd26f87..628c7156dadd5c 100644 --- a/metadata-ingestion/setup.py +++ b/metadata-ingestion/setup.py @@ -448,6 +448,7 @@ | pyhive_common | {"psycopg2-binary", "pymysql>=1.0.2"}, "iceberg": iceberg_common, + "iceberg-catalog": aws_common, "json-schema": set(), "kafka": kafka_common | kafka_protobuf, "kafka-connect": sql_common | {"requests", "JPype1"}, @@ -631,6 +632,7 @@ "elasticsearch", "feast", "iceberg", + "iceberg-catalog", "mlflow", "json-schema", "ldap", @@ -693,6 +695,7 @@ "hana", "hive", "iceberg", + "iceberg-catalog", 
"kafka-connect", "ldap", "mongodb", diff --git a/metadata-ingestion/src/datahub/cli/iceberg_cli.py b/metadata-ingestion/src/datahub/cli/iceberg_cli.py new file mode 100644 index 00000000000000..f3552f34f2b5bd --- /dev/null +++ b/metadata-ingestion/src/datahub/cli/iceberg_cli.py @@ -0,0 +1,707 @@ +import json +import logging +import sys +from datetime import datetime +from typing import Any, Dict, Iterator, List, Optional, Tuple, Union +from urllib.parse import urlparse + +import boto3 +import botocore +import click + +import datahub.metadata.schema_classes +from datahub.cli.cli_utils import post_entity +from datahub.configuration.common import GraphError +from datahub.ingestion.graph.client import DataHubGraph, get_default_graph +from datahub.metadata.schema_classes import SystemMetadataClass + +logger = logging.getLogger(__name__) + +DEFAULT_CREDS_EXPIRY_DURATION_SECONDS = 60 * 60 +DEFAULT_FABRIC_TYPE = datahub.metadata.schema_classes.FabricTypeClass.PROD + +DATA_PLATFORM_INSTANCE_WAREHOUSE_ASPECT = "icebergWarehouseInfo" + + +@click.group() +def iceberg() -> None: + """A group of commands to manage Iceberg warehouses using DataHub as the Iceberg Catalog.""" + pass + + +def validate_creds(client_id: str, client_secret: str, region: str) -> Any: + try: + # Create a boto3 client with the provided credentials + # Using STS (Security Token Service) for validation + sts_client = boto3.client( + "sts", + aws_access_key_id=client_id, + aws_secret_access_key=client_secret, + region_name=region, + ) + + # Try to get caller identity + sts_client.get_caller_identity() + + # If successful, return True and the account info + return sts_client + + except ( + botocore.exceptions.ClientError, + botocore.exceptions.NoCredentialsError, + ): + # If credentials are invalid, return False with error message + click.secho( + "Invalid credentials", + fg="red", + err=True, + ) + sys.exit(1) + + +def validate_role(role: str, sts_client: Any, duration_seconds: Optional[int]) -> None: + try: + session_name = ( + f"datahub-cli-iceberg-validation-{datetime.now().strftime('%Y%m%d%H%M%S')}" + ) + # Assume the IAM role to ensure the settings we have are valid and if not, can report them at config time. + + # If duration_seconds is not specified, datahub will attempt to default to an internal default + # defined in S3CredentialProvider.java DEFAULT_CREDS_DURATION_SECS. However, it is not possible to know for sure + # if that value is permitted based on how the role is configured. So, during the configuration of the warehouse + # we must attempt to use the intended expiration duration (default or explicitly supplied) to ensure it + # actually does work. 
+ if duration_seconds is None: + duration_seconds = DEFAULT_CREDS_EXPIRY_DURATION_SECONDS + + assumed_role = sts_client.assume_role( + RoleArn=role, + RoleSessionName=session_name, + DurationSeconds=duration_seconds, + ) + + # Extract the temporary credentials + credentials = assumed_role["Credentials"] + return credentials + + except Exception as e: + click.secho( + f"Failed to assume role using '{role}' with error: {e}", + fg="red", + err=True, + ) + sys.exit(1) + + +def validate_warehouse(data_root: str) -> None: + # validate data_root location + scheme = urlparse(data_root).scheme + if scheme != "s3": + click.secho( + f"Unsupported warehouse location '{data_root}', supported schemes: s3", + fg="red", + err=True, + ) + sys.exit(1) + + +@iceberg.command() +@click.option( + "-w", "--warehouse", required=True, type=str, help="The name of the warehouse" +) +@click.option( + "-p", "--description", required=False, type=str, help="Description of the warehouse" +) +@click.option( + "-d", + "--data_root", + required=True, + type=str, + help="The path to the data root for the warehouse data", +) +@click.option( + "-i", + "--client_id", + required=True, + type=str, + help="Client ID to authenticate with the storage provider of the data root", +) +@click.option( + "-s", + "--client_secret", + required=True, + type=str, + help="Client Secret to authenticate with the storage provider of the data root", +) +@click.option( + "-g", + "--region", + required=True, + type=str, + help="Storage provider specific region where the warehouse data root is located", +) +@click.option( + "-r", + "--role", + required=True, + type=str, + help="Storage provider specific role to be used when vending credentials", +) +@click.option( + "-e", + "--env", + required=False, + type=str, + help=f"Environment where all assets stored in this warehouse belong to. Defaults to {DEFAULT_FABRIC_TYPE} if unspecified", +) +@click.option( + "-x", + "--duration_seconds", + required=False, + type=int, + help=f"Expiration duration for temporary credentials used for role. Defaults to {DEFAULT_CREDS_EXPIRY_DURATION_SECONDS} seconds if unspecified", +) +def create( + warehouse: str, + description: Optional[str], + data_root: str, + client_id: str, + client_secret: str, + region: str, + role: str, + duration_seconds: Optional[int], + env: Optional[str], +) -> None: + """ + Create an iceberg warehouse. + """ + + client = get_default_graph() + + urn = iceberg_data_platform_instance_urn(warehouse) + + if client.exists(urn): + click.secho( + f"Warehouse with name {warehouse} already exists", + fg="red", + err=True, + ) + sys.exit(1) + + # will throw an actionable error message if invalid. 
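+    # validate_warehouse only accepts s3:// data roots; validate_creds checks the
+    # static credentials via STS get-caller-identity; validate_role confirms the
+    # role can actually be assumed for the requested (or default) duration.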
+ validate_warehouse(data_root) + storage_client = validate_creds(client_id, client_secret, region) + validate_role(role, storage_client, duration_seconds) + + client_id_urn, client_secret_urn = create_iceberg_secrets( + client, warehouse, client_id, client_secret + ) + + if env is None: + env = DEFAULT_FABRIC_TYPE + + warehouse_aspect = DATA_PLATFORM_INSTANCE_WAREHOUSE_ASPECT + warehouse_aspect_obj: Dict[str, Any] = { + "dataRoot": data_root, + "clientId": client_id_urn, + "clientSecret": client_secret_urn, + "region": region, + "role": role, + "env": env, + } + + if duration_seconds: + warehouse_aspect_obj["tempCredentialExpirationSeconds"] = duration_seconds + + data_platform_instance_properties_aspect_obj = { + "name": warehouse, + } + + if description: + data_platform_instance_properties_aspect_obj["description"] = description + + data_platform_instance_properties_aspect = "dataPlatformInstanceProperties" + + entity_type = "dataPlatformInstance" + system_metadata: Union[None, SystemMetadataClass] = None + + post_entity( + client._session, + client.config.server, + urn=urn, + aspect_name=data_platform_instance_properties_aspect, + entity_type=entity_type, + aspect_value=data_platform_instance_properties_aspect_obj, + system_metadata=system_metadata, + ) + + # If status is non 200, post_entity will raise an exception. + + post_entity( + client._session, + client.config.server, + urn=urn, + aspect_name=warehouse_aspect, + entity_type=entity_type, + aspect_value=warehouse_aspect_obj, + system_metadata=system_metadata, + ) + + click.secho( + f"✅ Created warehouse with urn {urn}, clientID: {client_id_urn}, and clientSecret: {client_secret_urn}", + fg="green", + ) + + +@iceberg.command() +@click.option( + "-w", "--warehouse", required=True, type=str, help="The name of the warehouse" +) +@click.option( + "-p", + "--description", + required=False, + type=str, + help="Description of the warehouse", +) +@click.option( + "-d", + "--data_root", + required=True, + type=str, + help="The path to the data root for the warehouse data", +) +@click.option( + "-i", + "--client_id", + required=True, + type=str, + help="Client ID to authenticate with the storage provider of the data root", +) +@click.option( + "-s", + "--client_secret", + required=True, + type=str, + help="Client Secret to authenticate with the storage provider of the data root", +) +@click.option( + "-g", + "--region", + required=True, + type=str, + help="Storage provider specific region where the warehouse data root is located", +) +@click.option( + "-r", + "--role", + required=True, + type=str, + help="Storage provider specific role to be used when vending credentials", +) +@click.option( + "-e", + "--env", + required=False, + type=str, + help=f"Environment where all assets stored in this warehouse belong to. Defaults to {DEFAULT_FABRIC_TYPE} if unspecified", +) +@click.option( + "-x", + "--duration_seconds", + required=False, + type=int, + help=f"Expiration duration for temporary credentials used for role. Defaults to {DEFAULT_CREDS_EXPIRY_DURATION_SECONDS} seconds if unspecified", +) +def update( + warehouse: str, + data_root: str, + description: Optional[str], + client_id: str, + client_secret: str, + region: str, + role: str, + env: Optional[str], + duration_seconds: Optional[int], +) -> None: + """ + Update iceberg warehouses. Can only update credentials, and role. 
Cannot update region + """ + + client = get_default_graph() + + urn = iceberg_data_platform_instance_urn(warehouse) + + if not client.exists(urn): + raise click.ClickException(f"Warehouse with name {warehouse} does not exist") + + validate_warehouse(data_root) + storage_client = validate_creds(client_id, client_secret, region) + validate_role(role, storage_client, duration_seconds) + + client_id_urn, client_secret_urn = update_iceberg_secrets( + client, warehouse, client_id, client_secret + ) + + if env is None: + env = DEFAULT_FABRIC_TYPE + + warehouse_aspect = DATA_PLATFORM_INSTANCE_WAREHOUSE_ASPECT + warehouse_aspect_obj: Dict[str, Any] = { + "dataRoot": data_root, + "clientId": client_id_urn, + "clientSecret": client_secret_urn, + "region": region, + "role": role, + "env": env, + } + if duration_seconds: + warehouse_aspect_obj["tempCredentialExpirationSeconds"] = duration_seconds + + data_platform_instance_properties_aspect_obj = { + "name": warehouse, + } + + if description: + data_platform_instance_properties_aspect_obj["description"] = description + + data_platform_instance_properties_aspect = "dataPlatformInstanceProperties" + + entity_type = "dataPlatformInstance" + system_metadata: Union[None, SystemMetadataClass] = None + + post_entity( + client._session, + client.config.server, + urn=urn, + aspect_name=data_platform_instance_properties_aspect, + entity_type=entity_type, + aspect_value=data_platform_instance_properties_aspect_obj, + system_metadata=system_metadata, + ) + + # If status is non 200, post_entity will raise an exception. + post_entity( + client._session, + client.config.server, + urn=urn, + aspect_name=warehouse_aspect, + entity_type=entity_type, + aspect_value=warehouse_aspect_obj, + system_metadata=system_metadata, + ) + + click.secho( + f"✅ Updated warehouse with urn {urn}, clientID: {client_id_urn}, and clientSecret: {client_secret_urn}", + fg="green", + ) + + +@iceberg.command() +def list() -> None: + """ + List iceberg warehouses + """ + + client = get_default_graph() + + for warehouse in get_all_warehouses(client): + click.echo(warehouse) + + +@iceberg.command() +@click.option( + "-w", "--warehouse", required=True, type=str, help="The name of the warehouse" +) +def get(warehouse: str) -> None: + """Fetches the details of the specified iceberg warehouse""" + client = get_default_graph() + urn = iceberg_data_platform_instance_urn(warehouse) + + if client.exists(urn): + warehouse_aspect = client.get_aspect( + entity_urn=urn, + aspect_type=datahub.metadata.schema_classes.IcebergWarehouseInfoClass, + ) + click.echo(urn) + if warehouse_aspect: + click.echo(json.dumps(warehouse_aspect.to_obj(), sort_keys=True, indent=2)) + else: + raise click.ClickException(f"Iceberg warehouse {warehouse} does not exist") + + +@iceberg.command() +@click.option( + "-w", "--warehouse", required=True, type=str, help="The name of the warehouse" +) +@click.option("-n", "--dry-run", required=False, is_flag=True) +@click.option( + "-f", + "--force", + required=False, + is_flag=True, + help="force the delete if set without confirmation", +) +def delete(warehouse: str, dry_run: bool, force: bool) -> None: + """ + Delete warehouse + """ + + urn = iceberg_data_platform_instance_urn(warehouse) + + client = get_default_graph() + + if not client.exists(urn): + raise click.ClickException(f"urn {urn} not found") + + # Confirm this is a managed warehouse by checking for presence of IcebergWarehouse aspect + aspect = client.get_aspect( + entity_urn=urn, + 
aspect_type=datahub.metadata.schema_classes.IcebergWarehouseInfoClass, + ) + if aspect: + warehouse_aspect: datahub.metadata.schema_classes.IcebergWarehouseInfoClass = ( + aspect + ) + + urns_to_delete: List = [] + resource_names_to_be_deleted: List = [] + for entity in get_related_entities_for_platform_instance(client, urn): + # Do we really need this double-check? + if "__typename" in entity and "urn" in entity: + if entity["__typename"] in ["Container", "Dataset"]: + urns_to_delete.append(entity["urn"]) + resource_names_to_be_deleted.append( + entity.get("name", entity.get("urn")) + ) + # TODO: PlatformResource associated with datasets need to be deleted. + + if dry_run: + click.echo( + f"[Dry-run] Would delete warehouse {urn} and the following datasets and namespaces" + ) + for resource in resource_names_to_be_deleted: + click.echo(f" {resource}") + else: + if not force: + click.confirm( + f"This will delete {warehouse} warehouse, credentials, and {len(urns_to_delete)} datasets and namespaces from DataHub. Do you want to continue?", + abort=True, + ) + client.hard_delete_entity(urn) + client.hard_delete_entity(warehouse_aspect.clientId) + client.hard_delete_entity(warehouse_aspect.clientSecret) + + for urn_to_delete in urns_to_delete: + client.hard_delete_entity(urn_to_delete) + + click.echo( + f"✅ Successfully deleted iceberg warehouse {warehouse} and associated credentials, {len(urns_to_delete)} datasets and namespaces" + ) + + +def iceberg_data_platform_instance_urn(warehouse: str) -> str: + return f"urn:li:dataPlatformInstance:({iceberg_data_platform()},{warehouse})" + + +def iceberg_data_platform() -> str: + return "urn:li:dataPlatform:iceberg" + + +def iceberg_client_id_urn(warehouse): + return f"urn:li:dataHubSecret:{warehouse}-client_id" + + +def iceberg_client_secret_urn(warehouse): + return f"urn:li:dataHubSecret:{warehouse}-client_secret" + + +def create_iceberg_secrets( + client: DataHubGraph, warehouse: str, client_id: str, client_secret: str +) -> Tuple[str, str]: + graphql_query = """ + mutation createIcebergSecrets($clientIdName: String!, $clientId: String!, $clientSecretName: String!, $clientSecret: String!) 
{ + createClientId: createSecret( + input: {name: $clientIdName, value: $clientId} + ) + createClientSecret: createSecret( + input: {name: $clientSecretName, value: $clientSecret} + ) + } + """ + variables = { + "clientIdName": f"{warehouse}-client_id", + "clientId": client_id, + "clientSecretName": f"{warehouse}-client_secret", + "clientSecret": client_secret, + } + try: + response = client.execute_graphql( + graphql_query, variables=variables, format_exception=False + ) + except GraphError as graph_error: + try: + error = json.loads(str(graph_error).replace('"', '\\"').replace("'", '"')) + click.secho( + f"Failed to save Iceberg warehouse credentials :{error[0]['message']}", + fg="red", + err=True, + ) + except Exception: + click.secho( + f"Failed to save Iceberg warehouse credentials :\n{graph_error}", + fg="red", + err=True, + ) + sys.exit(1) + + if "createClientId" in response and "createClientSecret" in response: + return response["createClientId"], response["createClientSecret"] + + click.secho( + f"Internal error: Unexpected response saving credentials:\n{response}", + fg="red", + err=True, + ) + sys.exit(1) + + +def update_iceberg_secrets( + client: DataHubGraph, warehouse: str, client_id: str, client_secret: str +) -> Tuple[str, str]: + graphql_query = """ + mutation updateIcebergSecrets($clientIdUrn: String!, $clientIdName: String!, $clientId: String!, $clientSecretUrn: String!, $clientSecretName: String!, $clientSecret: String!) { + updateClientId: updateSecret( + input: {urn: $clientIdUrn, name: $clientIdName, value: $clientId} + ) + updateClientSecret: updateSecret( + input: {urn: $clientSecretUrn, name: $clientSecretName, value: $clientSecret} + ) + } + """ + variables = { + "clientIdUrn": iceberg_client_id_urn(warehouse), + "clientIdName": f"{warehouse}-client_id", + "clientId": client_id, + "clientSecretUrn": iceberg_client_secret_urn(warehouse), + "clientSecretName": f"{warehouse}-client_secret", + "clientSecret": client_secret, + } + try: + response = client.execute_graphql( + graphql_query, variables=variables, format_exception=False + ) + except GraphError as graph_error: + try: + error = json.loads(str(graph_error).replace('"', '\\"').replace("'", '"')) + click.secho( + f"Failed to save Iceberg warehouse credentials :{error[0]['message']}", + fg="red", + err=True, + ) + except Exception: + click.secho( + f"Failed to save Iceberg warehouse credentials :\n{graph_error}", + fg="red", + err=True, + ) + sys.exit(1) + + if "updateClientId" in response and "updateClientSecret" in response: + return response["updateClientId"], response["updateClientSecret"] + + click.secho( + f"Internal error: Unexpected response saving credentials:\n{response}", + fg="red", + err=True, + ) + sys.exit(1) + + +def get_all_warehouses(client: DataHubGraph) -> Iterator[str]: + start: int = 0 + total = None + graph_query = """ + query getIcebergWarehouses($start: Int, $count: Int) { + search( + input: {type: DATA_PLATFORM_INSTANCE, query: "*", start: $start, count: $count} + ) { + start + total + searchResults { + entity { + urn + ... 
on DataPlatformInstance { + instanceId + } + } + } + } + } + """ + count = 10 + variables = {"start": start, "count": count} + while total is None or start < total: + response = client.execute_graphql( + graph_query, variables=variables, format_exception=True + ) + if "search" in response and "total" in response["search"]: + total = response["search"]["total"] + search_results = response["search"].get("searchResults", []) + for result in search_results: + yield result["entity"]["instanceId"] + start += count + variables = {"start": start, "count": count} + # if total is not None and + else: + break + + +def get_related_entities_for_platform_instance( + client: DataHubGraph, data_platform_instance_urn: str +) -> Iterator[Dict]: + start: int = 0 + total = None + + graph_query = """ + query getIcebergResources($platformInstanceUrn: String!, $start: Int!, $count: Int!) { + searchAcrossEntities( + input: {types: [DATASET, CONTAINER], query: "*", start: $start, count: $count, orFilters: [{and: [{field: "platformInstance", values: [$platformInstanceUrn]}]}]} + ) { + start + total + searchResults { + entity { + __typename + urn + ... on Dataset { + urn + name + } + } + } + } + } + """ + count = 10 + variables = { + "start": start, + "count": count, + "platformInstanceUrn": data_platform_instance_urn, + } + while total is None or start < total: + response = client.execute_graphql( + graph_query, variables=variables, format_exception=True + ) + if ( + "searchAcrossEntities" in response + and "total" in response["searchAcrossEntities"] + ): + total = response["searchAcrossEntities"]["total"] + search_results = response["searchAcrossEntities"].get("searchResults", []) + for result in search_results: + yield result["entity"] + start += count + variables["start"] = start + else: + break diff --git a/metadata-ingestion/src/datahub/entrypoints.py b/metadata-ingestion/src/datahub/entrypoints.py index 73d35381d5df29..cdcbffc5c5c2bd 100644 --- a/metadata-ingestion/src/datahub/entrypoints.py +++ b/metadata-ingestion/src/datahub/entrypoints.py @@ -20,6 +20,7 @@ from datahub.cli.env_utils import get_boolean_env_variable from datahub.cli.exists_cli import exists from datahub.cli.get_cli import get +from datahub.cli.iceberg_cli import iceberg from datahub.cli.ingest_cli import ingest from datahub.cli.migrate import migrate from datahub.cli.put_cli import put @@ -182,6 +183,7 @@ def init(use_password: bool = False) -> None: datahub.add_command(datacontract) datahub.add_command(assertions) datahub.add_command(container) +datahub.add_command(iceberg) try: from datahub.cli.lite_cli import lite diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java index 153aa0685f9b26..7de32f7faa328e 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java @@ -1,11 +1,6 @@ package com.linkedin.metadata.entity; -import static com.linkedin.metadata.Constants.APP_SOURCE; -import static com.linkedin.metadata.Constants.ASPECT_LATEST_VERSION; -import static com.linkedin.metadata.Constants.FORCE_INDEXING_KEY; -import static com.linkedin.metadata.Constants.STATUS_ASPECT_NAME; -import static com.linkedin.metadata.Constants.SYSTEM_ACTOR; -import static com.linkedin.metadata.Constants.UI_SOURCE; +import static com.linkedin.metadata.Constants.*; import static 
com.linkedin.metadata.entity.TransactionContext.DEFAULT_MAX_TRANSACTION_RETRY; import static com.linkedin.metadata.utils.PegasusUtils.constructMCL; import static com.linkedin.metadata.utils.PegasusUtils.getDataTemplateClassFromSchema; @@ -1502,20 +1497,37 @@ public String batchApplyRetention( private boolean preprocessEvent( @Nonnull OperationContext opContext, MetadataChangeLog metadataChangeLog) { + if (updateIndicesService == null) { + return false; + } + + boolean syncIndexUpdate = false; + if (preProcessHooks.isUiEnabled()) { if (metadataChangeLog.getSystemMetadata() != null) { if (metadataChangeLog.getSystemMetadata().getProperties() != null) { if (UI_SOURCE.equals( metadataChangeLog.getSystemMetadata().getProperties().get(APP_SOURCE))) { // Pre-process the update indices hook for UI updates to avoid perceived lag from Kafka - if (updateIndicesService != null) { - updateIndicesService.handleChangeEvent(opContext, metadataChangeLog); - } - return true; + syncIndexUpdate = true; } } } } + if (!syncIndexUpdate && metadataChangeLog.getHeaders() != null) { + if (metadataChangeLog + .getHeaders() + .getOrDefault(SYNC_INDEX_UPDATE_HEADER_NAME, "false") + .equalsIgnoreCase(Boolean.toString(true))) { + // A specific MCP requested a sync index update. + syncIndexUpdate = true; + } + } + + if (syncIndexUpdate) { + updateIndicesService.handleChangeEvent(opContext, metadataChangeLog); + return true; + } return false; } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/entity/EntityServiceTest.java b/metadata-io/src/test/java/com/linkedin/metadata/entity/EntityServiceTest.java index dabdf493fa1f99..f2726af772f94f 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/entity/EntityServiceTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/entity/EntityServiceTest.java @@ -2006,6 +2006,97 @@ public void testUIPreProcessedProposal() throws Exception { "datasetKey")); } + @Test + public void testSyncHeaderPreProcessedProposal() throws Exception { + Urn entityUrn = UrnUtils.getUrn("urn:li:dataset:(urn:li:dataPlatform:foo,bar,PROD)"); + EditableDatasetProperties datasetProperties = new EditableDatasetProperties(); + datasetProperties.setDescription("Foo Bar"); + MetadataChangeProposal gmce = new MetadataChangeProposal(); + gmce.setEntityUrn(entityUrn); + gmce.setChangeType(ChangeType.UPSERT); + gmce.setEntityType("dataset"); + gmce.setAspectName("editableDatasetProperties"); + + JacksonDataTemplateCodec dataTemplateCodec = new JacksonDataTemplateCodec(); + byte[] datasetPropertiesSerialized = dataTemplateCodec.dataTemplateToBytes(datasetProperties); + GenericAspect genericAspect = new GenericAspect(); + genericAspect.setValue(ByteString.unsafeWrap(datasetPropertiesSerialized)); + genericAspect.setContentType("application/json"); + gmce.setAspect(genericAspect); + + // verify with sync header + StringMap headers = new StringMap(); + headers.put(SYNC_INDEX_UPDATE_HEADER_NAME, "true"); + gmce.setHeaders(headers); + + ArgumentCaptor mceCaptor = ArgumentCaptor.forClass(MetadataChangeLog.class); + _entityServiceImpl.ingestProposal(opContext, gmce, TEST_AUDIT_STAMP, false); + + verify(_mockUpdateIndicesService, times(1)) + .handleChangeEvent(eq(opContext), mceCaptor.capture()); + assertTrue( + mceCaptor + .getValue() + .getHeaders() + .get(SYNC_INDEX_UPDATE_HEADER_NAME) + .equalsIgnoreCase("true")); + assertEquals(mceCaptor.getValue().getEntityUrn(), entityUrn); + } + + @Test + public void testWithoutSyncHeaderOrUISourcePreProcessedProposal() throws Exception { + Urn entityUrn = 
UrnUtils.getUrn("urn:li:dataset:(urn:li:dataPlatform:foo,bar,PROD)"); + EditableDatasetProperties datasetProperties = new EditableDatasetProperties(); + datasetProperties.setDescription("Foo Bar"); + MetadataChangeProposal gmce = new MetadataChangeProposal(); + gmce.setEntityUrn(entityUrn); + gmce.setChangeType(ChangeType.UPSERT); + gmce.setEntityType("dataset"); + gmce.setAspectName("editableDatasetProperties"); + + JacksonDataTemplateCodec dataTemplateCodec = new JacksonDataTemplateCodec(); + byte[] datasetPropertiesSerialized = dataTemplateCodec.dataTemplateToBytes(datasetProperties); + GenericAspect genericAspect = new GenericAspect(); + genericAspect.setValue(ByteString.unsafeWrap(datasetPropertiesSerialized)); + genericAspect.setContentType("application/json"); + gmce.setAspect(genericAspect); + + ArgumentCaptor mceCaptor = ArgumentCaptor.forClass(MetadataChangeLog.class); + _entityServiceImpl.ingestProposal(opContext, gmce, TEST_AUDIT_STAMP, false); + + verify(_mockUpdateIndicesService, never()).handleChangeEvent(any(), any()); + } + + @Test + public void testWithNullUpdateIndicesServicePreProcessedProposal() throws Exception { + _entityServiceImpl.setUpdateIndicesService( + null); // this should cause skipping of the sync index update + Urn entityUrn = UrnUtils.getUrn("urn:li:dataset:(urn:li:dataPlatform:foo,bar,PROD)"); + EditableDatasetProperties datasetProperties = new EditableDatasetProperties(); + datasetProperties.setDescription("Foo Bar"); + MetadataChangeProposal gmce = new MetadataChangeProposal(); + gmce.setEntityUrn(entityUrn); + gmce.setChangeType(ChangeType.UPSERT); + gmce.setEntityType("dataset"); + gmce.setAspectName("editableDatasetProperties"); + + StringMap headers = new StringMap(); + headers.put(SYNC_INDEX_UPDATE_HEADER_NAME, "true"); + gmce.setHeaders(headers); + + JacksonDataTemplateCodec dataTemplateCodec = new JacksonDataTemplateCodec(); + byte[] datasetPropertiesSerialized = dataTemplateCodec.dataTemplateToBytes(datasetProperties); + GenericAspect genericAspect = new GenericAspect(); + genericAspect.setValue(ByteString.unsafeWrap(datasetPropertiesSerialized)); + genericAspect.setContentType("application/json"); + gmce.setAspect(genericAspect); + + ArgumentCaptor mceCaptor = ArgumentCaptor.forClass(MetadataChangeLog.class); + _entityServiceImpl.ingestProposal(opContext, gmce, TEST_AUDIT_STAMP, false); + + verify(_mockUpdateIndicesService, never()).handleChangeEvent(any(), any()); + } + @Test public void testStructuredPropertyIngestProposal() throws Exception { String urnStr = "urn:li:dataset:(urn:li:dataPlatform:looker,sample_dataset_unique,PROD)"; diff --git a/metadata-models/src/main/pegasus/com/linkedin/dataplatforminstance/IcebergWarehouse.pdl b/metadata-models/src/main/pegasus/com/linkedin/dataplatforminstance/IcebergWarehouseInfo.pdl similarity index 67% rename from metadata-models/src/main/pegasus/com/linkedin/dataplatforminstance/IcebergWarehouse.pdl rename to metadata-models/src/main/pegasus/com/linkedin/dataplatforminstance/IcebergWarehouseInfo.pdl index c17252e6bb3e7c..0bd914dc71da4c 100644 --- a/metadata-models/src/main/pegasus/com/linkedin/dataplatforminstance/IcebergWarehouse.pdl +++ b/metadata-models/src/main/pegasus/com/linkedin/dataplatforminstance/IcebergWarehouseInfo.pdl @@ -1,14 +1,15 @@ namespace com.linkedin.dataplatforminstance import com.linkedin.common.Urn +import com.linkedin.common.FabricType /** * An Iceberg warehouse location and credentails whose read/writes are governed by datahub catalog. 
*/ @Aspect = { - "name": "icebergWarehouse" + "name": "icebergWarehouseInfo" } -record IcebergWarehouse { +record IcebergWarehouseInfo { /** * Path of the root for the backing store of the tables in the warehouse. @@ -34,4 +35,14 @@ record IcebergWarehouse { * Role to be used when vending credentials to writers. */ role: optional string + + /* + * Expiration for temporary credentials created to access this warehouse. + */ + tempCredentialExpirationSeconds: optional int + + /* + * Environment where all assets stored in this warehouse belong to + */ + env: FabricType } \ No newline at end of file diff --git a/metadata-models/src/main/pegasus/com/linkedin/dataset/IcebergCatalogInfo.pdl b/metadata-models/src/main/pegasus/com/linkedin/dataset/IcebergCatalogInfo.pdl new file mode 100644 index 00000000000000..085192b51a90a4 --- /dev/null +++ b/metadata-models/src/main/pegasus/com/linkedin/dataset/IcebergCatalogInfo.pdl @@ -0,0 +1,25 @@ +namespace com.linkedin.dataset + +import com.linkedin.common.Uri +import com.linkedin.common.CustomProperties +import com.linkedin.common.ExternalReference +import com.linkedin.common.TimeStamp + +/** + * Iceberg Catalog metadata associated with an Iceberg table/view + */ +@Aspect = { + "name": "icebergCatalogInfo" +} +record IcebergCatalogInfo { + + /** + * When Datahub is the REST Catalog for an Iceberg Table, stores the current metadata pointer. + * If the Iceberg table is managed by an external catalog, the metadata pointer is not set. + */ + metadataPointer: optional string + + view: optional boolean + + // tableProperties: map[string, string] = { } +} diff --git a/metadata-models/src/main/pegasus/com/linkedin/dataset/IcebergMetadata.pdl b/metadata-models/src/main/pegasus/com/linkedin/dataset/IcebergMetadata.pdl deleted file mode 100644 index 01307e785cf5ce..00000000000000 --- a/metadata-models/src/main/pegasus/com/linkedin/dataset/IcebergMetadata.pdl +++ /dev/null @@ -1,21 +0,0 @@ -namespace com.linkedin.dataset - -import com.linkedin.common.Uri -import com.linkedin.common.CustomProperties -import com.linkedin.common.ExternalReference -import com.linkedin.common.TimeStamp - -/** - * Iceberg metadata associated with an Iceberg table/view - */ -@Aspect = { - "name": "icebergMetadata" -} -record IcebergMetadata { - - metadataPointer: string - - view: boolean - - // tableProperties: map[string, string] = { } -} diff --git a/metadata-models/src/main/resources/entity-registry.yml b/metadata-models/src/main/resources/entity-registry.yml index 8c35f4e8ddf56d..493b17c7c3d861 100644 --- a/metadata-models/src/main/resources/entity-registry.yml +++ b/metadata-models/src/main/resources/entity-registry.yml @@ -47,7 +47,7 @@ entities: - forms - partitionsSummary - versionProperties - - icebergMetadata + - icebergCatalogInfo - name: dataHubPolicy doc: DataHub Policies represent access policies granted to users or groups on metadata operations like edit, view etc. 
category: internal @@ -338,7 +338,7 @@ entities: - institutionalMemory - deprecation - status - - icebergWarehouse + - icebergWarehouseInfo - name: mlModel category: core keyAspect: mlModelKey diff --git a/metadata-service/configuration/src/main/resources/application.yaml b/metadata-service/configuration/src/main/resources/application.yaml index ab64c654c22304..4f6bf8d1240033 100644 --- a/metadata-service/configuration/src/main/resources/application.yaml +++ b/metadata-service/configuration/src/main/resources/application.yaml @@ -8,7 +8,7 @@ baseUrl: ${DATAHUB_BASE_URL:http://localhost:9002} authentication: # Enable if you want all requests to the Metadata Service to be authenticated. enabled: ${METADATA_SERVICE_AUTH_ENABLED:true} - excludedPaths: /schema-registry/*,/health,/config,/config/search/export + excludedPaths: /schema-registry/*,/health,/config,/config/search/export,/public-iceberg/* # Disable if you want to skip validation of deleted user's tokens enforceExistenceEnabled: ${METADATA_SERVICE_AUTH_ENFORCE_EXISTENCE_ENABLED:true} @@ -649,3 +649,7 @@ metadataChangeProposal: initialIntervalMs: ${MCP_TIMESERIES_INITIAL_INTERVAL_MS:100} multiplier: ${MCP_TIMESERIES_MULTIPLIER:10} maxIntervalMs: ${MCP_TIMESERIES_MAX_INTERVAL_MS:30000} + +icebergCatalog: + enablePublicRead: ${ENABLE_PUBLIC_READ:false} + publiclyReadableTag: ${PUBLICLY_READABLE_TAG:PUBLICLY_READABLE} \ No newline at end of file diff --git a/metadata-service/iceberg-catalog/build.gradle b/metadata-service/iceberg-catalog/build.gradle index f94c88a8ce3000..f5fa8c32e479c7 100644 --- a/metadata-service/iceberg-catalog/build.gradle +++ b/metadata-service/iceberg-catalog/build.gradle @@ -1,6 +1,12 @@ plugins { id 'java' } +apply from: '../../gradle/coverage/java-coverage.gradle' + +ext { + python_executable = 'python3' + venv_name = 'venv' +} dependencies { implementation project(':metadata-service:services') @@ -10,9 +16,9 @@ dependencies { implementation project(':metadata-integration:java:datahub-schematron:lib') implementation 'org.apache.iceberg:iceberg-core:1.6.1' implementation 'org.apache.iceberg:iceberg-aws:1.6.1' - implementation 'software.amazon.awssdk:sts:2.29.29' - implementation 'software.amazon.awssdk:iam-policy-builder:2.29.29' - implementation 'software.amazon.awssdk:s3:2.29.29' + implementation 'software.amazon.awssdk:sts:2.26.12' + implementation 'software.amazon.awssdk:iam-policy-builder:2.26.12' + implementation externalDependency.awsS3 implementation externalDependency.reflections implementation externalDependency.springBoot @@ -32,6 +38,7 @@ dependencies { implementation externalDependency.antlr4 implementation externalDependency.javaxInject implementation externalDependency.avro + implementation externalDependency.guava annotationProcessor externalDependency.lombok @@ -47,4 +54,31 @@ dependencies { testImplementation externalDependency.jacksonCore testImplementation externalDependency.jacksonDataBind testImplementation externalDependency.springBootStarterWeb -} \ No newline at end of file +} + +task installDev(type: Exec) { + inputs.file file('pyproject.toml') + inputs.file file('requirements.txt') + outputs.file("${venv_name}/.build_install_dev_sentinel") + commandLine 'bash', '-c', + "set -x && " + + "${python_executable} -m venv ${venv_name} && " + + "${venv_name}/bin/python -m pip install --upgrade uv && " + + "set +x && source ${venv_name}/bin/activate && set -x && " + + "uv pip install -r requirements.txt && " + + "touch ${venv_name}/.build_install_dev_sentinel" +} + +task 
integrationTestQuick(type: Exec, dependsOn: installDev) { + workingDir = project.projectDir + commandLine 'bash', '-c', + "source ${venv_name}/bin/activate && set -x && " + + "pytest -m quick" +} + +task integrationTest(type: Exec, dependsOn: installDev) { + workingDir = project.projectDir + commandLine 'bash', '-c', + "source ${venv_name}/bin/activate && set -x && " + + "pytest" +} diff --git a/metadata-service/iceberg-catalog/pyproject.toml b/metadata-service/iceberg-catalog/pyproject.toml new file mode 100644 index 00000000000000..3115f1d7d91c04 --- /dev/null +++ b/metadata-service/iceberg-catalog/pyproject.toml @@ -0,0 +1,46 @@ +[build-system] +requires = ["setuptools"] +build-backend = "setuptools.build_meta" + +[project] +name = "iceberg-catalog-integration-test" +version = "0.0.0" +description = "" +authors = [ + { name="Acryl Data", email="eng@acryl.io" }, +] +requires-python = ">=3.9" + + +[tool.black] +extend-exclude = ''' +# A regex preceded with ^/ will apply only to files and directories +# in the root of the project. +tmp +venv +''' +include = '\.pyi?$' +target-version = ['py310'] + +[tool.isort] +profile = 'black' + +[tool.ruff] +ignore = [ + 'E501', # Ignore line length, since black handles that. + 'D203', # Ignore 1 blank line required before class docstring. +] + +[tool.mypy] +exclude = "^(venv/|build/|dist/)" +ignore_missing_imports = true +namespace_packages = false +check_untyped_defs = true +disallow_untyped_decorators = true +warn_unused_configs = true +# eventually we'd like to enable these +disallow_incomplete_defs = false +disallow_untyped_defs = false + +[tool.pyright] +extraPaths = ['tests'] diff --git a/metadata-service/iceberg-catalog/requirements.txt b/metadata-service/iceberg-catalog/requirements.txt new file mode 100644 index 00000000000000..de0d0e77672fd2 --- /dev/null +++ b/metadata-service/iceberg-catalog/requirements.txt @@ -0,0 +1,16 @@ +pytest>=6.2 +pytest-dependency>=0.5.1 +pyspark==3.5.3 +-e ../../metadata-ingestion[iceberg-catalog] +# libaries for linting below this +black==23.7.0 +isort==5.12.0 +mypy==1.5.1 +ruff==0.0.287 +# stub version are copied from metadata-ingestion/setup.py and that should be the source of truth +types-requests>=2.28.11.6,<=2.31.0.3 +types-PyYAML +# https://github.com/docker/docker-py/issues/3256 +requests<=2.31.0 +# Missing numpy requirement in 8.0.0 +deepdiff!=8.0.0 \ No newline at end of file diff --git a/metadata-service/iceberg-catalog/src/integrationTest/integration_test.py b/metadata-service/iceberg-catalog/src/integrationTest/integration_test.py new file mode 100644 index 00000000000000..afcb74d5c1923a --- /dev/null +++ b/metadata-service/iceberg-catalog/src/integrationTest/integration_test.py @@ -0,0 +1,243 @@ +from pyspark.sql import SparkSession +import os +from urllib.parse import urlparse +import pytest +from datahub.cli import cli_utils, env_utils, iceberg_cli +from datahub.ingestion.graph.client import DataHubGraph, get_default_graph + + +def get_gms_url(): + return os.getenv("DATAHUB_GMS_URL") or "http://localhost:8080" + + +@pytest.fixture +def personal_access_token(): + username = "datahub" + password = "datahub" + token_name, token = cli_utils.generate_access_token( + username, password, get_gms_url() + ) + + # Setting this env var makes get_default_graph use these env vars to create a graphql client. 
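+    # give_all_permissions and the iceberg_cli calls in the warehouse fixture also
+    # go through get_default_graph, so they pick up this token from the environment.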
+ os.environ["DATAHUB_GMS_TOKEN"] = token + os.environ["DATAHUB_GMS_HOST"] = urlparse(get_gms_url()).hostname + os.environ["DATAHUB_GMS_PORT"] = str(urlparse(get_gms_url()).port) + + yield token + + # revoke token + + +def give_all_permissions(username, policy_name): + client = get_default_graph() + query = """ + mutation createAdminRole($policyName: String!, $user: String!) { + createPolicy( + input: { + name: $policyName, + description: "For Testing", + state: ACTIVE, + type: METADATA, + privileges: ["DATA_READ_WRITE", "DATA_MANAGE_NAMESPACES", "DATA_MANAGE_TABLES", "DATA_MANAGE_VIEWS", "DATA_MANAGE_NAMESPACES", "DATA_LIST_ENTITIES"], + actors: {users: [$user], + allUsers: false, + resourceOwners: true, + allGroups: false}} + ) + } + """ + variables = {"user": f"urn:li:corpuser:{username}", "policyName": policy_name} + + response = client.execute_graphql( + query, variables=variables, format_exception=False + ) + + +@pytest.fixture +def spark_session(personal_access_token, warehouse): + # Create a Spark session + + spark = ( + SparkSession.builder.appName("Simple Example") + .config( + "spark.jars.packages", + "org.apache.iceberg:iceberg-spark-runtime-3.5_2.12:1.6.1,org.apache.iceberg:iceberg-aws-bundle:1.6.1", + ) + .config("spark.sql.catalog.test", "org.apache.iceberg.spark.SparkCatalog") + .config( + "spark.sql.catalog.spark_catalog", + "org.apache.iceberg.spark.SparkSessionCatalog", + ) + .config( + "spark.sql.extensions", + "org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions", + ) + .config("spark.sql.catalog.test.type", "rest") + .config("spark.sql.catalog.test.uri", f"{get_gms_url()}/iceberg") + .config("spark.sql.catalog.test.warehouse", warehouse) + .config("spark.sql.catalog.test.token", personal_access_token) + .config("spark.sql.defaultCatalog", "test") + .config("spark.sql.catalog.test.default-namespace", "default") + .config( + "spark.sql.catalog.test.header.X-Iceberg-Access-Delegation", + "vended-credentials", + ) + .config("spark.sql.catalog.test.rest-metrics-reporting-enabled", False) + .master("local[*]") + .getOrCreate() + ) + + # ensure default namespace + spark.sql("create namespace if not exists default") + + yield spark + + # Stop the Spark session + spark.stop() + + +@pytest.fixture(params=[f"test_wh_{index}" for index in range(4)]) +def warehouse(request, personal_access_token): + warehouse_name = request.param + # PAT dependency just to ensure env vars are setup with token + give_all_permissions("datahub", "test-policy") + + data_root = os.getenv( + "ICEBERG_DATA_ROOT", f"s3://srinath-dev/test/{warehouse_name}" + ) + client_id = os.getenv("ICEBERG_CLIENT_ID") + client_secret = os.getenv("ICEBERG_CLIENT_SECRET") + region = os.getenv("ICEBERG_REGION") + role = os.getenv("ICEBERG_ROLE") + + if not all((data_root, client_id, client_secret, region, role)): + pytest.fail( + "Must set ICEBERG_DATA_ROOT, ICEBERG_CLIENT_ID, ICEBERG_CLIENT_SECRET, ICEBERG_REGION, ICEBERG_ROLE" + ) + + try: + iceberg_cli.delete.callback(warehouse_name, dry_run=False, force=True) + print( + f"Deleted warehouse {warehouse_name}" + ) # This ensures we are starting with a new warehouse. 
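+    # An exception here usually just means the warehouse did not exist yet, which
+    # is expected on a fresh run; it is logged below and setup continues.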
+ except Exception as e: + print(e) + + iceberg_cli.create.callback( + warehouse=warehouse_name, + description="", + data_root=data_root, + client_id=client_id, + client_secret=client_secret, + region=region, + role=role, + env="PROD", + duration_seconds=60 * 60, + ) + + yield warehouse_name + + +def cleanup(session): + # Cleanup any remnants of past test runs + session.sql("drop table if exists test_table") + session.sql("drop view if exists test_view") + + +def _test_basic_table_ops(spark_session): + spark_session.sql("create table test_table (id int, name string)") + + spark_session.sql("insert into test_table values(1, 'foo' ) ") + result = spark_session.sql("SELECT * FROM test_table") + assert result.count() == 1 + + spark_session.sql("update test_table set name='bar' where id=1") + result = spark_session.sql("SELECT * FROM test_table where name='bar'") + assert result.count() == 1 + + spark_session.sql("delete from test_table") + result = spark_session.sql("SELECT * FROM test_table") + assert result.count() == 0 + + spark_session.sql("drop table test_table") + try: + spark_session.sql("select * from test_table") + assert False, "Table must not exist" + except: + pass # Exception is expected + + # TODO: Add dataset verification + + +def _test_basic_view_ops(spark_session): + spark_session.sql("create table test_table (id int, name string)") + spark_session.sql("insert into test_table values(1, 'foo' ) ") + + spark_session.sql("create view test_view AS select * from test_table") + result = spark_session.sql("SELECT * FROM test_view") + assert result.count() == 1 + + spark_session.sql("DROP VIEW test_view") + try: + spark_session.sql("SELECT * FROM test_view") + assert False, "test_view must not exist" + except: + pass # Exception is expected + + spark_session.sql("drop table test_table") + + +def _test_rename_ops(spark_session): + spark_session.sql("create table test_table (id int, name string)") + spark_session.sql("insert into test_table values(1, 'foo' ) ") + + spark_session.sql("alter table test_table rename to test_table_renamed") + + try: + spark_session.sql("SELECT * FROM test_table") + assert False, "test_table must not exist" + except: + pass # Exception is expected + + spark_session.sql("insert into test_table_renamed values(2, 'bar' ) ") + result = spark_session.sql("SELECT * FROM test_table_renamed") + assert result.count() == 2 + + spark_session.sql("create view test_view as select * from test_table_renamed") + result = spark_session.sql("SELECT * FROM test_view") + assert result.count() == 2 + + spark_session.sql("alter view test_view rename to test_view_renamed") + result = spark_session.sql("SELECT * FROM test_view_renamed") + assert result.count() == 2 + + spark_session.sql("drop view test_view_renamed") + spark_session.sql("drop view test_table_renamed") + + +@pytest.mark.quick +@pytest.mark.parametrize("warehouse", ["test_wh_0"], indirect=True) +def test_iceberg_quick(spark_session, warehouse): + spark_session.sql("use namespace default") + _test_basic_table_ops(spark_session) + _test_basic_view_ops(spark_session) + _test_rename_ops(spark_session) + + +def _create_table(spark_session, ns, table_name): + spark_session.sql("create namespace if not exists default") + spark_session.sql(f"create namespace if not exists {ns}") + spark_session.sql(f"drop table if exists {ns}.{table_name}") + spark_session.sql(f"create table {ns}.{table_name} (id int, name string)") + + spark_session.sql(f"insert into {ns}.{table_name} values (1, 'foo' ) ") + + +def 
test_load_tables(spark_session, warehouse): + namespace_count = 3 + table_count = 4 + for ns_index in range(namespace_count): + ns = f"default_ns{ns_index}" + for table_index in range(table_count): + table_name = f"table_{table_index}" + _create_table(spark_session, ns, table_name) diff --git a/metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/CredentialProvider.java b/metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/CredentialProvider.java deleted file mode 100644 index 3e9505f03e15a0..00000000000000 --- a/metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/CredentialProvider.java +++ /dev/null @@ -1,50 +0,0 @@ -package com.datahub.iceberg.catalog; - -import com.google.common.cache.Cache; -import com.google.common.cache.CacheBuilder; -import com.linkedin.metadata.authorization.PoliciesConfig; -import java.util.Map; -import java.util.Set; -import java.util.concurrent.ExecutionException; -import java.util.concurrent.TimeUnit; -import lombok.AllArgsConstructor; -import lombok.EqualsAndHashCode; - -public abstract class CredentialProvider { - private static final int CREDS_DURATION_SECS = 15 * 60; - - @EqualsAndHashCode - @AllArgsConstructor - public static class CredentialsCacheKey { - public final String platformInstance; - public final PoliciesConfig.Privilege privilege; - public final Set locations; - } - - @AllArgsConstructor - public static class StorageProviderCredentials { - public final String clientId; - public final String clientSecret; - public final String role; - public final String region; - } - - private final Cache> credentialCache; - - public CredentialProvider() { - this.credentialCache = - CacheBuilder.newBuilder().expireAfterWrite(CREDS_DURATION_SECS, TimeUnit.SECONDS).build(); - } - - public Map get( - CredentialsCacheKey key, StorageProviderCredentials storageProviderCredentials) { - try { - return credentialCache.get(key, () -> loadItem(key, storageProviderCredentials)); - } catch (ExecutionException e) { - throw new RuntimeException(e); - } - } - - protected abstract Map loadItem( - CredentialsCacheKey key, StorageProviderCredentials storageProviderCredentials); -} diff --git a/metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/DataHubRestCatalog.java b/metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/DataHubRestCatalog.java deleted file mode 100644 index efab3dd823e787..00000000000000 --- a/metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/DataHubRestCatalog.java +++ /dev/null @@ -1,275 +0,0 @@ -package com.datahub.iceberg.catalog; - -import static com.datahub.iceberg.catalog.Utils.*; -import static com.linkedin.metadata.Constants.*; -import static com.linkedin.metadata.utils.GenericRecordUtils.serializeAspect; - -import com.datahub.iceberg.catalog.rest.DataHubIcebergWarehouse; -import com.google.common.base.Joiner; -import com.linkedin.common.AuditStamp; -import com.linkedin.common.SubTypes; -import com.linkedin.common.urn.DatasetUrn; -import com.linkedin.common.urn.Urn; -import com.linkedin.container.Container; -import com.linkedin.container.ContainerProperties; -import com.linkedin.data.template.RecordTemplate; -import com.linkedin.data.template.StringArray; -import com.linkedin.events.metadata.ChangeType; -import com.linkedin.metadata.authorization.PoliciesConfig; -import com.linkedin.metadata.entity.EntityService; -import com.linkedin.mxe.MetadataChangeProposal; -import 
io.datahubproject.metadata.context.OperationContext; -import java.io.IOException; -import java.util.Arrays; -import java.util.List; -import java.util.Map; -import java.util.Set; -import org.apache.iceberg.*; -import org.apache.iceberg.aws.s3.S3FileIO; -import org.apache.iceberg.catalog.Namespace; -import org.apache.iceberg.catalog.SupportsNamespaces; -import org.apache.iceberg.catalog.TableIdentifier; -import org.apache.iceberg.exceptions.AlreadyExistsException; -import org.apache.iceberg.exceptions.NamespaceNotEmptyException; -import org.apache.iceberg.exceptions.NoSuchNamespaceException; -import org.apache.iceberg.io.CloseableGroup; -import org.apache.iceberg.io.FileIO; -import org.apache.iceberg.io.InputFile; -import org.apache.iceberg.view.BaseMetastoreViewCatalog; -import org.apache.iceberg.view.ViewOperations; - -public class DataHubRestCatalog extends BaseMetastoreViewCatalog implements SupportsNamespaces { - private final CredentialProvider credentialProvider; - - private final EntityService entityService; - - private final OperationContext operationContext; - - private final CloseableGroup closeableGroup; - - private final String CATALOG_POINTER_ROOT_DIR = "s3://srinath-dev/icebreaker/"; - - private final DataHubIcebergWarehouse warehouse; - - public DataHubRestCatalog( - EntityService entityService, - OperationContext operationContext, - DataHubIcebergWarehouse warehouse, - CredentialProvider credentialProvider) { - this.entityService = entityService; - this.operationContext = operationContext; - this.credentialProvider = credentialProvider; - this.warehouse = warehouse; - this.closeableGroup = new CloseableGroup(); - this.closeableGroup.setSuppressCloseFailure(true); - } - - @Override - public void renameView(TableIdentifier tableIdentifier, TableIdentifier tableIdentifier1) {} - - @Override - public void initialize(String name, Map properties) {} - - @Override - protected TableOperations newTableOps(TableIdentifier tableIdentifier) { - return new DataHubTableOps( - platformInstance(), - tableIdentifier, - entityService, - operationContext, - new S3FileIOFactory()); - } - - @Override - protected String defaultWarehouseLocation(TableIdentifier tableIdentifier) { - String warehouseRoot = warehouse.getDataRoot(); - return warehouseRoot - + CatalogUtil.fullTableName(platformInstance(), tableIdentifier).replaceAll("\\.", "/"); - } - - @Override - public List listTables(Namespace namespace) { - throw new UnsupportedOperationException(); - } - - @Override - public boolean dropTable(TableIdentifier tableIdentifier, boolean purge) { - if (purge) { - throw new UnsupportedOperationException(); - } - - return deletaDataset(tableIdentifier); - } - - private boolean deletaDataset(TableIdentifier tableIdentifier) { - DatasetUrn urn = datasetUrn(platformInstance(), tableIdentifier); - if (!entityService.exists(operationContext, urn)) { - return false; - } - entityService.deleteUrn(operationContext, urn); - return true; - } - - @Override - public Table registerTable(TableIdentifier identifier, String metadataFileLocation) { - if (tableExists(identifier)) { - throw new AlreadyExistsException("Table already exists: %s", identifier); - } - - FileIO io = - new S3FileIOFactory() - .createIO( - platformInstance(), - PoliciesConfig.DATA_READ_ONLY_PRIVILEGE, - Set.of(parentDir(metadataFileLocation))); - InputFile metadataFile = io.newInputFile(metadataFileLocation); - TableMetadata metadata = TableMetadataParser.read(io, metadataFile); - - TableOperations ops = newTableOps(identifier); - 
ops.commit(null, metadata); - - return new BaseTable(ops, fullTableName(name(), identifier), metricsReporter()); - } - - @Override - public void renameTable(TableIdentifier tableIdentifier, TableIdentifier tableIdentifier1) { - // TODO - } - - @Override - public void createNamespace(Namespace namespace, Map map) { - AuditStamp auditStamp = auditStamp(); - Urn containerUrn = containerUrn(platformInstance(), namespace); - - int nLevels = namespace.length(); - if (nLevels > 1) { - String[] parentLevels = Arrays.copyOfRange(namespace.levels(), 0, nLevels - 1); - Urn parentContainerUrn = containerUrn(platformInstance(), parentLevels); - if (!entityService.exists(operationContext, parentContainerUrn)) { - throw new NoSuchNamespaceException( - "Parent namespace %s does not exist in platformInstance-catalog %s", - Joiner.on(".").join(parentLevels), platformInstance()); - } - ingestContainerAspect( - containerUrn, - CONTAINER_ASPECT_NAME, - new Container().setContainer(parentContainerUrn), - auditStamp); - } - - ingestContainerAspect( - containerUrn, - CONTAINER_PROPERTIES_ASPECT_NAME, - new ContainerProperties().setName(namespace.levels()[nLevels - 1]), - auditStamp); - - ingestContainerAspect( - containerUrn, - SUB_TYPES_ASPECT_NAME, - new SubTypes().setTypeNames(new StringArray("IcebergNamespace")), - auditStamp); - - MetadataChangeProposal platformInstanceMcp = - platformInstanceMcp(platformInstance(), containerUrn, CONTAINER_ENTITY_NAME); - ingestMcp(platformInstanceMcp, auditStamp); - } - - @Override - public List listNamespaces(Namespace namespace) throws NoSuchNamespaceException { - return List.of(); - } - - @Override - public Map loadNamespaceMetadata(Namespace namespace) - throws NoSuchNamespaceException { - if (entityService.exists(operationContext, containerUrn(platformInstance(), namespace))) { - return Map.of(); - } else { - throw new NoSuchNamespaceException("Namespace does not exist: " + namespace); - } - } - - @Override - public boolean dropNamespace(Namespace namespace) throws NamespaceNotEmptyException { - return false; - } - - @Override - public boolean setProperties(Namespace namespace, Map map) - throws NoSuchNamespaceException { - return false; - } - - @Override - public boolean removeProperties(Namespace namespace, Set set) - throws NoSuchNamespaceException { - return false; - } - - @Override - public void close() throws IOException { - super.close(); - this.closeableGroup.close(); - } - - private void ingestContainerAspect( - Urn containerUrn, String aspectName, RecordTemplate aspect, AuditStamp auditStamp) { - MetadataChangeProposal mcp = new MetadataChangeProposal(); - mcp.setEntityUrn(containerUrn); - mcp.setEntityType(CONTAINER_ENTITY_NAME); - mcp.setAspectName(aspectName); - mcp.setAspect(serializeAspect(aspect)); - mcp.setChangeType(ChangeType.UPSERT); - ingestMcp(mcp, auditStamp); - } - - private void ingestMcp(MetadataChangeProposal mcp, AuditStamp auditStamp) { - entityService.ingestProposal(operationContext, mcp, auditStamp, false); - } - - private class S3FileIOFactory implements FileIOFactory { - @Override - public FileIO createIO( - String platformInstance, PoliciesConfig.Privilege privilege, Set locations) { - - FileIO io = new S3FileIO(); - Map creds = - credentialProvider.get( - new S3CredentialProvider.CredentialsCacheKey(platformInstance, privilege, locations), - warehouse.getStorageProviderCredentials()); - io.initialize(creds); - closeableGroup.addCloseable(io); - return io; - } - - @Override - public FileIO createIO( - String platformInstance, 
PoliciesConfig.Privilege privilege, TableMetadata tableMetadata) { - return createIO(platformInstance, privilege, locations(tableMetadata)); - } - } - - @Override - protected ViewOperations newViewOps(TableIdentifier tableIdentifier) { - return new DataHubViewOps( - platformInstance(), - tableIdentifier, - entityService, - operationContext, - new S3FileIOFactory()); - } - - @Override - public List listViews(Namespace namespace) { - return List.of(); - } - - @Override - public boolean dropView(TableIdentifier tableIdentifier) { - return deletaDataset(tableIdentifier); - } - - private String platformInstance() { - return warehouse.getPlatformInstance(); - } -} diff --git a/metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/DataHubTableOps.java b/metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/DataHubTableOps.java deleted file mode 100644 index a2b6cc0965addb..00000000000000 --- a/metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/DataHubTableOps.java +++ /dev/null @@ -1,216 +0,0 @@ -package com.datahub.iceberg.catalog; - -import static com.datahub.iceberg.catalog.Utils.*; -import static com.linkedin.metadata.Constants.*; -import static com.linkedin.metadata.aspect.validation.ConditionalWriteValidator.HTTP_HEADER_IF_VERSION_MATCH; -import static com.linkedin.metadata.utils.GenericRecordUtils.serializeAspect; - -import com.linkedin.common.AuditStamp; -import com.linkedin.common.urn.DatasetUrn; -import com.linkedin.container.Container; -import com.linkedin.data.template.StringMap; -import com.linkedin.dataset.DatasetProperties; -import com.linkedin.dataset.IcebergMetadata; -import com.linkedin.entity.EnvelopedAspect; -import com.linkedin.events.metadata.ChangeType; -import com.linkedin.metadata.authorization.PoliciesConfig; -import com.linkedin.metadata.entity.EntityService; -import com.linkedin.mxe.MetadataChangeProposal; -import com.linkedin.schema.SchemaMetadata; -import io.datahubproject.metadata.context.OperationContext; -import io.datahubproject.schematron.converters.avro.AvroSchemaConverter; -import java.util.Collections; -import java.util.Set; -import lombok.SneakyThrows; -import org.apache.avro.Schema; -import org.apache.iceberg.BaseMetastoreTableOperations; -import org.apache.iceberg.TableMetadata; -import org.apache.iceberg.TableMetadataParser; -import org.apache.iceberg.avro.AvroSchemaUtil; -import org.apache.iceberg.catalog.TableIdentifier; -import org.apache.iceberg.exceptions.AlreadyExistsException; -import org.apache.iceberg.exceptions.CommitFailedException; -import org.apache.iceberg.exceptions.NoSuchTableException; -import org.apache.iceberg.exceptions.ValidationException; -import org.apache.iceberg.io.FileIO; - -public class DataHubTableOps extends BaseMetastoreTableOperations { - - private static final String DATASET_ICEBERG_METADATA_ASPECT_NAME = "icebergMetadata"; - - private final String platformInstance; - private FileIO io; - private final TableIdentifier tableIdentifier; - private final DatasetUrn urn; - private final EntityService entityService; - private final OperationContext operationContext; - private final FileIOFactory fileIOFactory; - private volatile TableMetadata currentMetadata = null; - private volatile boolean shouldRefresh = true; - - public DataHubTableOps( - String platformInstance, - TableIdentifier tableIdentifier, - EntityService entityService, - OperationContext operationContext, - FileIOFactory fileIOFactory) { - this.platformInstance = platformInstance; - 
this.tableIdentifier = tableIdentifier; - this.entityService = entityService; - this.operationContext = operationContext; - this.fileIOFactory = fileIOFactory; - this.urn = datasetUrn(platformInstance, tableIdentifier); - } - - @Override - public TableMetadata refresh() { - IcebergMetadata icebergMeta = - (IcebergMetadata) - entityService.getLatestAspect( - operationContext, urn, DATASET_ICEBERG_METADATA_ASPECT_NAME); - if (icebergMeta == null || icebergMeta.isView()) { - return null; - } - String location = icebergMeta.getMetadataPointer(); - if (io == null) { - io = - fileIOFactory.createIO( - platformInstance, - PoliciesConfig.DATA_READ_ONLY_PRIVILEGE, - Set.of(parentDir(location))); - } - // TODO check UUID ala HadoopTableOps? - currentMetadata = TableMetadataParser.read(io(), location); - shouldRefresh = false; - return currentMetadata; - } - - @Override - public TableMetadata current() { - if (shouldRefresh) { - return refresh(); - } - return currentMetadata; - } - - @SneakyThrows - @Override - protected void doCommit(TableMetadata base, TableMetadata metadata) { - - EnvelopedAspect existingEnveloped = - entityService.getLatestEnvelopedAspect( - operationContext, DATASET_ENTITY_NAME, urn, DATASET_ICEBERG_METADATA_ASPECT_NAME); - - boolean creation = base == null; - - if (existingEnveloped != null) { - if (creation) { - throw new AlreadyExistsException("Table already exists: %s", tableName()); - } - IcebergMetadata existingMetadata = new IcebergMetadata(existingEnveloped.getValue().data()); - if (existingMetadata.isView()) { - throw new NoSuchTableException("%s is not a table", tableName()); - } - if (!existingMetadata.getMetadataPointer().equals(base.metadataFileLocation())) { - throw new CommitFailedException( - "Cannot commit to table %s: stale table metadata", tableName()); - } - } - - // attempt to commit - io = - fileIOFactory.createIO( - platformInstance, PoliciesConfig.DATA_READ_WRITE_PRIVILEGE, metadata); - String newMetadataLocation = writeNewMetadataIfRequired(base == null, metadata); - - MetadataChangeProposal icebergMcp = newMcp(DATASET_ICEBERG_METADATA_ASPECT_NAME); - icebergMcp.setAspect( - serializeAspect( - new IcebergMetadata().setMetadataPointer(newMetadataLocation).setView(false))); - - if (creation) { - icebergMcp.setChangeType(ChangeType.CREATE_ENTITY); - } else { - String existingVersion = existingEnveloped.getSystemMetadata().getVersion(); - icebergMcp.setHeaders( - new StringMap(Collections.singletonMap(HTTP_HEADER_IF_VERSION_MATCH, existingVersion))); - icebergMcp.setChangeType( - ChangeType.UPSERT); // ideally should be UPDATE, but seems not supported yet. - } - AuditStamp auditStamp = auditStamp(); - try { - ingestMcp(icebergMcp, auditStamp); - } catch (ValidationException e) { - if (creation) { - // this is likely because table already exists i.e. 
created concurrently in a race condition - throw new AlreadyExistsException("Table already exists: %s", tableName()); - } else { - throw new CommitFailedException( - "Cannot commit to table %s: stale table metadata", tableName()); - } - } - - if (base == null || (base.currentSchemaId() != metadata.currentSchemaId())) { - // schema changed - Schema avroSchema = AvroSchemaUtil.convert(metadata.schema(), tableName()); - AvroSchemaConverter converter = AvroSchemaConverter.builder().build(); - SchemaMetadata schemaMetadata = - converter.toDataHubSchema(avroSchema, false, false, platformUrn(), null); - MetadataChangeProposal schemaMcp = newMcp(SCHEMA_METADATA_ASPECT_NAME); - schemaMcp.setAspect(serializeAspect(schemaMetadata)); - schemaMcp.setChangeType(ChangeType.UPSERT); - ingestMcp(schemaMcp, auditStamp); - } - - if (creation) { - DatasetProperties datasetProperties = new DatasetProperties(); - datasetProperties.setName(tableIdentifier.name()); - datasetProperties.setQualifiedName(tableName()); - - MetadataChangeProposal datasetPropertiesMcp = newMcp(DATASET_PROPERTIES_ASPECT_NAME); - datasetPropertiesMcp.setAspect(serializeAspect(datasetProperties)); - datasetPropertiesMcp.setChangeType(ChangeType.UPSERT); - - ingestMcp(datasetPropertiesMcp, auditStamp); - - MetadataChangeProposal platformInstanceMcp = - platformInstanceMcp(platformInstance, urn, DATASET_ENTITY_NAME); - ingestMcp(platformInstanceMcp, auditStamp); - - Container container = new Container(); - container.setContainer(containerUrn(platformInstance, tableIdentifier.namespace())); - - MetadataChangeProposal containerMcp = newMcp(CONTAINER_ASPECT_NAME); - containerMcp.setAspect(serializeAspect(container)); - containerMcp.setChangeType(ChangeType.UPSERT); - ingestMcp(containerMcp, auditStamp); - } - } - - @Override - public void commit(TableMetadata base, TableMetadata metadata) { - super.commit(base, metadata); - } - - @Override - protected String tableName() { - return fullTableName(platformInstance, tableIdentifier); - } - - @Override - public FileIO io() { - return io; - } - - private MetadataChangeProposal newMcp(String aspectName) { - MetadataChangeProposal mcp = new MetadataChangeProposal(); - mcp.setEntityUrn(urn); - mcp.setEntityType(DATASET_ENTITY_NAME); - mcp.setAspectName(aspectName); - return mcp; - } - - private void ingestMcp(MetadataChangeProposal mcp, AuditStamp auditStamp) { - entityService.ingestProposal(operationContext, mcp, auditStamp, false); - } -} diff --git a/metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/DataHubViewOps.java b/metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/DataHubViewOps.java deleted file mode 100644 index 8fdd61b863600e..00000000000000 --- a/metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/DataHubViewOps.java +++ /dev/null @@ -1,243 +0,0 @@ -package com.datahub.iceberg.catalog; - -import static com.datahub.iceberg.catalog.Utils.*; -import static com.linkedin.metadata.Constants.*; -import static com.linkedin.metadata.aspect.validation.ConditionalWriteValidator.HTTP_HEADER_IF_VERSION_MATCH; -import static com.linkedin.metadata.utils.GenericRecordUtils.serializeAspect; - -import com.linkedin.common.AuditStamp; -import com.linkedin.common.urn.DatasetUrn; -import com.linkedin.container.Container; -import com.linkedin.data.template.StringMap; -import com.linkedin.dataset.DatasetProperties; -import com.linkedin.dataset.IcebergMetadata; -import com.linkedin.dataset.ViewProperties; -import 
com.linkedin.entity.EnvelopedAspect; -import com.linkedin.events.metadata.ChangeType; -import com.linkedin.metadata.authorization.PoliciesConfig; -import com.linkedin.metadata.entity.EntityService; -import com.linkedin.mxe.MetadataChangeProposal; -import com.linkedin.schema.SchemaMetadata; -import io.datahubproject.metadata.context.OperationContext; -import io.datahubproject.schematron.converters.avro.AvroSchemaConverter; -import java.util.Collections; -import java.util.Set; -import lombok.SneakyThrows; -import lombok.extern.slf4j.Slf4j; -import org.apache.avro.Schema; -import org.apache.iceberg.avro.AvroSchemaUtil; -import org.apache.iceberg.catalog.TableIdentifier; -import org.apache.iceberg.exceptions.AlreadyExistsException; -import org.apache.iceberg.exceptions.CommitFailedException; -import org.apache.iceberg.exceptions.NoSuchTableException; -import org.apache.iceberg.exceptions.ValidationException; -import org.apache.iceberg.io.FileIO; -import org.apache.iceberg.view.*; - -@Slf4j -public class DataHubViewOps extends BaseViewOperations { - - private static final String DATASET_ICEBERG_METADATA_ASPECT_NAME = "icebergMetadata"; - - private final String platformInstance; - private FileIO io; - private final TableIdentifier tableIdentifier; - private final DatasetUrn urn; - private final EntityService entityService; - private final OperationContext operationContext; - private final FileIOFactory fileIOFactory; - private volatile ViewMetadata currentMetadata = null; - private volatile boolean shouldRefresh = true; - - public DataHubViewOps( - String platformInstance, - TableIdentifier tableIdentifier, - EntityService entityService, - OperationContext operationContext, - FileIOFactory fileIOFactory) { - this.platformInstance = platformInstance; - this.tableIdentifier = tableIdentifier; - this.entityService = entityService; - this.operationContext = operationContext; - this.fileIOFactory = fileIOFactory; - this.urn = datasetUrn(platformInstance, tableIdentifier); - } - - @Override - public ViewMetadata refresh() { - IcebergMetadata icebergMeta = - (IcebergMetadata) - entityService.getLatestAspect( - operationContext, urn, DATASET_ICEBERG_METADATA_ASPECT_NAME); - if (icebergMeta == null || !icebergMeta.isView()) { - return null; - } - String location = icebergMeta.getMetadataPointer(); - if (io == null) { - String locationDir = location.substring(0, location.lastIndexOf("/")); - io = - fileIOFactory.createIO( - platformInstance, PoliciesConfig.DATA_READ_ONLY_PRIVILEGE, Set.of(locationDir)); - } - // TODO check UUID ala HadoopTableOps? 
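// The icebergMetadata aspect stores only a pointer to the latest view-metadata JSON;
// the full ViewMetadata is re-read from that location through FileIO on every refresh.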
- currentMetadata = ViewMetadataParser.read(io().newInputFile(location)); - shouldRefresh = false; - return currentMetadata; - } - - @Override - public ViewMetadata current() { - if (shouldRefresh) { - return refresh(); - } - return currentMetadata; - } - - @Override - protected void doRefresh() { - throw new UnsupportedOperationException(); - } - - @SneakyThrows - @Override - protected void doCommit(ViewMetadata base, ViewMetadata metadata) { - - EnvelopedAspect existingEnveloped = - entityService.getLatestEnvelopedAspect( - operationContext, DATASET_ENTITY_NAME, urn, DATASET_ICEBERG_METADATA_ASPECT_NAME); - - boolean creation = base == null; - - if (existingEnveloped != null) { - if (creation) { - throw new AlreadyExistsException("Table already exists: %s", viewName()); - } - IcebergMetadata existingMetadata = new IcebergMetadata(existingEnveloped.getValue().data()); - if (!existingMetadata.isView()) { - throw new NoSuchTableException("%s is not a view", viewName()); - } - if (!existingMetadata.getMetadataPointer().equals(base.metadataFileLocation())) { - throw new CommitFailedException( - "Cannot commit to table %s: stale table metadata", viewName()); - } - } - - // attempt to commit - io = - fileIOFactory.createIO( - platformInstance, - PoliciesConfig.DATA_READ_WRITE_PRIVILEGE, - Set.of(metadata.location())); - String newMetadataLocation = writeNewMetadataIfRequired(metadata); - - MetadataChangeProposal icebergMcp = newMcp(DATASET_ICEBERG_METADATA_ASPECT_NAME); - icebergMcp.setAspect( - serializeAspect( - new IcebergMetadata().setMetadataPointer(newMetadataLocation).setView(true))); - - if (creation) { - icebergMcp.setChangeType(ChangeType.CREATE_ENTITY); - } else { - String existingVersion = existingEnveloped.getSystemMetadata().getVersion(); - icebergMcp.setHeaders( - new StringMap(Collections.singletonMap(HTTP_HEADER_IF_VERSION_MATCH, existingVersion))); - icebergMcp.setChangeType( - ChangeType.UPSERT); // ideally should be UPDATE, but seems not supported yet. - } - AuditStamp auditStamp = auditStamp(); - try { - ingestMcp(icebergMcp, auditStamp); - } catch (ValidationException e) { - if (creation) { - // this is likely because table already exists i.e. 
created concurrently in a race condition - throw new AlreadyExistsException("View already exists: %s", viewName()); - } else { - throw new CommitFailedException( - "Cannot commit to table %s: stale table metadata", viewName()); - } - } - - if (base == null || (base.currentSchemaId() != metadata.currentSchemaId())) { - // schema changed - Schema avroSchema = AvroSchemaUtil.convert(metadata.schema(), viewName()); - AvroSchemaConverter converter = AvroSchemaConverter.builder().build(); - SchemaMetadata schemaMetadata = - converter.toDataHubSchema(avroSchema, false, false, platformUrn(), null); - MetadataChangeProposal schemaMcp = newMcp(SCHEMA_METADATA_ASPECT_NAME); - schemaMcp.setAspect(serializeAspect(schemaMetadata)); - schemaMcp.setChangeType(ChangeType.UPSERT); - ingestMcp(schemaMcp, auditStamp); - } - - if (creation) { - DatasetProperties datasetProperties = new DatasetProperties(); - datasetProperties.setName(tableIdentifier.name()); - datasetProperties.setQualifiedName(viewName()); - - MetadataChangeProposal datasetPropertiesMcp = newMcp(DATASET_PROPERTIES_ASPECT_NAME); - datasetPropertiesMcp.setAspect(serializeAspect(datasetProperties)); - datasetPropertiesMcp.setChangeType(ChangeType.UPSERT); - - ingestMcp(datasetPropertiesMcp, auditStamp); - - MetadataChangeProposal platformInstanceMcp = - platformInstanceMcp(platformInstance, urn, DATASET_ENTITY_NAME); - ingestMcp(platformInstanceMcp, auditStamp); - - Container container = new Container(); - container.setContainer(containerUrn(platformInstance, tableIdentifier.namespace())); - - MetadataChangeProposal containerMcp = newMcp(CONTAINER_ASPECT_NAME); - containerMcp.setAspect(serializeAspect(container)); - containerMcp.setChangeType(ChangeType.UPSERT); - ingestMcp(containerMcp, auditStamp); - } - - SQLViewRepresentation sqlViewRepresentation = null; - for (ViewRepresentation representation : metadata.currentVersion().representations()) { - if (representation instanceof SQLViewRepresentation) { - sqlViewRepresentation = (SQLViewRepresentation) representation; - // use only first representation, as DataHub model currently supports one SQL. - break; - } - } - if (sqlViewRepresentation == null) { - // base class is ensuring that a representation has been specified in case of replace-view. - // so, this shouldn't occur. 
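// Logged as a warning rather than failing, so the commit still succeeds without a
// ViewProperties aspect being written for the view.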
- log.warn("No SQL representation for view {}", viewName()); - } else { - ViewProperties viewProperties = - new ViewProperties() - .setViewLogic(sqlViewRepresentation.sql()) - .setMaterialized(false) - .setViewLanguage(sqlViewRepresentation.dialect()); - MetadataChangeProposal viewPropertiesMcp = newMcp(VIEW_PROPERTIES_ASPECT_NAME); - viewPropertiesMcp.setAspect(serializeAspect(viewProperties)); - viewPropertiesMcp.setChangeType(ChangeType.UPSERT); - - ingestMcp(viewPropertiesMcp, auditStamp); - } - } - - @Override - protected String viewName() { - return fullTableName(platformInstance, tableIdentifier); - } - - @Override - public FileIO io() { - return io; - } - - private MetadataChangeProposal newMcp(String aspectName) { - MetadataChangeProposal mcp = new MetadataChangeProposal(); - mcp.setEntityUrn(urn); - mcp.setEntityType(DATASET_ENTITY_NAME); - mcp.setAspectName(aspectName); - return mcp; - } - - private void ingestMcp(MetadataChangeProposal mcp, AuditStamp auditStamp) { - entityService.ingestProposal(operationContext, mcp, auditStamp, false); - } -} diff --git a/metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/IcebergApiController.java b/metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/IcebergApiController.java deleted file mode 100644 index 8b137891791fe9..00000000000000 --- a/metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/IcebergApiController.java +++ /dev/null @@ -1 +0,0 @@ - diff --git a/metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/rest/DataHubIcebergWarehouse.java b/metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/rest/DataHubIcebergWarehouse.java deleted file mode 100644 index d904cc7912923f..00000000000000 --- a/metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/rest/DataHubIcebergWarehouse.java +++ /dev/null @@ -1,80 +0,0 @@ -package com.datahub.iceberg.catalog.rest; - -import com.datahub.iceberg.catalog.CredentialProvider; -import com.datahub.iceberg.catalog.Utils; -import com.linkedin.common.urn.Urn; -import com.linkedin.data.template.RecordTemplate; -import com.linkedin.dataplatforminstance.IcebergWarehouse; -import com.linkedin.metadata.entity.EntityService; -import com.linkedin.secret.DataHubSecretValue; -import io.datahubproject.metadata.context.OperationContext; -import java.util.List; -import java.util.Map; -import java.util.Set; -import lombok.Getter; - -public class DataHubIcebergWarehouse { - - private final EntityService entityService; - - private final OperationContext operationContext; - - private final IcebergWarehouse icebergWarehouse; - - @Getter private final String platformInstance; - - private DataHubIcebergWarehouse( - String platformInstance, - IcebergWarehouse icebergWarehouse, - EntityService entityService, - OperationContext operationContext) { - this.platformInstance = platformInstance; - this.icebergWarehouse = icebergWarehouse; - this.entityService = entityService; - this.operationContext = operationContext; - } - - public static DataHubIcebergWarehouse of( - String platformInstance, EntityService entityService, OperationContext operationContext) { - Urn platformInstanceUrn = Utils.platformInstanceUrn(platformInstance); - RecordTemplate warehouseAspect = - entityService.getLatestAspect(operationContext, platformInstanceUrn, "icebergWarehouse"); - - if (warehouseAspect == null) { - throw new RuntimeException("Unknown warehouse"); - } - - IcebergWarehouse icebergWarehouse = new 
IcebergWarehouse(warehouseAspect.data()); - return new DataHubIcebergWarehouse( - platformInstance, icebergWarehouse, entityService, operationContext); - } - - public CredentialProvider.StorageProviderCredentials getStorageProviderCredentials() { - - Urn clientIdUrn, clientSecretUrn; - String role, region; - - clientIdUrn = icebergWarehouse.getClientId(); - clientSecretUrn = icebergWarehouse.getClientSecret(); - role = icebergWarehouse.getRole(); - region = icebergWarehouse.getRegion(); - - Map> credsMap = - entityService.getLatestAspects( - operationContext, Set.of(clientIdUrn, clientSecretUrn), Set.of("dataHubSecretValue")); - - DataHubSecretValue clientIdValue = - new DataHubSecretValue(credsMap.get(clientIdUrn).get(1).data()); - String clientId = clientIdValue.getValue(); - - DataHubSecretValue clientSecretValue = - new DataHubSecretValue(credsMap.get(clientSecretUrn).get(1).data()); - String clientSecret = clientSecretValue.getValue(); - - return new CredentialProvider.StorageProviderCredentials(clientId, clientSecret, role, region); - } - - public String getDataRoot() { - return icebergWarehouse.getDataRoot(); - } -} diff --git a/metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/rest/IcebergExceptionHandlerAdvice.java b/metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/rest/IcebergExceptionHandlerAdvice.java deleted file mode 100644 index 2926aa0b4769cf..00000000000000 --- a/metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/rest/IcebergExceptionHandlerAdvice.java +++ /dev/null @@ -1,53 +0,0 @@ -package com.datahub.iceberg.catalog.rest; - -import com.fasterxml.jackson.core.JsonProcessingException; -import org.apache.iceberg.exceptions.*; -import org.apache.iceberg.rest.responses.ErrorResponse; -import org.springframework.http.HttpStatus; -import org.springframework.http.ResponseEntity; -import org.springframework.web.bind.annotation.ControllerAdvice; -import org.springframework.web.bind.annotation.ExceptionHandler; - -@ControllerAdvice(basePackageClasses = AbstractIcebergController.class) -public class IcebergExceptionHandlerAdvice { - - @ExceptionHandler(AlreadyExistsException.class) - public ResponseEntity handle(AlreadyExistsException e) throws JsonProcessingException { - return err(e, HttpStatus.CONFLICT); - } - - @ExceptionHandler(NoSuchNamespaceException.class) - public ResponseEntity handle(NoSuchNamespaceException e) throws JsonProcessingException { - return err(e, HttpStatus.NOT_FOUND); - } - - @ExceptionHandler(NoSuchTableException.class) - public ResponseEntity handle(NoSuchTableException e) throws JsonProcessingException { - return err(e, HttpStatus.NOT_FOUND); - } - - @ExceptionHandler(NoSuchViewException.class) - public ResponseEntity handle(NoSuchViewException e) throws JsonProcessingException { - return err(e, HttpStatus.NOT_FOUND); - } - - @ExceptionHandler(ForbiddenException.class) - public ResponseEntity handle(ForbiddenException e) throws JsonProcessingException { - return err(e, HttpStatus.FORBIDDEN); - } - - @ExceptionHandler(BadRequestException.class) - public ResponseEntity handle(BadRequestException e) throws JsonProcessingException { - return err(e, HttpStatus.BAD_REQUEST); - } - - private ResponseEntity err(Exception e, HttpStatus errCode) throws JsonProcessingException { - ErrorResponse err = - ErrorResponse.builder() - .responseCode(errCode.value()) - .withMessage(e.getMessage()) - .withType(e.getClass().getSimpleName()) - .build(); - return new ResponseEntity<>(err, errCode); - } 
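// The body uses the Iceberg REST ErrorResponse type (responseCode / message / type),
// so standard Iceberg REST clients should be able to interpret these error payloads.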
-} diff --git a/metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/rest/IcebergNamespaceApiController.java b/metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/rest/IcebergNamespaceApiController.java deleted file mode 100644 index 127c9e76a3b41c..00000000000000 --- a/metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/rest/IcebergNamespaceApiController.java +++ /dev/null @@ -1,76 +0,0 @@ -package com.datahub.iceberg.catalog.rest; - -import static com.datahub.iceberg.catalog.Utils.*; - -import com.datahub.iceberg.catalog.DataOperation; -import jakarta.servlet.http.HttpServletRequest; -import javax.annotation.Nonnull; -import lombok.extern.slf4j.Slf4j; -import org.apache.iceberg.catalog.Namespace; -import org.apache.iceberg.rest.requests.CreateNamespaceRequest; -import org.apache.iceberg.rest.responses.CreateNamespaceResponse; -import org.apache.iceberg.rest.responses.GetNamespaceResponse; -import org.springframework.http.MediaType; -import org.springframework.web.bind.annotation.*; - -@Slf4j -@RestController -public class IcebergNamespaceApiController extends AbstractIcebergController { - - @GetMapping( - value = "/v1/{prefix}/namespaces/{namespace}", - produces = MediaType.APPLICATION_JSON_VALUE) - public GetNamespaceResponse getNamespace( - HttpServletRequest request, - @PathVariable("prefix") String platformInstance, - @PathVariable("namespace") String namespace) { - log.info("GET NAMESPACE REQUEST ns {}", namespace); - - GetNamespaceResponse getNamespaceResponse = - catalogOperation( - platformInstance, - request, - operationContext -> - authorize( - operationContext, platformInstance, DataOperation.MANAGE_NAMESPACES, false), - catalog -> { - // not supporting properties; simply load to ensure existence - Namespace ns = namespaceFromString(namespace); - catalog.loadNamespaceMetadata(ns); - return GetNamespaceResponse.builder().withNamespace(ns).build(); - }, - null); - - log.info("GET NAMESPACE RESPONSE {}", getNamespaceResponse); - return getNamespaceResponse; - } - - @PostMapping( - value = "/v1/{prefix}/namespaces", - consumes = MediaType.APPLICATION_JSON_VALUE, - produces = MediaType.APPLICATION_JSON_VALUE) - public CreateNamespaceResponse createNamespace( - HttpServletRequest request, - @RequestBody @Nonnull CreateNamespaceRequest createNamespaceRequest, - @PathVariable("prefix") String platformInstance) { - log.info("CREATE NAMESPACE REQUEST {} ", createNamespaceRequest); - - CreateNamespaceResponse createNamespaceResponse = - catalogOperation( - platformInstance, - request, - operationContext -> - authorize( - operationContext, platformInstance, DataOperation.MANAGE_NAMESPACES, false), - catalog -> { - catalog.createNamespace(createNamespaceRequest.namespace()); - return CreateNamespaceResponse.builder() - .withNamespace(createNamespaceRequest.namespace()) - .build(); - }, - null); - - log.info("CREATE NAMESPACE RESPONSE {}", createNamespaceResponse); - return createNamespaceResponse; - } -} diff --git a/metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/rest/IcebergTableApiController.java b/metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/rest/IcebergTableApiController.java deleted file mode 100644 index 650e054eaeb461..00000000000000 --- a/metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/rest/IcebergTableApiController.java +++ /dev/null @@ -1,235 +0,0 @@ -package com.datahub.iceberg.catalog.rest; - -import static 
com.datahub.iceberg.catalog.Utils.*; - -import com.datahub.iceberg.catalog.CredentialProvider; -import com.datahub.iceberg.catalog.DataOperation; -import com.linkedin.metadata.authorization.PoliciesConfig; -import jakarta.servlet.http.HttpServletRequest; -import java.util.Map; -import lombok.extern.slf4j.Slf4j; -import org.apache.iceberg.catalog.Namespace; -import org.apache.iceberg.catalog.TableIdentifier; -import org.apache.iceberg.rest.CatalogHandlers; -import org.apache.iceberg.rest.requests.CreateTableRequest; -import org.apache.iceberg.rest.requests.RegisterTableRequest; -import org.apache.iceberg.rest.requests.UpdateTableRequest; -import org.apache.iceberg.rest.responses.LoadTableResponse; -import org.springframework.http.MediaType; -import org.springframework.web.bind.annotation.*; - -@Slf4j -@RestController -public class IcebergTableApiController extends AbstractIcebergController { - - @PostMapping( - value = "/v1/{prefix}/namespaces/{namespace}/tables", - produces = MediaType.APPLICATION_JSON_VALUE, - consumes = MediaType.APPLICATION_JSON_VALUE) - public LoadTableResponse createTable( - HttpServletRequest request, - @PathVariable("prefix") String platformInstance, - @PathVariable("namespace") String namespace, - @RequestBody CreateTableRequest createTableRequest, - @RequestHeader(value = "X-Iceberg-Access-Delegation") String xIcebergAccessDelegation) { - log.info("CREATE TABLE REQUEST {}", createTableRequest); - - LoadTableResponse createTableResponse = - catalogOperation( - platformInstance, - request, - operationContext -> - authorize(operationContext, platformInstance, DataOperation.MANAGE_TABLES, false), - catalog -> { - // ensure namespace exists - Namespace ns = namespaceFromString(namespace); - catalog.loadNamespaceMetadata(ns); - if (createTableRequest.stageCreate()) { - return CatalogHandlers.stageTableCreate(catalog, ns, createTableRequest); - } else { - return CatalogHandlers.createTable(catalog, ns, createTableRequest); - } - }, - catalogOperationResult -> { - log.info( - "CREATE TABLE RESPONSE, excluding creds, {}", - catalogOperationResult.getResponse()); - return includeCreds( - platformInstance, - xIcebergAccessDelegation, - catalogOperationResult.getResponse(), - PoliciesConfig.DATA_READ_WRITE_PRIVILEGE, - catalogOperationResult.getStorageProviderCredentials()); - }); - - return createTableResponse; - } - - private LoadTableResponse includeCreds( - String platformInstance, - String xIcebergAccessDelegation, - LoadTableResponse loadTableResponse, - PoliciesConfig.Privilege privilege, - CredentialProvider.StorageProviderCredentials storageProviderCredentials) { - if ("vended-credentials".equals(xIcebergAccessDelegation)) { - CredentialProvider.CredentialsCacheKey cacheKey = - new CredentialProvider.CredentialsCacheKey( - platformInstance, privilege, locations(loadTableResponse.tableMetadata())); - Map creds = credentialProvider.get(cacheKey, storageProviderCredentials); - log.info( - "STS creds {} for primary table location {}", - creds, - loadTableResponse.tableMetadata().location()); - - return LoadTableResponse.builder() - .withTableMetadata(loadTableResponse.tableMetadata()) - .addAllConfig(creds) - .build(); - } else { - return loadTableResponse; - } - } - - @GetMapping( - value = "/v1/{prefix}/namespaces/{namespace}/tables/{table}", - produces = MediaType.APPLICATION_JSON_VALUE) - public LoadTableResponse loadTable( - HttpServletRequest request, - @PathVariable("prefix") String platformInstance, - @PathVariable("namespace") String namespace, - 
@PathVariable("table") String table, - @RequestHeader(value = "X-Iceberg-Access-Delegation", required = false) - String xIcebergAccessDelegation, - @RequestParam(value = "snapshots", required = false) String snapshots) { - log.info( - "GET TABLE REQUEST {} {}.{} ; access-delegation: {}", - platformInstance, - namespace, - table, - xIcebergAccessDelegation); - - LoadTableResponse getTableResponse = - catalogOperation( - platformInstance, - request, - operationContext -> - authorize( - operationContext, - platformInstance, - tableIdFromString(namespace, table), - DataOperation.READ_ONLY, - true), - catalog -> CatalogHandlers.loadTable(catalog, tableIdFromString(namespace, table)), - catalogOperationResult -> { - log.info( - "GET TABLE RESPONSE, excluding creds, {}", catalogOperationResult.getResponse()); - PoliciesConfig.Privilege privilege = catalogOperationResult.getPrivilege(); - if (privilege == PoliciesConfig.DATA_MANAGE_TABLES_PRIVILEGE) { - privilege = PoliciesConfig.DATA_READ_WRITE_PRIVILEGE; - } else if (privilege == PoliciesConfig.DATA_MANAGE_VIEWS_PRIVILEGE) { - privilege = PoliciesConfig.DATA_READ_ONLY_PRIVILEGE; - } - return includeCreds( - platformInstance, - xIcebergAccessDelegation, - catalogOperationResult.getResponse(), - privilege, - catalogOperationResult.getStorageProviderCredentials()); - }); - - return getTableResponse; - } - - @PostMapping( - value = "/v1/{prefix}/namespaces/{namespace}/tables/{table}", - produces = MediaType.APPLICATION_JSON_VALUE, - consumes = MediaType.APPLICATION_JSON_VALUE) - public LoadTableResponse updateTable( - HttpServletRequest request, - @PathVariable("prefix") String platformInstance, - @PathVariable("namespace") String namespace, - @PathVariable("table") String table, - @RequestBody UpdateTableRequest updateTableRequest) { - - log.info("UPDATE TABLE REQUEST {}.{}, body {} ", namespace, table, updateTableRequest); - - LoadTableResponse updateTableResponse = - catalogOperation( - platformInstance, - request, - operationContext -> - authorize( - operationContext, - platformInstance, - tableIdFromString(namespace, table), - DataOperation.READ_WRITE, - false), - catalog -> - CatalogHandlers.updateTable( - catalog, tableIdFromString(namespace, table), updateTableRequest), - null); - - // not refreshing credentials here. 
- log.info("UPDATE TABLE RESPONSE {}", updateTableResponse); - - return updateTableResponse; - } - - @DeleteMapping(value = "/v1/{prefix}/namespaces/{namespace}/tables/{table}") - public void dropTable( - HttpServletRequest request, - @PathVariable("prefix") String platformInstance, - @PathVariable("namespace") String namespace, - @PathVariable("table") String table, - @RequestParam(value = "purgeRequested", defaultValue = "false") Boolean purgeRequested) { - - log.info("DROP TABLE REQUEST ns {} table {}", namespace, table); - - catalogOperation( - platformInstance, - request, - operationContext -> - authorize(operationContext, platformInstance, DataOperation.MANAGE_TABLES, false), - catalog -> { - TableIdentifier tableIdentifier = tableIdFromString(namespace, table); - if (purgeRequested) { - CatalogHandlers.purgeTable(catalog, tableIdentifier); - log.info("PURGED TABLE {}", tableIdentifier); - } else { - CatalogHandlers.dropTable(catalog, tableIdentifier); - log.info("DROPPED TABLE {}", tableIdentifier); - } - return null; - }, - null); - } - - @PostMapping( - value = "/v1/{prefix}/namespaces/{namespace}/register", - produces = MediaType.APPLICATION_JSON_VALUE, - consumes = MediaType.APPLICATION_JSON_VALUE) - public LoadTableResponse registerTable( - HttpServletRequest request, - @PathVariable("prefix") String platformInstance, - @PathVariable("namespace") String namespace, - @RequestBody RegisterTableRequest registerTableRequest) { - log.info("REGISTER TABLE REQUEST {}", registerTableRequest); - - LoadTableResponse registerTableResponse = - catalogOperation( - platformInstance, - request, - operationContext -> - authorize(operationContext, platformInstance, DataOperation.MANAGE_TABLES, false), - catalog -> { - // ensure namespace exists - Namespace ns = namespaceFromString(namespace); - catalog.loadNamespaceMetadata(ns); - return CatalogHandlers.registerTable(catalog, ns, registerTableRequest); - }, - null); - - log.info("REGISTER TABLE RESPONSE {}", registerTableResponse); - return registerTableResponse; - } -} diff --git a/metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/rest/IcebergViewApiController.java b/metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/rest/IcebergViewApiController.java deleted file mode 100644 index 262efc72eba56b..00000000000000 --- a/metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/rest/IcebergViewApiController.java +++ /dev/null @@ -1,136 +0,0 @@ -package com.datahub.iceberg.catalog.rest; - -import static com.datahub.iceberg.catalog.Utils.*; - -import com.datahub.iceberg.catalog.DataOperation; -import jakarta.servlet.http.HttpServletRequest; -import java.net.URLEncoder; -import java.nio.charset.Charset; -import lombok.extern.slf4j.Slf4j; -import org.apache.iceberg.catalog.Namespace; -import org.apache.iceberg.catalog.TableIdentifier; -import org.apache.iceberg.rest.CatalogHandlers; -import org.apache.iceberg.rest.RESTUtil; -import org.apache.iceberg.rest.requests.CreateViewRequest; -import org.apache.iceberg.rest.requests.UpdateTableRequest; -import org.apache.iceberg.rest.responses.LoadViewResponse; -import org.springframework.http.MediaType; -import org.springframework.web.bind.annotation.*; - -@Slf4j -@RestController -public class IcebergViewApiController extends AbstractIcebergController { - - @PostMapping( - value = "/v1/{prefix}/namespaces/{namespace}/views", - produces = MediaType.APPLICATION_JSON_VALUE, - consumes = MediaType.APPLICATION_JSON_VALUE) - public LoadViewResponse 
createView( - HttpServletRequest request, - @PathVariable("prefix") String platformInstance, - @PathVariable("namespace") String namespace, - @RequestBody CreateViewRequest createViewRequest) { - log.info("CREATE VIEW REQUEST {}", createViewRequest); - - LoadViewResponse createViewResponse = - catalogOperation( - platformInstance, - request, - operationContext -> - authorize(operationContext, platformInstance, DataOperation.MANAGE_VIEWS, false), - catalog -> { - // ensure namespace exists - Namespace ns = namespaceFromString(namespace); - catalog.loadNamespaceMetadata(ns); - return CatalogHandlers.createView(catalog, ns, createViewRequest); - }, - null); - - log.info("CREATE VIEW RESPONSE {}", createViewResponse); - return createViewResponse; - } - - @PostMapping( - value = "/v1/{prefix}/namespaces/{namespace}/views/{view}", - produces = MediaType.APPLICATION_JSON_VALUE, - consumes = MediaType.APPLICATION_JSON_VALUE) - public LoadViewResponse updateView( - HttpServletRequest request, - @PathVariable("prefix") String platformInstance, - @PathVariable("namespace") String namespace, - @PathVariable("view") String view, - @RequestBody UpdateTableRequest updateViewRequest) { - log.info("UPDATE VIEW REQUEST {}.{}, body {} ", namespace, view, updateViewRequest); - - LoadViewResponse updateViewResponse = - catalogOperation( - platformInstance, - request, - operationContext -> - authorize( - operationContext, - platformInstance, - tableIdFromString(namespace, view), - DataOperation.MANAGE_VIEWS, - false), - catalog -> - CatalogHandlers.updateView( - catalog, tableIdFromString(namespace, view), updateViewRequest), - null); - - log.info("UPDATE VIEW RESPONSE {}", updateViewResponse); - return updateViewResponse; - } - - @GetMapping( - value = "/v1/{prefix}/namespaces/{namespace}/views/{view}", - produces = MediaType.APPLICATION_JSON_VALUE) - public LoadViewResponse loadView( - HttpServletRequest request, - @PathVariable("prefix") String platformInstance, - @PathVariable("namespace") String namespace, - @PathVariable("view") String view) { - log.info("GET VIEW REQUEST {} {}.{}", platformInstance, namespace, view); - - Namespace ns = RESTUtil.decodeNamespace(URLEncoder.encode(namespace, Charset.defaultCharset())); - TableIdentifier tableIdentifier = TableIdentifier.of(ns, RESTUtil.decodeString(view)); - LoadViewResponse getViewResponse = - catalogOperation( - platformInstance, - request, - operationContext -> - authorize( - operationContext, - platformInstance, - tableIdFromString(namespace, view), - DataOperation.READ_ONLY, - false), - catalog -> CatalogHandlers.loadView(catalog, tableIdFromString(namespace, view)), - null); - log.info("LOAD VIEW RESPONSE {}", getViewResponse); - return getViewResponse; - } - - @DeleteMapping(value = "/v1/{prefix}/namespaces/{namespace}/views/{view}") - public void dropView( - HttpServletRequest request, - @PathVariable("prefix") String platformInstance, - @PathVariable("namespace") String namespace, - @PathVariable("view") String view) { - log.info("DROP VIEW REQUEST ns {} table {}", namespace, view); - Namespace ns = RESTUtil.decodeNamespace(URLEncoder.encode(namespace, Charset.defaultCharset())); - TableIdentifier tableIdentifier = TableIdentifier.of(ns, RESTUtil.decodeString(view)); - - catalogOperation( - platformInstance, - request, - operationContext -> - authorize(operationContext, platformInstance, DataOperation.MANAGE_VIEWS, false), - catalog -> { - CatalogHandlers.dropView(catalog, tableIdFromString(namespace, view)); - return null; - }, - null); - 
log.info("DROPPED VIEW {}", tableIdentifier); - } -} diff --git a/metadata-service/iceberg-catalog/src/main/java/io/datahubproject/iceberg/catalog/DataHubIcebergWarehouse.java b/metadata-service/iceberg-catalog/src/main/java/io/datahubproject/iceberg/catalog/DataHubIcebergWarehouse.java new file mode 100644 index 00000000000000..2615ae670a7864 --- /dev/null +++ b/metadata-service/iceberg-catalog/src/main/java/io/datahubproject/iceberg/catalog/DataHubIcebergWarehouse.java @@ -0,0 +1,275 @@ +package io.datahubproject.iceberg.catalog; + +import static com.linkedin.metadata.Constants.*; +import static com.linkedin.metadata.utils.GenericRecordUtils.serializeAspect; +import static io.datahubproject.iceberg.catalog.Utils.*; + +import com.google.common.util.concurrent.Striped; +import com.linkedin.common.AuditStamp; +import com.linkedin.common.FabricType; +import com.linkedin.common.urn.DatasetUrn; +import com.linkedin.common.urn.Urn; +import com.linkedin.data.template.RecordTemplate; +import com.linkedin.dataplatforminstance.IcebergWarehouseInfo; +import com.linkedin.dataset.IcebergCatalogInfo; +import com.linkedin.entity.EnvelopedAspect; +import com.linkedin.events.metadata.ChangeType; +import com.linkedin.metadata.entity.EntityService; +import com.linkedin.mxe.MetadataChangeProposal; +import com.linkedin.platformresource.PlatformResourceInfo; +import com.linkedin.secret.DataHubSecretValue; +import com.linkedin.util.Pair; +import io.datahubproject.iceberg.catalog.credentials.CredentialProvider; +import io.datahubproject.metadata.context.OperationContext; +import io.datahubproject.metadata.services.SecretService; +import java.net.URISyntaxException; +import java.util.*; +import java.util.concurrent.locks.Lock; +import lombok.Getter; +import lombok.SneakyThrows; +import org.apache.iceberg.CatalogUtil; +import org.apache.iceberg.catalog.TableIdentifier; +import org.apache.iceberg.exceptions.*; + +public class DataHubIcebergWarehouse { + + public static final String DATASET_ICEBERG_METADATA_ASPECT_NAME = "icebergCatalogInfo"; + public static final String DATAPLATFORM_INSTANCE_ICEBERG_WAREHOUSE_ASPECT_NAME = + "icebergWarehouseInfo"; + + private final EntityService entityService; + + private final SecretService secretService; + + private final OperationContext operationContext; + + private final IcebergWarehouseInfo icebergWarehouse; + + @Getter private final String platformInstance; + + // TODO: Need to handle locks for deployments with multiple GMS replicas. 
+ private static final Striped resourceLocks = + Striped.lazyWeakLock(Runtime.getRuntime().availableProcessors() * 2); + + private DataHubIcebergWarehouse( + String platformInstance, + IcebergWarehouseInfo icebergWarehouse, + EntityService entityService, + SecretService secretService, + OperationContext operationContext) { + this.platformInstance = platformInstance; + this.icebergWarehouse = icebergWarehouse; + this.entityService = entityService; + this.secretService = secretService; + this.operationContext = operationContext; + } + + public static DataHubIcebergWarehouse of( + String platformInstance, + EntityService entityService, + SecretService secretService, + OperationContext operationContext) { + Urn platformInstanceUrn = Utils.platformInstanceUrn(platformInstance); + RecordTemplate warehouseAspect = + entityService.getLatestAspect( + operationContext, + platformInstanceUrn, + DATAPLATFORM_INSTANCE_ICEBERG_WAREHOUSE_ASPECT_NAME); + + if (warehouseAspect == null) { + throw new NotFoundException("Unknown warehouse " + platformInstance); + } + + IcebergWarehouseInfo icebergWarehouse = new IcebergWarehouseInfo(warehouseAspect.data()); + return new DataHubIcebergWarehouse( + platformInstance, icebergWarehouse, entityService, secretService, operationContext); + } + + public CredentialProvider.StorageProviderCredentials getStorageProviderCredentials() { + + Urn clientIdUrn, clientSecretUrn; + String role, region; + Integer expirationSeconds; + + clientIdUrn = icebergWarehouse.getClientId(); + clientSecretUrn = icebergWarehouse.getClientSecret(); + role = icebergWarehouse.getRole(); + region = icebergWarehouse.getRegion(); + expirationSeconds = icebergWarehouse.getTempCredentialExpirationSeconds(); + + Map> credsMap = + entityService.getLatestAspects( + operationContext, + Set.of(clientIdUrn, clientSecretUrn), + Set.of("dataHubSecretValue"), + false); + + DataHubSecretValue clientIdValue = + new DataHubSecretValue(credsMap.get(clientIdUrn).get(0).data()); + + String clientId = secretService.decrypt(clientIdValue.getValue()); + + DataHubSecretValue clientSecretValue = + new DataHubSecretValue(credsMap.get(clientSecretUrn).get(0).data()); + String clientSecret = secretService.decrypt(clientSecretValue.getValue()); + + return new CredentialProvider.StorageProviderCredentials( + clientId, clientSecret, role, region, expirationSeconds); + } + + public String getDataRoot() { + return icebergWarehouse.getDataRoot(); + } + + public Optional getDatasetUrn(TableIdentifier tableIdentifier) { + Urn resourceUrn = resourceUrn(tableIdentifier); + PlatformResourceInfo platformResourceInfo = + (PlatformResourceInfo) + entityService.getLatestAspect( + operationContext, resourceUrn, PLATFORM_RESOURCE_INFO_ASPECT_NAME); + if (platformResourceInfo == null) { + return Optional.empty(); + } + try { + return Optional.of(DatasetUrn.createFromString(platformResourceInfo.getPrimaryKey())); + } catch (URISyntaxException e) { + throw new RuntimeException("Invalid dataset urn " + platformResourceInfo.getPrimaryKey(), e); + } + } + + public IcebergCatalogInfo getIcebergMetadata(TableIdentifier tableIdentifier) { + Optional datasetUrn = getDatasetUrn(tableIdentifier); + if (datasetUrn.isEmpty()) { + return null; + } + + IcebergCatalogInfo icebergMeta = + (IcebergCatalogInfo) + entityService.getLatestAspect( + operationContext, datasetUrn.get(), DATASET_ICEBERG_METADATA_ASPECT_NAME); + + if (icebergMeta == null) { + throw new IllegalStateException( + String.format( + "IcebergMetadata not found for resource %s, dataset %s", 
+ resourceUrn(tableIdentifier), datasetUrn.get())); + } + return icebergMeta; + } + + public Pair getIcebergMetadataEnveloped( + TableIdentifier tableIdentifier) { + Optional datasetUrn = getDatasetUrn(tableIdentifier); + if (datasetUrn.isEmpty()) { + return null; + } + + try { + EnvelopedAspect existingEnveloped = + entityService.getLatestEnvelopedAspect( + operationContext, + DATASET_ENTITY_NAME, + datasetUrn.get(), + DATASET_ICEBERG_METADATA_ASPECT_NAME); + if (existingEnveloped == null) { + throw new IllegalStateException( + String.format( + "IcebergMetadata not found for resource %s, dataset %s", + resourceUrn(tableIdentifier), datasetUrn.get())); + } + return Pair.of(existingEnveloped, datasetUrn.get()); + } catch (Exception e) { + throw new RuntimeException( + "Error fetching IcebergMetadata aspect for dataset " + datasetUrn.get(), e); + } + } + + public boolean deleteDataset(TableIdentifier tableIdentifier) { + Urn resourceUrn = resourceUrn(tableIdentifier); + + // guard against concurrent modifications that depend on the resource (rename table/view) + Lock lock = resourceLocks.get(resourceUrn); + lock.lock(); + try { + if (!entityService.exists(operationContext, resourceUrn)) { + return false; + } + Optional urn = getDatasetUrn(tableIdentifier); + entityService.deleteUrn(operationContext, resourceUrn); + urn.ifPresent(x -> entityService.deleteUrn(operationContext, x)); + return true; + } finally { + lock.unlock(); + } + } + + public DatasetUrn createDataset( + TableIdentifier tableIdentifier, boolean view, AuditStamp auditStamp) { + String datasetName = platformInstance + "." + UUID.randomUUID(); + DatasetUrn datasetUrn = new DatasetUrn(platformUrn(), datasetName, fabricType()); + createResource(datasetUrn, tableIdentifier, view, auditStamp); + return datasetUrn; + } + + public DatasetUrn renameDataset( + TableIdentifier fromTableId, TableIdentifier toTableId, boolean view, AuditStamp auditStamp) { + + // guard against concurrent modifications to the resource (other renames, deletion) + Lock lock = resourceLocks.get(resourceUrn(fromTableId)); + lock.lock(); + + try { + Optional optDatasetUrn = getDatasetUrn(fromTableId); + if (optDatasetUrn.isEmpty()) { + if (view) { + throw new NoSuchViewException( + "No such view %s", fullTableName(platformInstance, fromTableId)); + } else { + throw new NoSuchTableException( + "No such table %s", fullTableName(platformInstance, fromTableId)); + } + } + + DatasetUrn datasetUrn = optDatasetUrn.get(); + try { + createResource(datasetUrn, toTableId, view, auditStamp); + } catch (ValidationException e) { + throw new AlreadyExistsException( + "%s already exists: %s", + view ? "View" : "Table", fullTableName(platformInstance, toTableId)); + } + entityService.deleteUrn(operationContext, resourceUrn(fromTableId)); + return datasetUrn; + } finally { + lock.unlock(); + } + } + + private void createResource( + DatasetUrn datasetUrn, TableIdentifier tableIdentifier, boolean view, AuditStamp auditStamp) { + PlatformResourceInfo resourceInfo = + new PlatformResourceInfo().setPrimaryKey(datasetUrn.toString()); + resourceInfo.setResourceType(view ? 
"icebergView" : "icebergTable"); + + MetadataChangeProposal mcp = new MetadataChangeProposal(); + mcp.setEntityUrn(resourceUrn(tableIdentifier)); + mcp.setEntityType(PLATFORM_RESOURCE_ENTITY_NAME); + mcp.setAspectName(PLATFORM_RESOURCE_INFO_ASPECT_NAME); + mcp.setChangeType(ChangeType.CREATE_ENTITY); + mcp.setAspect(serializeAspect(resourceInfo)); + + entityService.ingestProposal(operationContext, mcp, auditStamp, false); + } + + private FabricType fabricType() { + return icebergWarehouse.getEnv(); + } + + @SneakyThrows + private Urn resourceUrn(TableIdentifier tableIdentifier) { + return Urn.createFromString( + String.format( + "urn:li:platformResource:%s.%s", + PLATFORM_NAME, CatalogUtil.fullTableName(platformInstance, tableIdentifier))); + } +} diff --git a/metadata-service/iceberg-catalog/src/main/java/io/datahubproject/iceberg/catalog/DataHubRestCatalog.java b/metadata-service/iceberg-catalog/src/main/java/io/datahubproject/iceberg/catalog/DataHubRestCatalog.java new file mode 100644 index 00000000000000..cbe58bf70546b5 --- /dev/null +++ b/metadata-service/iceberg-catalog/src/main/java/io/datahubproject/iceberg/catalog/DataHubRestCatalog.java @@ -0,0 +1,487 @@ +package io.datahubproject.iceberg.catalog; + +import static com.linkedin.metadata.Constants.*; +import static com.linkedin.metadata.utils.GenericRecordUtils.serializeAspect; +import static io.datahubproject.iceberg.catalog.Utils.*; + +import com.google.common.base.Joiner; +import com.linkedin.common.AuditStamp; +import com.linkedin.common.SubTypes; +import com.linkedin.common.urn.DatasetUrn; +import com.linkedin.common.urn.Urn; +import com.linkedin.container.Container; +import com.linkedin.container.ContainerProperties; +import com.linkedin.data.template.RecordTemplate; +import com.linkedin.data.template.StringArray; +import com.linkedin.data.template.StringMap; +import com.linkedin.dataset.DatasetProperties; +import com.linkedin.events.metadata.ChangeType; +import com.linkedin.metadata.authorization.PoliciesConfig; +import com.linkedin.metadata.entity.EntityService; +import com.linkedin.metadata.query.filter.Condition; +import com.linkedin.metadata.query.filter.Criterion; +import com.linkedin.metadata.query.filter.Filter; +import com.linkedin.metadata.search.EntitySearchService; +import com.linkedin.metadata.search.SearchEntity; +import com.linkedin.metadata.search.SearchEntityArray; +import com.linkedin.metadata.search.SearchResult; +import com.linkedin.metadata.search.utils.QueryUtils; +import com.linkedin.metadata.utils.CriterionUtils; +import com.linkedin.mxe.MetadataChangeProposal; +import io.datahubproject.iceberg.catalog.credentials.CredentialProvider; +import io.datahubproject.iceberg.catalog.credentials.S3CredentialProvider; +import io.datahubproject.metadata.context.OperationContext; +import java.io.IOException; +import java.util.*; +import java.util.function.Predicate; +import java.util.stream.Collectors; +import org.apache.iceberg.*; +import org.apache.iceberg.aws.s3.S3FileIO; +import org.apache.iceberg.catalog.Namespace; +import org.apache.iceberg.catalog.SupportsNamespaces; +import org.apache.iceberg.catalog.TableIdentifier; +import org.apache.iceberg.exceptions.AlreadyExistsException; +import org.apache.iceberg.exceptions.NamespaceNotEmptyException; +import org.apache.iceberg.exceptions.NoSuchNamespaceException; +import org.apache.iceberg.io.CloseableGroup; +import org.apache.iceberg.io.FileIO; +import org.apache.iceberg.io.InputFile; +import org.apache.iceberg.rest.requests.UpdateNamespacePropertiesRequest; 
+import org.apache.iceberg.rest.responses.UpdateNamespacePropertiesResponse; +import org.apache.iceberg.view.BaseMetastoreViewCatalog; +import org.apache.iceberg.view.ViewOperations; + +public class DataHubRestCatalog extends BaseMetastoreViewCatalog implements SupportsNamespaces { + private final CredentialProvider credentialProvider; + + static final int PAGE_SIZE = 100; + + // Upper bound for results on list namespaces/tables/views. Must use pagination for anything more + // than that. + static final int MAX_LIST_SIZE = 1000; + + private final EntityService entityService; + + private final EntitySearchService searchService; + + private final OperationContext operationContext; + + private final CloseableGroup closeableGroup; + + private final DataHubIcebergWarehouse warehouse; + + private final String warehouseRoot; + + private static final String CONTAINER_SUB_TYPE = "Namespace"; + + public DataHubRestCatalog( + EntityService entityService, + EntitySearchService searchService, + OperationContext operationContext, + DataHubIcebergWarehouse warehouse, + CredentialProvider credentialProvider) { + this.entityService = entityService; + this.searchService = searchService; + this.operationContext = operationContext; + this.credentialProvider = credentialProvider; + this.warehouse = warehouse; + + if (warehouse.getDataRoot().endsWith("/")) { + this.warehouseRoot = warehouse.getDataRoot(); + } else { + this.warehouseRoot = warehouse.getDataRoot() + "/"; + } + + this.closeableGroup = new CloseableGroup(); + this.closeableGroup.setSuppressCloseFailure(true); + } + + @Override + public void renameView(TableIdentifier fromTableId, TableIdentifier toTableId) { + renameTableOrView(fromTableId, toTableId, true); + } + + @Override + public void initialize(String name, Map properties) {} + + @Override + protected TableOperations newTableOps(TableIdentifier tableIdentifier) { + return new DataHubTableOps( + warehouse, tableIdentifier, entityService, operationContext, new S3FileIOFactory()); + } + + @Override + protected String defaultWarehouseLocation(TableIdentifier tableIdentifier) { + return warehouseRoot + tableIdentifier.toString().replaceAll("\\.", "/"); + } + + @Override + public List listTables(Namespace namespace) { + return listTablesOrViews(namespace, "Table"); + } + + @Override + public boolean dropTable(TableIdentifier tableIdentifier, boolean purge) { + if (purge) { + throw new UnsupportedOperationException(); + } + + return warehouse.deleteDataset(tableIdentifier); + } + + @Override + public Table registerTable(TableIdentifier identifier, String metadataFileLocation) { + if (tableExists(identifier)) { + throw new AlreadyExistsException("Table already exists: %s", identifier); + } + + FileIO io = + new S3FileIOFactory() + .createIO( + platformInstance(), + PoliciesConfig.DATA_READ_ONLY_PRIVILEGE, + Set.of(parentDir(metadataFileLocation))); + InputFile metadataFile = io.newInputFile(metadataFileLocation); + TableMetadata metadata = TableMetadataParser.read(io, metadataFile); + + TableOperations ops = newTableOps(identifier); + ops.commit(null, metadata); + + return new BaseTable(ops, fullTableName(name(), identifier), metricsReporter()); + } + + @Override + public void renameTable(TableIdentifier fromTableId, TableIdentifier toTableId) { + renameTableOrView(fromTableId, toTableId, false); + } + + private void renameTableOrView( + TableIdentifier fromTableId, TableIdentifier toTableId, boolean view) { + if (!fromTableId.namespace().equals(toTableId.namespace())) { + // check target namespace 
exists + if (!entityService.exists( + operationContext, containerUrn(platformInstance(), toTableId.namespace()))) { + throw new NoSuchNamespaceException("Namespace does not exist: " + toTableId.namespace()); + } + } + AuditStamp auditStamp = auditStamp(); + DatasetUrn datasetUrn = warehouse.renameDataset(fromTableId, toTableId, false, auditStamp); + DatasetProperties datasetProperties = new DatasetProperties(); + datasetProperties.setName(toTableId.name()); + datasetProperties.setQualifiedName(fullTableName(platformInstance(), toTableId)); + + MetadataChangeProposal mcp = new MetadataChangeProposal(); + mcp.setEntityType(DATASET_ENTITY_NAME); + mcp.setAspectName(DATASET_PROPERTIES_ASPECT_NAME); + mcp.setEntityUrn(datasetUrn); + mcp.setAspect(serializeAspect(datasetProperties)); + mcp.setChangeType(ChangeType.UPSERT); + ingestMcp(mcp, auditStamp); + + if (!fromTableId.namespace().equals(toTableId.namespace())) { + Container container = new Container(); + container.setContainer(containerUrn(platformInstance(), toTableId.namespace())); + + MetadataChangeProposal containerMcp = new MetadataChangeProposal(); + containerMcp.setEntityType(DATASET_ENTITY_NAME); + containerMcp.setAspectName(CONTAINER_ASPECT_NAME); + containerMcp.setEntityUrn(datasetUrn); + containerMcp.setAspect(serializeAspect(container)); + containerMcp.setChangeType(ChangeType.UPSERT); + StringMap headers = + new StringMap( + Collections.singletonMap(SYNC_INDEX_UPDATE_HEADER_NAME, Boolean.toString(true))); + mcp.setHeaders(headers); + containerMcp.setHeaders(headers); + ingestMcp(containerMcp, auditStamp); + } + } + + @Override + public void createNamespace(Namespace namespace, Map properties) { + AuditStamp auditStamp = auditStamp(); + Urn containerUrn = containerUrn(platformInstance(), namespace); + + int nLevels = namespace.length(); + if (nLevels > 1) { + String[] parentLevels = Arrays.copyOfRange(namespace.levels(), 0, nLevels - 1); + Urn parentContainerUrn = containerUrn(platformInstance(), parentLevels); + if (!entityService.exists(operationContext, parentContainerUrn)) { + throw new NoSuchNamespaceException( + "Parent namespace %s does not exist in platformInstance-catalog %s", + Joiner.on(".").join(parentLevels), platformInstance()); + } + ingestContainerAspect( + containerUrn, + CONTAINER_ASPECT_NAME, + new Container().setContainer(parentContainerUrn), + auditStamp); + } + + ingestContainerAspect( + containerUrn, + SUB_TYPES_ASPECT_NAME, + new SubTypes().setTypeNames(new StringArray(CONTAINER_SUB_TYPE)), + auditStamp); + + ingestContainerProperties(namespace, properties, auditStamp); + + MetadataChangeProposal platformInstanceMcp = + platformInstanceMcp(platformInstance(), containerUrn, CONTAINER_ENTITY_NAME); + ingestMcp(platformInstanceMcp, auditStamp); + } + + @Override + public List listNamespaces(Namespace namespace) throws NoSuchNamespaceException { + Filter filter; + if (namespace.isEmpty()) { + Criterion noParentCriterion = CriterionUtils.buildCriterion("container", Condition.IS_NULL); + Criterion subTypeCriterion = + CriterionUtils.buildCriterion("typeNames", Condition.EQUAL, CONTAINER_SUB_TYPE); + Criterion dataPlatformInstanceCriterion = + CriterionUtils.buildCriterion( + "platformInstance", + Condition.EQUAL, + platformInstanceUrn(platformInstance()).toString()); + filter = + QueryUtils.getFilterFromCriteria( + List.of(noParentCriterion, subTypeCriterion, dataPlatformInstanceCriterion)); + } else { + filter = + QueryUtils.newFilter( + "container.keyword", containerUrn(platformInstance(), 
namespace).toString()); + } + + SearchResult searchResult = search(filter, CONTAINER_ENTITY_NAME); + + return searchResult.getEntities().stream() + .map( + x -> { + String namespaceName = namespaceNameFromContainerUrn(x.getEntity()); + return Namespace.of(namespaceName.split("\\.")); + }) + .collect(Collectors.toList()); + } + + @Override + public Map loadNamespaceMetadata(Namespace namespace) + throws NoSuchNamespaceException { + + ContainerProperties containerProperties = + (ContainerProperties) + entityService.getLatestAspect( + operationContext, + containerUrn(platformInstance(), namespace), + CONTAINER_PROPERTIES_ASPECT_NAME); + + if (containerProperties == null) { + throw new NoSuchNamespaceException("Namespace does not exist: " + namespace); + } + + return new HashMap<>(containerProperties.getCustomProperties()); + } + + @Override + public boolean dropNamespace(Namespace namespace) throws NamespaceNotEmptyException { + Urn containerUrn = containerUrn(platformInstance(), namespace); + if (!entityService.exists(operationContext, containerUrn)) { + return false; + } + + Filter filter = QueryUtils.newFilter("container.keyword", containerUrn.toString()); + + if (searchIsEmpty(filter, CONTAINER_ENTITY_NAME, DATASET_ENTITY_NAME)) { + // TODO handle race conditions + entityService.deleteUrn(operationContext, containerUrn); + return true; + } else { + throw new NamespaceNotEmptyException("Namespace %s is not empty", namespace); + } + } + + @Override + public boolean setProperties(Namespace namespace, Map map) + throws NoSuchNamespaceException { + // not required for our purposes currently + throw new UnsupportedOperationException(); + } + + @Override + public boolean removeProperties(Namespace namespace, Set set) + throws NoSuchNamespaceException { + // not required for our purposes currently + throw new UnsupportedOperationException(); + } + + private void ingestContainerProperties( + Namespace namespace, Map properties, AuditStamp auditStamp) { + ingestContainerAspect( + containerUrn(platformInstance(), namespace), + CONTAINER_PROPERTIES_ASPECT_NAME, + new ContainerProperties() + .setName(namespace.levels()[namespace.length() - 1]) + .setCustomProperties(new StringMap(properties)), + auditStamp); + } + + public UpdateNamespacePropertiesResponse updateNamespaceProperties( + Namespace namespace, UpdateNamespacePropertiesRequest request) { + + Map properties = loadNamespaceMetadata(namespace); + + Set missing = new HashSet<>(); + request.removals().stream() + .filter(Predicate.not(properties::containsKey)) + .forEach(missing::add); + + UpdateNamespacePropertiesResponse.Builder responseBuilder = + UpdateNamespacePropertiesResponse.builder(); + + request.removals().stream() + .filter(Predicate.not(missing::contains)) + .forEach(responseBuilder::addRemoved); + + responseBuilder.addUpdated(request.updates().keySet()); + responseBuilder.addMissing(missing); + + properties.putAll(request.updates()); + properties.keySet().removeAll(request.removals()); + + ingestContainerProperties(namespace, properties, auditStamp()); + + return responseBuilder.build(); + } + + @Override + public void close() throws IOException { + super.close(); + this.closeableGroup.close(); + } + + private void ingestContainerAspect( + Urn containerUrn, String aspectName, RecordTemplate aspect, AuditStamp auditStamp) { + MetadataChangeProposal mcp = new MetadataChangeProposal(); + + mcp.setEntityUrn(containerUrn); + mcp.setEntityType(CONTAINER_ENTITY_NAME); + mcp.setAspectName(aspectName); + 
mcp.setAspect(serializeAspect(aspect)); + mcp.setChangeType(ChangeType.UPSERT); + + StringMap headers = + new StringMap( + Collections.singletonMap(SYNC_INDEX_UPDATE_HEADER_NAME, Boolean.toString(true))); + mcp.setHeaders(headers); + + ingestMcp(mcp, auditStamp); + } + + private void ingestMcp(MetadataChangeProposal mcp, AuditStamp auditStamp) { + entityService.ingestProposal(operationContext, mcp, auditStamp, false); + } + + private List listTablesOrViews(Namespace namespace, String typeName) { + Filter filter = + QueryUtils.newFilter( + Map.of( + "container.keyword", + containerUrn(platformInstance(), namespace).toString(), + "typeNames", + typeName)); + + SearchResult searchResult = search(filter, DATASET_ENTITY_NAME); + + Set urns = + searchResult.getEntities().stream() + .map(SearchEntity::getEntity) + .collect(Collectors.toSet()); + + Map> aspects = + entityService.getLatestAspects( + operationContext, urns, Set.of(DATASET_PROPERTIES_ASPECT_NAME), false); + + return aspects.values().stream() + .filter(x -> x != null && !x.isEmpty()) + .map(x -> (DatasetProperties) x.get(0)) + .map(DatasetProperties::getQualifiedName) + .map( + x -> { + String[] parts = x.split("\\."); + // ignore first part which is the warehouse name + return TableIdentifier.of(Arrays.copyOfRange(parts, 1, parts.length)); + }) + .toList(); + } + + private SearchResult search(Filter filter, String entityName) { + // Go through pages. + SearchEntityArray allEntities = new SearchEntityArray(); + int startIndex = 0; + int totalCount; + + do { + SearchResult pageResult = + searchService.search( + operationContext, List.of(entityName), "*", filter, List.of(), startIndex, PAGE_SIZE); + totalCount = pageResult.getNumEntities(); + if (totalCount > MAX_LIST_SIZE) { + totalCount = MAX_LIST_SIZE; + } + allEntities.addAll(pageResult.getEntities()); + startIndex += PAGE_SIZE; + } while (startIndex < totalCount); + + SearchResult allResults = new SearchResult(); + allResults.setEntities(allEntities); + return allResults; + } + + private boolean searchIsEmpty(Filter filter, String... 
entityNames) { + SearchResult searchResult = + searchService.search(operationContext, List.of(entityNames), "*", filter, List.of(), 0, 1); + + return searchResult.getEntities().isEmpty(); + } + + private class S3FileIOFactory implements FileIOFactory { + @Override + public FileIO createIO( + String platformInstance, PoliciesConfig.Privilege privilege, Set locations) { + + FileIO io = new S3FileIO(); + Map creds = + credentialProvider.getCredentials( + new S3CredentialProvider.CredentialsCacheKey(platformInstance, privilege, locations), + warehouse.getStorageProviderCredentials()); + io.initialize(creds); + closeableGroup.addCloseable(io); + return io; + } + + @Override + public FileIO createIO( + String platformInstance, PoliciesConfig.Privilege privilege, TableMetadata tableMetadata) { + return createIO(platformInstance, privilege, locations(tableMetadata)); + } + } + + @Override + protected ViewOperations newViewOps(TableIdentifier tableIdentifier) { + return new DataHubViewOps( + warehouse, tableIdentifier, entityService, operationContext, new S3FileIOFactory()); + } + + @Override + public List listViews(Namespace namespace) { + return listTablesOrViews(namespace, "View"); + } + + @Override + public boolean dropView(TableIdentifier tableIdentifier) { + return warehouse.deleteDataset(tableIdentifier); + } + + private String platformInstance() { + return warehouse.getPlatformInstance(); + } +} diff --git a/metadata-service/iceberg-catalog/src/main/java/io/datahubproject/iceberg/catalog/DataHubTableOps.java b/metadata-service/iceberg-catalog/src/main/java/io/datahubproject/iceberg/catalog/DataHubTableOps.java new file mode 100644 index 00000000000000..12101d198b33cd --- /dev/null +++ b/metadata-service/iceberg-catalog/src/main/java/io/datahubproject/iceberg/catalog/DataHubTableOps.java @@ -0,0 +1,54 @@ +package io.datahubproject.iceberg.catalog; + +import com.linkedin.metadata.entity.EntityService; +import io.datahubproject.metadata.context.OperationContext; +import lombok.SneakyThrows; +import org.apache.iceberg.BaseMetastoreTableOperations; +import org.apache.iceberg.TableMetadata; +import org.apache.iceberg.catalog.TableIdentifier; +import org.apache.iceberg.io.FileIO; + +public class DataHubTableOps extends BaseMetastoreTableOperations { + + private final TableOrViewOpsDelegate delegate; + + public DataHubTableOps( + DataHubIcebergWarehouse warehouse, + TableIdentifier tableIdentifier, + EntityService entityService, + OperationContext operationContext, + FileIOFactory fileIOFactory) { + this.delegate = + new TableOpsDelegate( + warehouse, tableIdentifier, entityService, operationContext, fileIOFactory); + } + + @Override + public TableMetadata refresh() { + return delegate.refresh(); + } + + @Override + public TableMetadata current() { + return delegate.current(); + } + + @SneakyThrows + @Override + protected void doCommit(TableMetadata base, TableMetadata metadata) { + delegate.doCommit( + base == null ? 
null : new MetadataWrapper<>(base), + new MetadataWrapper<>(metadata), + () -> writeNewMetadataIfRequired(base == null, metadata)); + } + + @Override + protected String tableName() { + return delegate.name(); + } + + @Override + public FileIO io() { + return delegate.io(); + } +} diff --git a/metadata-service/iceberg-catalog/src/main/java/io/datahubproject/iceberg/catalog/DataHubViewOps.java b/metadata-service/iceberg-catalog/src/main/java/io/datahubproject/iceberg/catalog/DataHubViewOps.java new file mode 100644 index 00000000000000..ae292ba7f044fa --- /dev/null +++ b/metadata-service/iceberg-catalog/src/main/java/io/datahubproject/iceberg/catalog/DataHubViewOps.java @@ -0,0 +1,61 @@ +package io.datahubproject.iceberg.catalog; + +import com.linkedin.metadata.entity.EntityService; +import io.datahubproject.metadata.context.OperationContext; +import lombok.SneakyThrows; +import lombok.extern.slf4j.Slf4j; +import org.apache.iceberg.catalog.TableIdentifier; +import org.apache.iceberg.io.FileIO; +import org.apache.iceberg.view.BaseViewOperations; +import org.apache.iceberg.view.ViewMetadata; + +@Slf4j +public class DataHubViewOps extends BaseViewOperations { + + private final TableOrViewOpsDelegate delegate; + + public DataHubViewOps( + DataHubIcebergWarehouse warehouse, + TableIdentifier tableIdentifier, + EntityService entityService, + OperationContext operationContext, + FileIOFactory fileIOFactory) { + this.delegate = + new ViewOpsDelegate( + warehouse, tableIdentifier, entityService, operationContext, fileIOFactory); + } + + @Override + public ViewMetadata refresh() { + return delegate.refresh(); + } + + @Override + public ViewMetadata current() { + return delegate.current(); + } + + @Override + protected void doRefresh() { + throw new UnsupportedOperationException(); + } + + @SneakyThrows + @Override + protected void doCommit(ViewMetadata base, ViewMetadata metadata) { + delegate.doCommit( + base == null ? 
null : new MetadataWrapper<>(base), + new MetadataWrapper<>(metadata), + () -> writeNewMetadataIfRequired(metadata)); + } + + @Override + protected String viewName() { + return delegate.name(); + } + + @Override + public FileIO io() { + return delegate.io(); + } +} diff --git a/metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/DataOperation.java b/metadata-service/iceberg-catalog/src/main/java/io/datahubproject/iceberg/catalog/DataOperation.java similarity index 87% rename from metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/DataOperation.java rename to metadata-service/iceberg-catalog/src/main/java/io/datahubproject/iceberg/catalog/DataOperation.java index 67941b05f4e2c4..9999cb5761fb16 100644 --- a/metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/DataOperation.java +++ b/metadata-service/iceberg-catalog/src/main/java/io/datahubproject/iceberg/catalog/DataOperation.java @@ -1,4 +1,4 @@ -package com.datahub.iceberg.catalog; +package io.datahubproject.iceberg.catalog; import static com.linkedin.metadata.authorization.PoliciesConfig.*; @@ -17,7 +17,9 @@ public enum DataOperation { READ_WRITE(DATA_READ_WRITE_PRIVILEGE, DATA_MANAGE_TABLES_PRIVILEGE), MANAGE_VIEWS(DATA_MANAGE_VIEWS_PRIVILEGE, DATA_MANAGE_TABLES_PRIVILEGE), MANAGE_TABLES(DATA_MANAGE_TABLES_PRIVILEGE), - MANAGE_NAMESPACES(DATA_MANAGE_NAMESPACES_PRIVILEGE); + MANAGE_NAMESPACES(DATA_MANAGE_NAMESPACES_PRIVILEGE), + + LIST(DATA_LIST_ENTITIES_PRIVILEGE); public final List ascendingPrivileges; public final List descendingPrivileges; diff --git a/metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/FileIOFactory.java b/metadata-service/iceberg-catalog/src/main/java/io/datahubproject/iceberg/catalog/FileIOFactory.java similarity index 90% rename from metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/FileIOFactory.java rename to metadata-service/iceberg-catalog/src/main/java/io/datahubproject/iceberg/catalog/FileIOFactory.java index 8726bce1477486..c3b8b28d01eb23 100644 --- a/metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/FileIOFactory.java +++ b/metadata-service/iceberg-catalog/src/main/java/io/datahubproject/iceberg/catalog/FileIOFactory.java @@ -1,4 +1,4 @@ -package com.datahub.iceberg.catalog; +package io.datahubproject.iceberg.catalog; import com.linkedin.metadata.authorization.PoliciesConfig; import java.util.Set; diff --git a/metadata-service/iceberg-catalog/src/main/java/io/datahubproject/iceberg/catalog/TableOrViewOpsDelegate.java b/metadata-service/iceberg-catalog/src/main/java/io/datahubproject/iceberg/catalog/TableOrViewOpsDelegate.java new file mode 100644 index 00000000000000..9f3ddc01d3fc41 --- /dev/null +++ b/metadata-service/iceberg-catalog/src/main/java/io/datahubproject/iceberg/catalog/TableOrViewOpsDelegate.java @@ -0,0 +1,478 @@ +package io.datahubproject.iceberg.catalog; + +import static com.linkedin.metadata.Constants.*; +import static com.linkedin.metadata.Constants.VIEW_PROPERTIES_ASPECT_NAME; +import static com.linkedin.metadata.aspect.validation.ConditionalWriteValidator.HTTP_HEADER_IF_VERSION_MATCH; +import static com.linkedin.metadata.utils.GenericRecordUtils.serializeAspect; +import static io.datahubproject.iceberg.catalog.DataHubIcebergWarehouse.DATASET_ICEBERG_METADATA_ASPECT_NAME; +import static io.datahubproject.iceberg.catalog.Utils.*; +import static io.datahubproject.iceberg.catalog.Utils.platformInstanceMcp; +import static 
org.apache.commons.lang3.StringUtils.capitalize; + +import com.linkedin.common.AuditStamp; +import com.linkedin.common.SubTypes; +import com.linkedin.common.urn.DatasetUrn; +import com.linkedin.container.Container; +import com.linkedin.data.template.StringArray; +import com.linkedin.data.template.StringMap; +import com.linkedin.dataset.DatasetProfile; +import com.linkedin.dataset.DatasetProperties; +import com.linkedin.dataset.IcebergCatalogInfo; +import com.linkedin.dataset.ViewProperties; +import com.linkedin.entity.EnvelopedAspect; +import com.linkedin.events.metadata.ChangeType; +import com.linkedin.metadata.authorization.PoliciesConfig; +import com.linkedin.metadata.entity.EntityService; +import com.linkedin.mxe.MetadataChangeProposal; +import com.linkedin.schema.SchemaMetadata; +import com.linkedin.util.Pair; +import io.datahubproject.metadata.context.OperationContext; +import io.datahubproject.schematron.converters.avro.AvroSchemaConverter; +import java.util.Set; +import java.util.function.Supplier; +import lombok.extern.slf4j.Slf4j; +import org.apache.avro.Schema; +import org.apache.iceberg.Snapshot; +import org.apache.iceberg.SnapshotSummary; +import org.apache.iceberg.TableMetadata; +import org.apache.iceberg.TableMetadataParser; +import org.apache.iceberg.avro.AvroSchemaUtil; +import org.apache.iceberg.catalog.TableIdentifier; +import org.apache.iceberg.exceptions.*; +import org.apache.iceberg.io.FileIO; +import org.apache.iceberg.view.*; + +@Slf4j +abstract class TableOrViewOpsDelegate { + + private final DataHubIcebergWarehouse warehouse; + private FileIO io; + private final TableIdentifier tableIdentifier; + private final EntityService entityService; + private final OperationContext operationContext; + private final FileIOFactory fileIOFactory; + private volatile M currentMetadata = null; + private volatile boolean shouldRefresh = true; + + TableOrViewOpsDelegate( + DataHubIcebergWarehouse warehouse, + TableIdentifier tableIdentifier, + EntityService entityService, + OperationContext operationContext, + FileIOFactory fileIOFactory) { + this.warehouse = warehouse; + this.tableIdentifier = tableIdentifier; + this.entityService = entityService; + this.operationContext = operationContext; + this.fileIOFactory = fileIOFactory; + } + + public M refresh() { + IcebergCatalogInfo icebergMeta = warehouse.getIcebergMetadata(tableIdentifier); + + if (icebergMeta == null || !isExpectedType(icebergMeta.isView())) { + return null; + } + + String location = icebergMeta.getMetadataPointer(); + if (io == null) { + String locationDir = location.substring(0, location.lastIndexOf("/")); + io = + fileIOFactory.createIO( + platformInstance(), PoliciesConfig.DATA_READ_ONLY_PRIVILEGE, Set.of(locationDir)); + } + + currentMetadata = readMetadata(io, location); + shouldRefresh = false; + return currentMetadata; + } + + M current() { + if (shouldRefresh) { + return refresh(); + } + return currentMetadata; + } + + protected void doCommit( + MetadataWrapper base, MetadataWrapper metadata, Supplier metadataWriter) { + + Pair existingDatasetAspect = + warehouse.getIcebergMetadataEnveloped(tableIdentifier); + + boolean creation = base == null; + + if (existingDatasetAspect != null) { + if (creation) { + throw new AlreadyExistsException("%s already exists: %s", capitalize(type()), name()); + } + IcebergCatalogInfo existingMetadata = + new IcebergCatalogInfo(existingDatasetAspect.getFirst().getValue().data()); + if (!isExpectedType(existingMetadata.isView())) { + throw noSuchEntityException(); + } + if 
(!existingMetadata.getMetadataPointer().equals(base.metadataFileLocation())) { + throw new CommitFailedException("Cannot commit to %s %s: stale metadata", type(), name()); + } + } + + if (!creation && existingDatasetAspect == null) { + throw new IllegalStateException( + "Iceberg metadata aspect not found for " + + tableIdentifier + + " with base metadata-file " + + base.location()); + } + + DatasetUrn datasetUrn; + AuditStamp auditStamp = auditStamp(); + // attempt to commit + io = + fileIOFactory.createIO( + platformInstance(), + PoliciesConfig.DATA_READ_WRITE_PRIVILEGE, + Set.of(metadata.location())); + String newMetadataLocation = metadataWriter.get(); + + if (creation) { + try { + datasetUrn = warehouse.createDataset(tableIdentifier, isView(), auditStamp); + } catch (ValidationException e) { + throw new AlreadyExistsException("%s already exists: %s", capitalize(type()), name()); + } + } else { + datasetUrn = existingDatasetAspect.getSecond(); + } + + MetadataChangeProposal icebergMcp = newMcp(DATASET_ICEBERG_METADATA_ASPECT_NAME, datasetUrn); + icebergMcp.setAspect( + serializeAspect( + new IcebergCatalogInfo().setMetadataPointer(newMetadataLocation).setView(isView()))); + + if (creation) { + icebergMcp.setChangeType(ChangeType.CREATE_ENTITY); + } else { + String existingVersion = existingDatasetAspect.getFirst().getSystemMetadata().getVersion(); + StringMap headers = icebergMcp.getHeaders(); + if (headers == null) { + headers = new StringMap(); + icebergMcp.setHeaders(headers); + } + headers.put(HTTP_HEADER_IF_VERSION_MATCH, existingVersion); + icebergMcp.setChangeType( + ChangeType.UPSERT); // ideally should be UPDATE, but seems not supported yet. + } + try { + ingestMcp(icebergMcp, auditStamp); + } catch (ValidationException e) { + if (creation) { + // this is likely because table/view already exists i.e. 
created concurrently in a race + // condition + throw new AlreadyExistsException("%s already exists: %s", capitalize(type()), name()); + } else { + throw new CommitFailedException( + "Cannot commit to %s %s: stale metadata", capitalize(type()), name()); + } + } + + if (base == null || (base.currentSchemaId() != metadata.currentSchemaId())) { + // schema changed + Schema avroSchema = AvroSchemaUtil.convert(metadata.schema(), name()); + AvroSchemaConverter converter = AvroSchemaConverter.builder().build(); + SchemaMetadata schemaMetadata = + converter.toDataHubSchema(avroSchema, false, false, platformUrn(), null); + MetadataChangeProposal schemaMcp = newMcp(SCHEMA_METADATA_ASPECT_NAME, datasetUrn); + schemaMcp.setAspect(serializeAspect(schemaMetadata)); + schemaMcp.setChangeType(ChangeType.UPSERT); + ingestMcp(schemaMcp, auditStamp); + } + + if (creation) { + DatasetProperties datasetProperties = new DatasetProperties(); + datasetProperties.setName(tableIdentifier.name()); + datasetProperties.setQualifiedName(name()); + + MetadataChangeProposal datasetPropertiesMcp = + newMcp(DATASET_PROPERTIES_ASPECT_NAME, datasetUrn, true); + datasetPropertiesMcp.setAspect(serializeAspect(datasetProperties)); + datasetPropertiesMcp.setChangeType(ChangeType.UPSERT); + + ingestMcp(datasetPropertiesMcp, auditStamp); + + MetadataChangeProposal platformInstanceMcp = + platformInstanceMcp(platformInstance(), datasetUrn, DATASET_ENTITY_NAME); + ingestMcp(platformInstanceMcp, auditStamp); + + Container container = new Container(); + container.setContainer(containerUrn(platformInstance(), tableIdentifier.namespace())); + + MetadataChangeProposal containerMcp = newMcp(CONTAINER_ASPECT_NAME, datasetUrn, true); + containerMcp.setAspect(serializeAspect(container)); + containerMcp.setChangeType(ChangeType.UPSERT); + ingestMcp(containerMcp, auditStamp); + + SubTypes subTypes = new SubTypes().setTypeNames(new StringArray(capitalize(type()))); + MetadataChangeProposal subTypesMcp = newMcp(SUB_TYPES_ASPECT_NAME, datasetUrn, true); + subTypesMcp.setAspect(serializeAspect(subTypes)); + subTypesMcp.setChangeType(ChangeType.UPSERT); + ingestMcp(subTypesMcp, auditStamp); + } + + sendProfileUpdate(metadata, auditStamp, datasetUrn); + onCommit(metadata.metadata(), auditStamp, datasetUrn); + } + + protected abstract DatasetProfile getDataSetProfile(M metadata); + + private void sendProfileUpdate( + MetadataWrapper metadata, AuditStamp auditStamp, DatasetUrn datasetUrn) { + + DatasetProfile dataSetProfile = getDataSetProfile(metadata.metadata()); + if (dataSetProfile != null) { + dataSetProfile.setTimestampMillis(auditStamp.getTime()); + + MetadataChangeProposal dataSetProfileMcp = newMcp(DATASET_PROFILE_ASPECT_NAME, datasetUrn); + dataSetProfileMcp.setAspect(serializeAspect(dataSetProfile)); + dataSetProfileMcp.setChangeType(ChangeType.UPSERT); + ingestMcp(dataSetProfileMcp, auditStamp); + } + } + + FileIO io() { + return io; + } + + String name() { + return fullTableName(platformInstance(), tableIdentifier); + } + + private String platformInstance() { + return warehouse.getPlatformInstance(); + } + + protected MetadataChangeProposal newMcp(String aspectName, DatasetUrn datasetUrn) { + return newMcp(aspectName, datasetUrn, false); // Default to async index update + } + + protected MetadataChangeProposal newMcp( + String aspectName, DatasetUrn datasetUrn, boolean syncIndexUpdate) { + MetadataChangeProposal mcp = new MetadataChangeProposal(); + mcp.setEntityUrn(datasetUrn); + mcp.setEntityType(DATASET_ENTITY_NAME); + 
mcp.setAspectName(aspectName); + + if (syncIndexUpdate) { + StringMap headers = new StringMap(); + headers.put(SYNC_INDEX_UPDATE_HEADER_NAME, Boolean.toString(true)); + mcp.setHeaders(headers); + } + + return mcp; + } + + protected void ingestMcp(MetadataChangeProposal mcp, AuditStamp auditStamp) { + entityService.ingestProposal(operationContext, mcp, auditStamp, false); + } + + abstract boolean isView(); + + abstract boolean isExpectedType(boolean view); + + abstract M readMetadata(FileIO io, String location); + + abstract String type(); + + abstract RuntimeException noSuchEntityException(); + + void onCommit(M metadata, AuditStamp auditStamp, DatasetUrn datasetUrn) {} +} + +@Slf4j +class ViewOpsDelegate extends TableOrViewOpsDelegate { + + ViewOpsDelegate( + DataHubIcebergWarehouse warehouse, + TableIdentifier tableIdentifier, + EntityService entityService, + OperationContext operationContext, + FileIOFactory fileIOFactory) { + super(warehouse, tableIdentifier, entityService, operationContext, fileIOFactory); + } + + @Override + boolean isView() { + return true; + } + + @Override + boolean isExpectedType(boolean view) { + return view; + } + + @Override + ViewMetadata readMetadata(FileIO io, String location) { + return ViewMetadataParser.read(io.newInputFile(location)); + } + + @Override + String type() { + return "view"; + } + + @Override + RuntimeException noSuchEntityException() { + return new NoSuchViewException("No such view %s", name()); + } + + @Override + void onCommit(ViewMetadata metadata, AuditStamp auditStamp, DatasetUrn datasetUrn) { + SQLViewRepresentation sqlViewRepresentation = null; + for (ViewRepresentation representation : metadata.currentVersion().representations()) { + if (representation instanceof SQLViewRepresentation) { + sqlViewRepresentation = (SQLViewRepresentation) representation; + // use only first representation, as DataHub model currently supports one SQL. + break; + } + } + if (sqlViewRepresentation == null) { + // base class is ensuring that a representation has been specified in case of replace-view. + // so, this shouldn't occur. 
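+      // If it does occur, log a warning and skip the ViewProperties aspect rather than failing
+      // the commit.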
+ log.warn("No SQL representation for view {}", name()); + } else { + ViewProperties viewProperties = + new ViewProperties() + .setViewLogic(sqlViewRepresentation.sql()) + .setMaterialized(false) + .setViewLanguage(sqlViewRepresentation.dialect()); + MetadataChangeProposal viewPropertiesMcp = newMcp(VIEW_PROPERTIES_ASPECT_NAME, datasetUrn); + viewPropertiesMcp.setAspect(serializeAspect(viewProperties)); + viewPropertiesMcp.setChangeType(ChangeType.UPSERT); + + ingestMcp(viewPropertiesMcp, auditStamp); + } + } + + @Override + protected DatasetProfile getDataSetProfile(ViewMetadata metadata) { + long columnCount = metadata.schema().columns().size(); + DatasetProfile datasetProfile = new DatasetProfile(); + datasetProfile.setColumnCount(columnCount); + return datasetProfile; + } +} + +class TableOpsDelegate extends TableOrViewOpsDelegate { + + TableOpsDelegate( + DataHubIcebergWarehouse warehouse, + TableIdentifier tableIdentifier, + EntityService entityService, + OperationContext operationContext, + FileIOFactory fileIOFactory) { + super(warehouse, tableIdentifier, entityService, operationContext, fileIOFactory); + } + + @Override + protected DatasetProfile getDataSetProfile(TableMetadata metadata) { + Snapshot currentSnapshot = metadata.currentSnapshot(); + if (currentSnapshot == null) { + return null; + } + + DatasetProfile dataSetProfile = new DatasetProfile(); + if (currentSnapshot.summary() != null) { + String totalRecordsStr = currentSnapshot.summary().get(SnapshotSummary.TOTAL_RECORDS_PROP); + if (totalRecordsStr != null) { + dataSetProfile.setRowCount(Long.parseLong(totalRecordsStr)); + } + } + + long colCount = metadata.schema().columns().size(); + dataSetProfile.setColumnCount(colCount); + + return dataSetProfile; + } + + @Override + boolean isView() { + return false; + } + + @Override + boolean isExpectedType(boolean view) { + return !view; + } + + @Override + TableMetadata readMetadata(FileIO io, String location) { + return TableMetadataParser.read(io, location); + } + + @Override + String type() { + return "table"; + } + + @Override + RuntimeException noSuchEntityException() { + return new NoSuchTableException("No such table %s", name()); + } +} + +class MetadataWrapper { + final TableMetadata tableMetadata; + final ViewMetadata viewMetadata; + final boolean view; + + MetadataWrapper(TableMetadata metadata) { + this.tableMetadata = metadata; + viewMetadata = null; + view = false; + } + + MetadataWrapper(ViewMetadata metadata) { + this.viewMetadata = metadata; + tableMetadata = null; + view = true; + } + + int currentSchemaId() { + if (view) { + return viewMetadata.currentSchemaId(); + } else { + return tableMetadata.currentSchemaId(); + } + } + + org.apache.iceberg.Schema schema() { + if (view) { + return viewMetadata.schema(); + } else { + return tableMetadata.schema(); + } + } + + String location() { + if (view) { + return viewMetadata.location(); + } else { + return tableMetadata.location(); + } + } + + String metadataFileLocation() { + if (view) { + return viewMetadata.metadataFileLocation(); + } else { + return tableMetadata.metadataFileLocation(); + } + } + + M metadata() { + if (view) { + return (M) viewMetadata; + } else { + return (M) tableMetadata; + } + } +} diff --git a/metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/Utils.java b/metadata-service/iceberg-catalog/src/main/java/io/datahubproject/iceberg/catalog/Utils.java similarity index 77% rename from metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/Utils.java 
rename to metadata-service/iceberg-catalog/src/main/java/io/datahubproject/iceberg/catalog/Utils.java index 2c0212308cc5a3..767f30acdef25a 100644 --- a/metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/Utils.java +++ b/metadata-service/iceberg-catalog/src/main/java/io/datahubproject/iceberg/catalog/Utils.java @@ -1,21 +1,17 @@ -package com.datahub.iceberg.catalog; +package io.datahubproject.iceberg.catalog; -import static com.linkedin.metadata.Constants.DATA_PLATFORM_INSTANCE_ASPECT_NAME; -import static com.linkedin.metadata.Constants.DATA_PLATFORM_INSTANCE_ENTITY_NAME; +import static com.linkedin.metadata.Constants.*; import static com.linkedin.metadata.utils.GenericRecordUtils.serializeAspect; import com.linkedin.common.AuditStamp; import com.linkedin.common.DataPlatformInstance; -import com.linkedin.common.FabricType; import com.linkedin.common.urn.DataPlatformUrn; -import com.linkedin.common.urn.DatasetUrn; import com.linkedin.common.urn.Urn; import com.linkedin.events.metadata.ChangeType; import com.linkedin.metadata.Constants; import com.linkedin.metadata.key.DataPlatformInstanceKey; import com.linkedin.metadata.utils.EntityKeyUtils; import com.linkedin.mxe.MetadataChangeProposal; -import java.net.URISyntaxException; import java.net.URLEncoder; import java.nio.charset.Charset; import java.util.HashSet; @@ -29,16 +25,15 @@ import org.apache.iceberg.rest.RESTUtil; public class Utils { - private static final String PLATFORM_NAME = "nativeIceberg"; + public static final String PLATFORM_NAME = "iceberg"; + private static final String NAMESPACE_CONTAINER_PREFIX = "urn:li:container:iceberg__"; + + @SneakyThrows public static AuditStamp auditStamp() { - try { - return new AuditStamp() - .setActor(Urn.createFromString(Constants.SYSTEM_ACTOR)) - .setTime(System.currentTimeMillis()); - } catch (URISyntaxException e) { - throw new RuntimeException(e); - } + return new AuditStamp() + .setActor(Urn.createFromString(Constants.SYSTEM_ACTOR)) + .setTime(System.currentTimeMillis()); } public static MetadataChangeProposal platformInstanceMcp( @@ -68,11 +63,6 @@ public static Urn platformInstanceUrn(String platformInstance) { platformInstanceKey, DATA_PLATFORM_INSTANCE_ENTITY_NAME); } - public static FabricType fabricType() { - // TODO configurable fabricType - return FabricType.DEV; - } - public static Urn containerUrn(String platformInstance, Namespace ns) { return containerUrn(platformInstance, ns.levels()); } @@ -83,12 +73,7 @@ public static Urn containerUrn(String platformInstance, String[] levels) { for (String level : levels) { containerFullName.append(".").append(level); } - return Urn.createFromString("urn:li:container:nativeIceberg__" + containerFullName); - } - - public static DatasetUrn datasetUrn(String platformInstance, TableIdentifier tableIdentifier) { - return new DatasetUrn( - platformUrn(), CatalogUtil.fullTableName(platformInstance, tableIdentifier), fabricType()); + return Urn.createFromString(NAMESPACE_CONTAINER_PREFIX + containerFullName); } public static String fullTableName(String platformInstance, TableIdentifier tableIdentifier) { @@ -118,4 +103,8 @@ public static TableIdentifier tableIdFromString(String namespace, String table) public static String parentDir(String fileLocation) { return fileLocation.substring(0, fileLocation.lastIndexOf("/")); } + + public static String namespaceNameFromContainerUrn(Urn urn) { + return urn.toString().substring(NAMESPACE_CONTAINER_PREFIX.length()); + } } diff --git 
a/metadata-service/iceberg-catalog/src/main/java/io/datahubproject/iceberg/catalog/credentials/CachingCredentialProvider.java b/metadata-service/iceberg-catalog/src/main/java/io/datahubproject/iceberg/catalog/credentials/CachingCredentialProvider.java
new file mode 100644
index 00000000000000..7aa57df101d669
--- /dev/null
+++ b/metadata-service/iceberg-catalog/src/main/java/io/datahubproject/iceberg/catalog/credentials/CachingCredentialProvider.java
@@ -0,0 +1,32 @@
+package io.datahubproject.iceberg.catalog.credentials;
+
+import com.google.common.cache.Cache;
+import com.google.common.cache.CacheBuilder;
+import java.util.Map;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.TimeUnit;
+
+public class CachingCredentialProvider implements CredentialProvider {
+  // this should be less than the actual token/credential expiration
+  private static final int EXPIRATION_MINUTES = 5;
+
+  private final Cache<CredentialsCacheKey, Map<String, String>> credentialCache;
+
+  private final CredentialProvider credentialProvider;
+
+  public CachingCredentialProvider(CredentialProvider credentialProvider) {
+    this.credentialProvider = credentialProvider;
+    this.credentialCache =
+        CacheBuilder.newBuilder().expireAfterWrite(EXPIRATION_MINUTES, TimeUnit.MINUTES).build();
+  }
+
+  public Map<String, String> getCredentials(
+      CredentialsCacheKey key, StorageProviderCredentials storageProviderCredentials) {
+    try {
+      return credentialCache.get(
+          key, () -> credentialProvider.getCredentials(key, storageProviderCredentials));
+    } catch (ExecutionException e) {
+      throw new RuntimeException("Error during cache lookup for credentials", e);
+    }
+  }
+}
diff --git a/metadata-service/iceberg-catalog/src/main/java/io/datahubproject/iceberg/catalog/credentials/CredentialProvider.java b/metadata-service/iceberg-catalog/src/main/java/io/datahubproject/iceberg/catalog/credentials/CredentialProvider.java
new file mode 100644
index 00000000000000..3a0992c25a1ff1
--- /dev/null
+++ b/metadata-service/iceberg-catalog/src/main/java/io/datahubproject/iceberg/catalog/credentials/CredentialProvider.java
@@ -0,0 +1,30 @@
+package io.datahubproject.iceberg.catalog.credentials;
+
+import com.linkedin.metadata.authorization.PoliciesConfig;
+import java.util.Map;
+import java.util.Set;
+import lombok.AllArgsConstructor;
+import lombok.EqualsAndHashCode;
+
+public interface CredentialProvider {
+
+  @EqualsAndHashCode
+  @AllArgsConstructor
+  class CredentialsCacheKey {
+    public final String platformInstance;
+    public final PoliciesConfig.Privilege privilege;
+    public final Set<String> locations;
+  }
+
+  @AllArgsConstructor
+  class StorageProviderCredentials {
+    public final String clientId;
+    public final String clientSecret;
+    public final String role;
+    public final String region;
+    public final Integer tempCredentialExpirationSeconds;
+  }
+
+  Map<String, String> getCredentials(
+      CredentialsCacheKey key, StorageProviderCredentials storageProviderCredentials);
+}
diff --git a/metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/S3CredentialProvider.java b/metadata-service/iceberg-catalog/src/main/java/io/datahubproject/iceberg/catalog/credentials/S3CredentialProvider.java
similarity index 77%
rename from metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/S3CredentialProvider.java
rename to metadata-service/iceberg-catalog/src/main/java/io/datahubproject/iceberg/catalog/credentials/S3CredentialProvider.java
index 64b2e4f873b26a..66d2166e48ae90 100644
---
a/metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/S3CredentialProvider.java +++ b/metadata-service/iceberg-catalog/src/main/java/io/datahubproject/iceberg/catalog/credentials/S3CredentialProvider.java @@ -1,4 +1,4 @@ -package com.datahub.iceberg.catalog; +package io.datahubproject.iceberg.catalog.credentials; import static com.linkedin.metadata.authorization.PoliciesConfig.*; @@ -8,7 +8,6 @@ import java.util.Set; import lombok.EqualsAndHashCode; import org.apache.iceberg.exceptions.BadRequestException; -import org.springframework.stereotype.Component; import software.amazon.awssdk.auth.credentials.AwsBasicCredentials; import software.amazon.awssdk.auth.credentials.StaticCredentialsProvider; import software.amazon.awssdk.policybuilder.iam.IamConditionOperator; @@ -20,34 +19,37 @@ import software.amazon.awssdk.services.sts.model.AssumeRoleRequest; import software.amazon.awssdk.services.sts.model.AssumeRoleResponse; -@Component -class S3CredentialProvider extends CredentialProvider { - private static final int CREDS_DURATION_SECS = 15 * 60; +public class S3CredentialProvider implements CredentialProvider { + private static final int DEFAULT_CREDS_DURATION_SECS = 60 * 60; - private static final Region REGION = Region.US_EAST_1; - - protected Map loadItem( + public Map getCredentials( CredentialsCacheKey key, StorageProviderCredentials storageProviderCredentials) { - StsClient stsClient = stsClient(storageProviderCredentials); - String sessionPolicy = policyString(key); - AssumeRoleResponse response = - stsClient.assumeRole( - AssumeRoleRequest.builder() - .roleArn(storageProviderCredentials.role) - .roleSessionName("IcebreakerSession") // TODO: name suggests this should per - .durationSeconds(CREDS_DURATION_SECS) - .policy(sessionPolicy) - .build()); - - return Map.of( - "client.region", - REGION.id(), - "s3.access-key-id", - response.credentials().accessKeyId(), - "s3.secret-access-key", - response.credentials().secretAccessKey(), - "s3.session-token", - response.credentials().sessionToken()); + + int expiration = + storageProviderCredentials.tempCredentialExpirationSeconds == null + ? 
DEFAULT_CREDS_DURATION_SECS + : storageProviderCredentials.tempCredentialExpirationSeconds; + try (StsClient stsClient = stsClient(storageProviderCredentials)) { + String sessionPolicy = policyString(key); + AssumeRoleResponse response = + stsClient.assumeRole( + AssumeRoleRequest.builder() + .roleArn(storageProviderCredentials.role) + .roleSessionName("DataHubIcebergSession") + .durationSeconds(expiration) + .policy(sessionPolicy) + .build()); + + return Map.of( + "client.region", + storageProviderCredentials.region, + "s3.access-key-id", + response.credentials().accessKeyId(), + "s3.secret-access-key", + response.credentials().secretAccessKey(), + "s3.session-token", + response.credentials().sessionToken()); + } } private StsClient stsClient(StorageProviderCredentials storageProviderCredentials) { diff --git a/metadata-service/iceberg-catalog/src/main/java/io/datahubproject/iceberg/catalog/rest/common/IcebergExceptionHandlerAdvice.java b/metadata-service/iceberg-catalog/src/main/java/io/datahubproject/iceberg/catalog/rest/common/IcebergExceptionHandlerAdvice.java new file mode 100644 index 00000000000000..d34bf45566e690 --- /dev/null +++ b/metadata-service/iceberg-catalog/src/main/java/io/datahubproject/iceberg/catalog/rest/common/IcebergExceptionHandlerAdvice.java @@ -0,0 +1,74 @@ +package io.datahubproject.iceberg.catalog.rest.common; + +import io.datahubproject.iceberg.catalog.rest.open.PublicIcebergApiController; +import io.datahubproject.iceberg.catalog.rest.secure.AbstractIcebergController; +import lombok.extern.slf4j.Slf4j; +import org.apache.iceberg.exceptions.*; +import org.apache.iceberg.rest.responses.ErrorResponse; +import org.springframework.http.HttpStatus; +import org.springframework.http.ResponseEntity; +import org.springframework.web.bind.annotation.ControllerAdvice; +import org.springframework.web.bind.annotation.ExceptionHandler; +import org.springframework.web.servlet.mvc.method.annotation.ResponseEntityExceptionHandler; + +@ControllerAdvice( + basePackageClasses = {AbstractIcebergController.class, PublicIcebergApiController.class}) +@Slf4j +public class IcebergExceptionHandlerAdvice extends ResponseEntityExceptionHandler { + + @ExceptionHandler(AlreadyExistsException.class) + public ResponseEntity handle(AlreadyExistsException e) { + return err(e, HttpStatus.CONFLICT); + } + + @ExceptionHandler(NoSuchNamespaceException.class) + public ResponseEntity handle(NoSuchNamespaceException e) { + return err(e, HttpStatus.NOT_FOUND); + } + + @ExceptionHandler(NamespaceNotEmptyException.class) + public ResponseEntity handle(NamespaceNotEmptyException e) { + return err(e, HttpStatus.BAD_REQUEST); + } + + @ExceptionHandler(NoSuchTableException.class) + public ResponseEntity handle(NoSuchTableException e) { + return err(e, HttpStatus.NOT_FOUND); + } + + @ExceptionHandler(NoSuchViewException.class) + public ResponseEntity handle(NoSuchViewException e) { + return err(e, HttpStatus.NOT_FOUND); + } + + @ExceptionHandler(NotFoundException.class) + public ResponseEntity handle(NotFoundException e) { + return err(e, HttpStatus.NOT_FOUND); + } + + @ExceptionHandler(ForbiddenException.class) + public ResponseEntity handle(ForbiddenException e) { + return err(e, HttpStatus.FORBIDDEN); + } + + @ExceptionHandler(BadRequestException.class) + public ResponseEntity handle(BadRequestException e) { + return err(e, HttpStatus.BAD_REQUEST); + } + + @ExceptionHandler(Exception.class) + public ResponseEntity handle(Exception e) throws Exception { + log.error("Server exception", e); + throw e; + } + + 
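+  // Builds the Iceberg REST spec ErrorResponse for a handled exception: the HTTP status code,
+  // the exception message, and the exception's simple class name as the error type, so REST
+  // clients can map the response back to the corresponding catalog exception.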
private ResponseEntity err(Exception e, HttpStatus errCode) { + ErrorResponse err = + ErrorResponse.builder() + .responseCode(errCode.value()) + .withMessage(e.getMessage()) + .withType(e.getClass().getSimpleName()) + .build(); + return new ResponseEntity<>(err, errCode); + } +} diff --git a/metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/rest/IcebergSpringWebConfig.java b/metadata-service/iceberg-catalog/src/main/java/io/datahubproject/iceberg/catalog/rest/common/IcebergSpringWebConfig.java similarity index 66% rename from metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/rest/IcebergSpringWebConfig.java rename to metadata-service/iceberg-catalog/src/main/java/io/datahubproject/iceberg/catalog/rest/common/IcebergSpringWebConfig.java index a839eafe1941f2..954cc512da8cca 100644 --- a/metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/rest/IcebergSpringWebConfig.java +++ b/metadata-service/iceberg-catalog/src/main/java/io/datahubproject/iceberg/catalog/rest/common/IcebergSpringWebConfig.java @@ -1,11 +1,15 @@ -package com.datahub.iceberg.catalog.rest; +package io.datahubproject.iceberg.catalog.rest.common; import com.fasterxml.jackson.annotation.JsonAutoDetect; import com.fasterxml.jackson.annotation.PropertyAccessor; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.PropertyNamingStrategies; +import io.datahubproject.iceberg.catalog.credentials.CachingCredentialProvider; +import io.datahubproject.iceberg.catalog.credentials.CredentialProvider; +import io.datahubproject.iceberg.catalog.credentials.S3CredentialProvider; import java.util.List; import org.apache.iceberg.rest.RESTSerializers; +import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Configuration; import org.springframework.http.converter.*; import org.springframework.http.converter.json.MappingJackson2HttpMessageConverter; @@ -25,4 +29,14 @@ public void extendMessageConverters(List> converters) { } } } + + @Bean + public CredentialProvider credentialProvider() { + return new S3CredentialProvider(); + } + + @Bean + public CredentialProvider cachingCredentialProvider(CredentialProvider credentialProvider) { + return new CachingCredentialProvider(credentialProvider); + } } diff --git a/metadata-service/iceberg-catalog/src/main/java/io/datahubproject/iceberg/catalog/rest/open/PublicIcebergApiController.java b/metadata-service/iceberg-catalog/src/main/java/io/datahubproject/iceberg/catalog/rest/open/PublicIcebergApiController.java new file mode 100644 index 00000000000000..fd72abeb777da6 --- /dev/null +++ b/metadata-service/iceberg-catalog/src/main/java/io/datahubproject/iceberg/catalog/rest/open/PublicIcebergApiController.java @@ -0,0 +1,107 @@ +package io.datahubproject.iceberg.catalog.rest.open; + +import static com.linkedin.metadata.Constants.GLOBAL_TAGS_ASPECT_NAME; +import static io.datahubproject.iceberg.catalog.Utils.*; + +import com.google.common.base.Strings; +import com.linkedin.common.GlobalTags; +import com.linkedin.common.TagAssociation; +import com.linkedin.common.urn.DatasetUrn; +import com.linkedin.common.urn.TagUrn; +import io.datahubproject.iceberg.catalog.DataHubIcebergWarehouse; +import io.datahubproject.iceberg.catalog.rest.secure.AbstractIcebergController; +import java.net.URISyntaxException; +import java.util.Optional; +import lombok.extern.slf4j.Slf4j; +import org.apache.iceberg.exceptions.NoSuchTableException; +import 
org.apache.iceberg.exceptions.NotFoundException; +import org.apache.iceberg.rest.CatalogHandlers; +import org.apache.iceberg.rest.responses.ConfigResponse; +import org.apache.iceberg.rest.responses.LoadTableResponse; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.http.MediaType; +import org.springframework.web.bind.annotation.*; + +@Slf4j +@RestController +@RequestMapping("/public-iceberg") +public class PublicIcebergApiController extends AbstractIcebergController { + + @Value("${icebergCatalog.enablePublicRead}") + private boolean isPublicReadEnabled; + + @Value("${icebergCatalog.publiclyReadableTag}") + private String publiclyReadableTag; + + private static final String ACCESS_TYPE_KEY = "access-type"; + private static final String ACCESS_TYPE_PUBLIC_READ = "PUBLIC_READ"; + + @GetMapping(value = "/v1/config", produces = MediaType.APPLICATION_JSON_VALUE) + public ConfigResponse getConfig( + @RequestParam(value = "warehouse", required = true) String warehouse) { + log.info("GET CONFIG for warehouse {}", warehouse); + + checkPublicEnabled(); + + // check that warehouse exists + warehouse(warehouse, systemOperationContext); + ConfigResponse response = ConfigResponse.builder().withOverride("prefix", warehouse).build(); + log.info("GET CONFIG response: {}", response); + return response; + } + + @GetMapping( + value = "/v1/{prefix}/namespaces/{namespace}/tables/{table}", + produces = MediaType.APPLICATION_JSON_VALUE) + public LoadTableResponse loadTable( + @PathVariable("prefix") String platformInstance, + @PathVariable("namespace") String namespace, + @PathVariable("table") String table, + @RequestHeader(value = "X-Iceberg-Access-Delegation", required = false) + String xIcebergAccessDelegation, + @RequestParam(value = "snapshots", required = false) String snapshots) { + log.info("GET TABLE REQUEST {}.{}.{}", platformInstance, namespace, table); + + checkPublicEnabled(); + + DataHubIcebergWarehouse warehouse = warehouse(platformInstance, systemOperationContext); + Optional datasetUrn = warehouse.getDatasetUrn(tableIdFromString(namespace, table)); + if (datasetUrn.isPresent()) { + GlobalTags tags = + (GlobalTags) + entityService.getLatestAspect( + systemOperationContext, datasetUrn.get(), GLOBAL_TAGS_ASPECT_NAME); + if (tags != null && tags.hasTags()) { + for (TagAssociation tag : tags.getTags()) { + if (publicTag().equals(tag.getTag())) { + LoadTableResponse getTableResponse = + catalogOperation( + platformInstance, + catalog -> + CatalogHandlers.loadTable(catalog, tableIdFromString(namespace, table))); + + log.info("GET TABLE RESPONSE {}", getTableResponse); + return getTableResponse; + } + } + } + } + + throw new NoSuchTableException( + "No such table %s", fullTableName(platformInstance, tableIdFromString(namespace, table))); + } + + void checkPublicEnabled() { + if (!isPublicReadEnabled || Strings.isNullOrEmpty(publiclyReadableTag)) { + throw new NotFoundException("No endpoint GET /v1/config"); + } + } + + TagUrn publicTag() { + try { + return TagUrn.createFromString("urn:li:tag:" + publiclyReadableTag); + } catch (URISyntaxException e) { + throw new RuntimeException("Invalid public tag " + publiclyReadableTag, e); + } + } +} diff --git a/metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/rest/AbstractIcebergController.java b/metadata-service/iceberg-catalog/src/main/java/io/datahubproject/iceberg/catalog/rest/secure/AbstractIcebergController.java similarity index 64% rename from 
metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/rest/AbstractIcebergController.java rename to metadata-service/iceberg-catalog/src/main/java/io/datahubproject/iceberg/catalog/rest/secure/AbstractIcebergController.java index 72b64e9d30ad4b..00f999be5f4d19 100644 --- a/metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/rest/AbstractIcebergController.java +++ b/metadata-service/iceberg-catalog/src/main/java/io/datahubproject/iceberg/catalog/rest/secure/AbstractIcebergController.java @@ -1,27 +1,30 @@ -package com.datahub.iceberg.catalog.rest; +package io.datahubproject.iceberg.catalog.rest.secure; -import static com.datahub.iceberg.catalog.Utils.*; import static com.linkedin.metadata.Constants.DATASET_ENTITY_NAME; import static com.linkedin.metadata.Constants.DATA_PLATFORM_INSTANCE_ENTITY_NAME; +import static io.datahubproject.iceberg.catalog.Utils.*; import com.datahub.authentication.Authentication; import com.datahub.authentication.AuthenticationContext; import com.datahub.authorization.AuthUtil; import com.datahub.authorization.EntitySpec; -import com.datahub.iceberg.catalog.CredentialProvider; -import com.datahub.iceberg.catalog.DataHubRestCatalog; -import com.datahub.iceberg.catalog.DataOperation; import com.datahub.plugins.auth.authorization.Authorizer; import com.linkedin.common.urn.DatasetUrn; import com.linkedin.common.urn.Urn; import com.linkedin.metadata.authorization.PoliciesConfig; import com.linkedin.metadata.entity.EntityService; +import com.linkedin.metadata.search.EntitySearchService; +import io.datahubproject.iceberg.catalog.DataHubIcebergWarehouse; +import io.datahubproject.iceberg.catalog.DataHubRestCatalog; +import io.datahubproject.iceberg.catalog.DataOperation; +import io.datahubproject.iceberg.catalog.credentials.CredentialProvider; import io.datahubproject.metadata.context.OperationContext; import io.datahubproject.metadata.context.RequestContext; +import io.datahubproject.metadata.services.SecretService; import jakarta.servlet.http.HttpServletRequest; import java.io.IOException; -import java.util.Collections; import java.util.List; +import java.util.Optional; import java.util.function.Function; import javax.inject.Inject; import javax.inject.Named; @@ -34,8 +37,13 @@ @Slf4j public class AbstractIcebergController { - @Autowired private EntityService entityService; - @Autowired protected CredentialProvider credentialProvider; + @Autowired protected EntityService entityService; + @Autowired private EntitySearchService searchService; + @Autowired private SecretService secretService; + + @Inject + @Named("cachingCredentialProvider") + private CredentialProvider cachingCredentialProvider; @Inject @Named("authorizerChain") @@ -43,22 +51,33 @@ public class AbstractIcebergController { @Inject @Named("systemOperationContext") - private OperationContext systemOperationContext; + protected OperationContext systemOperationContext; protected PoliciesConfig.Privilege authorize( OperationContext operationContext, - String platformInstance, + DataHubIcebergWarehouse warehouse, TableIdentifier tableIdentifier, DataOperation operation, boolean returnHighestPrivilege) { - DatasetUrn urn = datasetUrn(platformInstance, tableIdentifier); - EntitySpec entitySpec = new EntitySpec(DATASET_ENTITY_NAME, urn.toString()); - return authorize( - operationContext, - entitySpec, - platformInstanceEntitySpec(platformInstance), - operation, - returnHighestPrivilege); + Optional urn = warehouse.getDatasetUrn(tableIdentifier); + if (urn.isEmpty()) { + 
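+      // No dataset URN is registered for this table/view in the warehouse, so there is nothing
+      // to authorize against; report the entity as missing instead.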
throw noSuchEntityException(warehouse.getPlatformInstance(), tableIdentifier); + } + + EntitySpec entitySpec = new EntitySpec(DATASET_ENTITY_NAME, urn.get().toString()); + try { + return authorize( + operationContext, + entitySpec, + platformInstanceEntitySpec(warehouse.getPlatformInstance()), + operation, + returnHighestPrivilege); + } catch (ForbiddenException e) { + // specify table id in error message instead of dataset-urn + throw new ForbiddenException( + "Data operation %s not authorized on %s", + operation, fullTableName(warehouse.getPlatformInstance(), tableIdentifier)); + } } protected PoliciesConfig.Privilege authorize( @@ -114,27 +133,20 @@ protected static class CatalogOperationResult { } protected R catalogOperation( - String platformInstance, - HttpServletRequest request, - Function authorizer, - Function function, - Function, R> includeCreds) { - OperationContext operationContext = opContext(request); - PoliciesConfig.Privilege privilege = authorizer.apply(operationContext); - + String platformInstance, Function function) { DataHubIcebergWarehouse warehouse = - DataHubIcebergWarehouse.of(platformInstance, entityService, operationContext); - DataHubRestCatalog catalog = catalog(operationContext, warehouse, platformInstance); + DataHubIcebergWarehouse.of( + platformInstance, entityService, secretService, systemOperationContext); + return catalogOperation(warehouse, systemOperationContext, function); + } + + protected R catalogOperation( + DataHubIcebergWarehouse warehouse, + OperationContext operationContext, + Function function) { + DataHubRestCatalog catalog = catalog(operationContext, warehouse); try { - R response = function.apply(catalog); - if (includeCreds == null) { - return response; - } else { - CatalogOperationResult operationResult = - new CatalogOperationResult<>( - response, privilege, warehouse.getStorageProviderCredentials()); - return includeCreds.apply(operationResult); - } + return function.apply(catalog); } finally { try { catalog.close(); @@ -156,12 +168,21 @@ protected OperationContext opContext(HttpServletRequest request) { } protected DataHubRestCatalog catalog( - OperationContext operationContext, - DataHubIcebergWarehouse warehouse, - String platformInstance) { + OperationContext operationContext, DataHubIcebergWarehouse warehouse) { DataHubRestCatalog catalog = - new DataHubRestCatalog(entityService, operationContext, warehouse, credentialProvider); - catalog.initialize(platformInstance, Collections.emptyMap()); + new DataHubRestCatalog( + entityService, searchService, operationContext, warehouse, cachingCredentialProvider); return catalog; } + + protected DataHubIcebergWarehouse warehouse( + String platformInstance, OperationContext operationContext) { + return DataHubIcebergWarehouse.of( + platformInstance, entityService, secretService, operationContext); + } + + protected RuntimeException noSuchEntityException( + String platformInstance, TableIdentifier tableIdentifier) { + throw new UnsupportedOperationException(); + } } diff --git a/metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/rest/IcebergApiController.java b/metadata-service/iceberg-catalog/src/main/java/io/datahubproject/iceberg/catalog/rest/secure/IcebergApiController.java similarity index 90% rename from metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/rest/IcebergApiController.java rename to metadata-service/iceberg-catalog/src/main/java/io/datahubproject/iceberg/catalog/rest/secure/IcebergApiController.java index 
9dd201689185b1..ab83e3da8c2be3 100644 --- a/metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/rest/IcebergApiController.java +++ b/metadata-service/iceberg-catalog/src/main/java/io/datahubproject/iceberg/catalog/rest/secure/IcebergApiController.java @@ -1,4 +1,4 @@ -package com.datahub.iceberg.catalog.rest; +package io.datahubproject.iceberg.catalog.rest.secure; import jakarta.servlet.http.HttpServletRequest; import lombok.extern.slf4j.Slf4j; @@ -8,6 +8,7 @@ @Slf4j @RestController +@RequestMapping("/iceberg") public class IcebergApiController extends AbstractIcebergController { @PostMapping( diff --git a/metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/rest/IcebergConfigApiController.java b/metadata-service/iceberg-catalog/src/main/java/io/datahubproject/iceberg/catalog/rest/secure/IcebergConfigApiController.java similarity index 74% rename from metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/rest/IcebergConfigApiController.java rename to metadata-service/iceberg-catalog/src/main/java/io/datahubproject/iceberg/catalog/rest/secure/IcebergConfigApiController.java index 7b96ef1e83dcb3..5cb489dd4b5478 100644 --- a/metadata-service/iceberg-catalog/src/main/java/com/datahub/iceberg/catalog/rest/IcebergConfigApiController.java +++ b/metadata-service/iceberg-catalog/src/main/java/io/datahubproject/iceberg/catalog/rest/secure/IcebergConfigApiController.java @@ -1,4 +1,4 @@ -package com.datahub.iceberg.catalog.rest; +package io.datahubproject.iceberg.catalog.rest.secure; import jakarta.servlet.http.HttpServletRequest; import lombok.extern.slf4j.Slf4j; @@ -8,12 +8,16 @@ @Slf4j @RestController +@RequestMapping("/iceberg") public class IcebergConfigApiController extends AbstractIcebergController { @GetMapping(value = "/v1/config", produces = MediaType.APPLICATION_JSON_VALUE) public ConfigResponse getConfig( HttpServletRequest request, - @RequestParam(value = "warehouse", required = false) String warehouse) { + @RequestParam(value = "warehouse", required = true) String warehouse) { log.info("GET CONFIG for warehouse {}", warehouse); + + // check that warehouse exists + warehouse(warehouse, opContext(request)); ConfigResponse response = ConfigResponse.builder().withOverride("prefix", warehouse).build(); log.info("GET CONFIG response: {}", response); return response; diff --git a/metadata-service/iceberg-catalog/src/main/java/io/datahubproject/iceberg/catalog/rest/secure/IcebergNamespaceApiController.java b/metadata-service/iceberg-catalog/src/main/java/io/datahubproject/iceberg/catalog/rest/secure/IcebergNamespaceApiController.java new file mode 100644 index 00000000000000..b947c348d9d49d --- /dev/null +++ b/metadata-service/iceberg-catalog/src/main/java/io/datahubproject/iceberg/catalog/rest/secure/IcebergNamespaceApiController.java @@ -0,0 +1,171 @@ +package io.datahubproject.iceberg.catalog.rest.secure; + +import static io.datahubproject.iceberg.catalog.Utils.*; + +import io.datahubproject.iceberg.catalog.DataHubIcebergWarehouse; +import io.datahubproject.iceberg.catalog.DataOperation; +import io.datahubproject.metadata.context.OperationContext; +import jakarta.servlet.http.HttpServletRequest; +import javax.annotation.Nonnull; +import lombok.extern.slf4j.Slf4j; +import org.apache.commons.lang3.StringUtils; +import org.apache.iceberg.catalog.Namespace; +import org.apache.iceberg.rest.CatalogHandlers; +import org.apache.iceberg.rest.requests.CreateNamespaceRequest; +import 
org.apache.iceberg.rest.requests.UpdateNamespacePropertiesRequest; +import org.apache.iceberg.rest.responses.CreateNamespaceResponse; +import org.apache.iceberg.rest.responses.GetNamespaceResponse; +import org.apache.iceberg.rest.responses.ListNamespacesResponse; +import org.apache.iceberg.rest.responses.UpdateNamespacePropertiesResponse; +import org.springframework.http.MediaType; +import org.springframework.web.bind.annotation.*; + +@Slf4j +@RestController +@RequestMapping("/iceberg") +public class IcebergNamespaceApiController extends AbstractIcebergController { + + @GetMapping( + value = "/v1/{prefix}/namespaces/{namespace}", + produces = MediaType.APPLICATION_JSON_VALUE) + public GetNamespaceResponse getNamespace( + HttpServletRequest request, + @PathVariable("prefix") String platformInstance, + @PathVariable("namespace") String namespace) { + log.info("GET NAMESPACE REQUEST {}.{}", platformInstance, namespace); + + OperationContext operationContext = opContext(request); + // not authorizing get/use namespace operation currently + DataHubIcebergWarehouse warehouse = warehouse(platformInstance, operationContext); + + GetNamespaceResponse getNamespaceResponse = + catalogOperation( + warehouse, + operationContext, + catalog -> CatalogHandlers.loadNamespace(catalog, namespaceFromString(namespace))); + + log.info("GET NAMESPACE RESPONSE {}", getNamespaceResponse); + return getNamespaceResponse; + } + + @PostMapping( + value = "/v1/{prefix}/namespaces", + consumes = MediaType.APPLICATION_JSON_VALUE, + produces = MediaType.APPLICATION_JSON_VALUE) + public CreateNamespaceResponse createNamespace( + HttpServletRequest request, + @PathVariable("prefix") String platformInstance, + @RequestBody @Nonnull CreateNamespaceRequest createNamespaceRequest) { + log.info( + "CREATE NAMESPACE REQUEST in platformInstance {}, body {}", + platformInstance, + createNamespaceRequest); + + OperationContext operationContext = opContext(request); + + authorize(operationContext, platformInstance, DataOperation.MANAGE_NAMESPACES, false); + DataHubIcebergWarehouse warehouse = warehouse(platformInstance, operationContext); + CreateNamespaceResponse createNamespaceResponse = + catalogOperation( + warehouse, + operationContext, + catalog -> { + CatalogHandlers.createNamespace(catalog, createNamespaceRequest); + return CreateNamespaceResponse.builder() + .withNamespace(createNamespaceRequest.namespace()) + .build(); + }); + + log.info("CREATE NAMESPACE RESPONSE {}", createNamespaceResponse); + return createNamespaceResponse; + } + + @PostMapping( + value = "/v1/{prefix}/namespaces/{namespace}", + consumes = MediaType.APPLICATION_JSON_VALUE, + produces = MediaType.APPLICATION_JSON_VALUE) + public UpdateNamespacePropertiesResponse updateNamespace( + HttpServletRequest request, + @PathVariable("prefix") String platformInstance, + @PathVariable("namespace") String namespace, + @RequestBody @Nonnull UpdateNamespacePropertiesRequest updateNamespacePropertiesRequest) { + log.info( + "UPDATE NAMESPACE REQUEST {}.{}, body {}", + platformInstance, + namespace, + updateNamespacePropertiesRequest); + + OperationContext operationContext = opContext(request); + + authorize(operationContext, platformInstance, DataOperation.MANAGE_NAMESPACES, false); + DataHubIcebergWarehouse warehouse = warehouse(platformInstance, operationContext); + UpdateNamespacePropertiesResponse updateNamespaceResponse = + catalogOperation( + warehouse, + operationContext, + catalog -> + catalog.updateNamespaceProperties( + namespaceFromString(namespace), 
updateNamespacePropertiesRequest)); + + log.info("UPDATE NAMESPACE RESPONSE {}", updateNamespaceResponse); + return updateNamespaceResponse; + } + + @DeleteMapping( + value = "/v1/{prefix}/namespaces/{namespace}", + consumes = MediaType.APPLICATION_JSON_VALUE, + produces = MediaType.APPLICATION_JSON_VALUE) + public void dropNamespace( + HttpServletRequest request, + @PathVariable("prefix") String platformInstance, + @PathVariable("namespace") String namespace) { + log.info("DROP NAMESPACE REQUEST {}.{}", platformInstance, namespace); + + OperationContext operationContext = opContext(request); + + authorize(operationContext, platformInstance, DataOperation.MANAGE_NAMESPACES, false); + DataHubIcebergWarehouse warehouse = warehouse(platformInstance, operationContext); + catalogOperation( + warehouse, + operationContext, + catalog -> { + CatalogHandlers.dropNamespace(catalog, namespaceFromString(namespace)); + return null; + }); + + log.info("DROPPED NAMESPACE {}", namespace); + } + + @GetMapping(value = "/v1/{prefix}/namespaces", produces = MediaType.APPLICATION_JSON_VALUE) + public ListNamespacesResponse listNamespaces( + HttpServletRequest request, + @PathVariable("prefix") String platformInstance, + @RequestParam(value = "parent", required = false) String parent, + @RequestParam(value = "pageToken", required = false) String pageToken, + @RequestParam(value = "pageSize", required = false) Integer pageSize) { + log.info("LIST NAMESPACES REQUEST for {}.{}", platformInstance, parent); + + OperationContext operationContext = opContext(request); + authorize(operationContext, platformInstance, DataOperation.LIST, false); + + DataHubIcebergWarehouse warehouse = warehouse(platformInstance, operationContext); + + ListNamespacesResponse listNamespacesResponse = + catalogOperation( + warehouse, + operationContext, + catalog -> { + Namespace ns; + if (StringUtils.isEmpty(parent)) { + ns = Namespace.empty(); + } else { + ns = namespaceFromString(parent); + // ensure namespace exists + catalog.loadNamespaceMetadata(ns); + } + return CatalogHandlers.listNamespaces(catalog, ns); + }); + log.info("LIST NAMESPACES RESPONSE {}", listNamespacesResponse); + return listNamespacesResponse; + } +} diff --git a/metadata-service/iceberg-catalog/src/main/java/io/datahubproject/iceberg/catalog/rest/secure/IcebergTableApiController.java b/metadata-service/iceberg-catalog/src/main/java/io/datahubproject/iceberg/catalog/rest/secure/IcebergTableApiController.java new file mode 100644 index 00000000000000..023e4e38a2ad5e --- /dev/null +++ b/metadata-service/iceberg-catalog/src/main/java/io/datahubproject/iceberg/catalog/rest/secure/IcebergTableApiController.java @@ -0,0 +1,323 @@ +package io.datahubproject.iceberg.catalog.rest.secure; + +import static io.datahubproject.iceberg.catalog.Utils.*; + +import com.linkedin.metadata.authorization.PoliciesConfig; +import io.datahubproject.iceberg.catalog.DataHubIcebergWarehouse; +import io.datahubproject.iceberg.catalog.DataOperation; +import io.datahubproject.iceberg.catalog.credentials.CredentialProvider; +import io.datahubproject.metadata.context.OperationContext; +import jakarta.servlet.http.HttpServletRequest; +import java.util.Map; +import lombok.extern.slf4j.Slf4j; +import org.apache.iceberg.catalog.Namespace; +import org.apache.iceberg.catalog.TableIdentifier; +import org.apache.iceberg.exceptions.NoSuchTableException; +import org.apache.iceberg.rest.CatalogHandlers; +import org.apache.iceberg.rest.requests.CreateTableRequest; +import 
org.apache.iceberg.rest.requests.RegisterTableRequest; +import org.apache.iceberg.rest.requests.RenameTableRequest; +import org.apache.iceberg.rest.requests.UpdateTableRequest; +import org.apache.iceberg.rest.responses.ListTablesResponse; +import org.apache.iceberg.rest.responses.LoadTableResponse; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.http.MediaType; +import org.springframework.web.bind.annotation.*; + +@Slf4j +@RestController +@RequestMapping("/iceberg") +public class IcebergTableApiController extends AbstractIcebergController { + + @Autowired private CredentialProvider credentialProvider; + + @PostMapping( + value = "/v1/{prefix}/namespaces/{namespace}/tables", + produces = MediaType.APPLICATION_JSON_VALUE, + consumes = MediaType.APPLICATION_JSON_VALUE) + public LoadTableResponse createTable( + HttpServletRequest request, + @PathVariable("prefix") String platformInstance, + @PathVariable("namespace") String namespace, + @RequestBody CreateTableRequest createTableRequest, + @RequestHeader(value = "X-Iceberg-Access-Delegation") String xIcebergAccessDelegation) { + log.info( + "CREATE TABLE REQUEST in {}.{}, body {}", platformInstance, namespace, createTableRequest); + + OperationContext operationContext = opContext(request); + PoliciesConfig.Privilege privilege = + authorize(operationContext, platformInstance, DataOperation.MANAGE_TABLES, false); + + DataHubIcebergWarehouse warehouse = warehouse(platformInstance, operationContext); + LoadTableResponse createTableResponse = + catalogOperation( + warehouse, + operationContext, + catalog -> { + // ensure namespace exists + Namespace ns = namespaceFromString(namespace); + catalog.loadNamespaceMetadata(ns); + if (createTableRequest.stageCreate()) { + return CatalogHandlers.stageTableCreate(catalog, ns, createTableRequest); + } else { + return CatalogHandlers.createTable(catalog, ns, createTableRequest); + } + }); + log.info("CREATE TABLE RESPONSE, excluding creds, {}", createTableResponse); + return includeCreds( + platformInstance, + xIcebergAccessDelegation, + createTableResponse, + PoliciesConfig.DATA_READ_WRITE_PRIVILEGE, + warehouse.getStorageProviderCredentials()); + } + + private LoadTableResponse includeCreds( + String platformInstance, + String xIcebergAccessDelegation, + LoadTableResponse loadTableResponse, + PoliciesConfig.Privilege privilege, + CredentialProvider.StorageProviderCredentials storageProviderCredentials) { + if ("vended-credentials".equals(xIcebergAccessDelegation)) { + CredentialProvider.CredentialsCacheKey cacheKey = + new CredentialProvider.CredentialsCacheKey( + platformInstance, privilege, locations(loadTableResponse.tableMetadata())); + Map creds = + credentialProvider.getCredentials(cacheKey, storageProviderCredentials); + /* log.info( + "STS creds {} for primary table location {}", + creds, + loadTableResponse.tableMetadata().location()); */ + + return LoadTableResponse.builder() + .withTableMetadata(loadTableResponse.tableMetadata()) + .addAllConfig(creds) + .build(); + } else { + return loadTableResponse; + } + } + + @GetMapping( + value = "/v1/{prefix}/namespaces/{namespace}/tables/{table}", + produces = MediaType.APPLICATION_JSON_VALUE) + public LoadTableResponse loadTable( + HttpServletRequest request, + @PathVariable("prefix") String platformInstance, + @PathVariable("namespace") String namespace, + @PathVariable("table") String table, + @RequestHeader(value = "X-Iceberg-Access-Delegation", required = false) + String xIcebergAccessDelegation, + 
@RequestParam(value = "snapshots", required = false) String snapshots) { + log.info( + "GET TABLE REQUEST {}.{}.{}, access-delegation {}", + platformInstance, + namespace, + table, + xIcebergAccessDelegation); + + OperationContext operationContext = opContext(request); + DataHubIcebergWarehouse warehouse = warehouse(platformInstance, operationContext); + + PoliciesConfig.Privilege privilege = + authorize( + operationContext, + warehouse, + tableIdFromString(namespace, table), + DataOperation.READ_ONLY, + true); + + LoadTableResponse getTableResponse = + catalogOperation( + warehouse, + operationContext, + catalog -> CatalogHandlers.loadTable(catalog, tableIdFromString(namespace, table))); + log.info("GET TABLE RESPONSE, excluding creds, {}", getTableResponse); + + if (privilege == PoliciesConfig.DATA_MANAGE_TABLES_PRIVILEGE) { + privilege = PoliciesConfig.DATA_READ_WRITE_PRIVILEGE; + } else if (privilege == PoliciesConfig.DATA_MANAGE_VIEWS_PRIVILEGE) { + privilege = PoliciesConfig.DATA_READ_ONLY_PRIVILEGE; + } + return includeCreds( + platformInstance, + xIcebergAccessDelegation, + getTableResponse, + privilege, + warehouse.getStorageProviderCredentials()); + } + + @PostMapping( + value = "/v1/{prefix}/namespaces/{namespace}/tables/{table}", + produces = MediaType.APPLICATION_JSON_VALUE, + consumes = MediaType.APPLICATION_JSON_VALUE) + public LoadTableResponse updateTable( + HttpServletRequest request, + @PathVariable("prefix") String platformInstance, + @PathVariable("namespace") String namespace, + @PathVariable("table") String table, + @RequestBody UpdateTableRequest updateTableRequest) { + + log.info( + "UPDATE TABLE REQUEST {}.{}.{}, body {} ", + platformInstance, + namespace, + table, + updateTableRequest); + + OperationContext operationContext = opContext(request); + DataHubIcebergWarehouse warehouse = warehouse(platformInstance, operationContext); + authorize( + operationContext, + warehouse, + tableIdFromString(namespace, table), + DataOperation.READ_WRITE, + false); + LoadTableResponse updateTableResponse = + catalogOperation( + warehouse, + operationContext, + catalog -> + CatalogHandlers.updateTable( + catalog, tableIdFromString(namespace, table), updateTableRequest)); + + // not refreshing credentials here. 
+ log.info("UPDATE TABLE RESPONSE {}", updateTableResponse); + + return updateTableResponse; + } + + @DeleteMapping(value = "/v1/{prefix}/namespaces/{namespace}/tables/{table}") + public void dropTable( + HttpServletRequest request, + @PathVariable("prefix") String platformInstance, + @PathVariable("namespace") String namespace, + @PathVariable("table") String table, + @RequestParam(value = "purgeRequested", defaultValue = "false") Boolean purgeRequested) { + + log.info( + "DROP TABLE REQUEST {}.{}.{}, purge = {}", + platformInstance, + namespace, + table, + purgeRequested); + + OperationContext operationContext = opContext(request); + authorize(operationContext, platformInstance, DataOperation.MANAGE_TABLES, false); + + DataHubIcebergWarehouse warehouse = warehouse(platformInstance, operationContext); + + catalogOperation( + warehouse, + operationContext, + catalog -> { + TableIdentifier tableIdentifier = tableIdFromString(namespace, table); + if (purgeRequested) { + CatalogHandlers.purgeTable(catalog, tableIdentifier); + log.info("PURGED TABLE {}", tableIdentifier); + } else { + CatalogHandlers.dropTable(catalog, tableIdentifier); + log.info("DROPPED TABLE {}", tableIdentifier); + } + return null; + }); + } + + @PostMapping( + value = "/v1/{prefix}/tables/rename", + produces = MediaType.APPLICATION_JSON_VALUE, + consumes = MediaType.APPLICATION_JSON_VALUE) + public void renameTable( + HttpServletRequest request, + @PathVariable("prefix") String platformInstance, + @RequestBody RenameTableRequest renameTableRequest) { + log.info( + "RENAME TABLE REQUEST in platformInstance {}, body {}", + platformInstance, + renameTableRequest); + + OperationContext operationContext = opContext(request); + authorize(operationContext, platformInstance, DataOperation.MANAGE_TABLES, false); + + DataHubIcebergWarehouse warehouse = warehouse(platformInstance, operationContext); + + catalogOperation( + warehouse, + operationContext, + catalog -> { + CatalogHandlers.renameTable(catalog, renameTableRequest); + return null; + }); + + log.info( + "RENAMED TABLE {} to {} ", renameTableRequest.source(), renameTableRequest.destination()); + } + + @PostMapping( + value = "/v1/{prefix}/namespaces/{namespace}/register", + produces = MediaType.APPLICATION_JSON_VALUE, + consumes = MediaType.APPLICATION_JSON_VALUE) + public LoadTableResponse registerTable( + HttpServletRequest request, + @PathVariable("prefix") String platformInstance, + @PathVariable("namespace") String namespace, + @RequestBody RegisterTableRequest registerTableRequest) { + log.info( + "REGISTER TABLE REQUEST {}.{}, body {}", platformInstance, namespace, registerTableRequest); + + OperationContext operationContext = opContext(request); + authorize(operationContext, platformInstance, DataOperation.MANAGE_TABLES, false); + + DataHubIcebergWarehouse warehouse = warehouse(platformInstance, operationContext); + + LoadTableResponse registerTableResponse = + catalogOperation( + warehouse, + operationContext, + catalog -> { + // ensure namespace exists + Namespace ns = namespaceFromString(namespace); + catalog.loadNamespaceMetadata(ns); + return CatalogHandlers.registerTable(catalog, ns, registerTableRequest); + }); + + log.info("REGISTER TABLE RESPONSE {}", registerTableResponse); + return registerTableResponse; + } + + @GetMapping( + value = "/v1/{prefix}/namespaces/{namespace}/tables", + produces = MediaType.APPLICATION_JSON_VALUE) + public ListTablesResponse listTables( + HttpServletRequest request, + @PathVariable("prefix") String platformInstance, + 
@PathVariable("namespace") String namespace, + @RequestParam(value = "pageToken", required = false) String pageToken, + @RequestParam(value = "pageSize", required = false) Integer pageSize) { + log.info("LIST TABLES REQUEST for {}.{}", platformInstance, namespace); + + OperationContext operationContext = opContext(request); + authorize(operationContext, platformInstance, DataOperation.LIST, false); + DataHubIcebergWarehouse warehouse = warehouse(platformInstance, operationContext); + + ListTablesResponse listTablesResponse = + catalogOperation( + warehouse, + operationContext, + catalog -> { + // ensure namespace exists + Namespace ns = namespaceFromString(namespace); + catalog.loadNamespaceMetadata(ns); + return CatalogHandlers.listTables(catalog, ns); + }); + log.info("LIST TABLES RESPONSE {}", listTablesResponse); + return listTablesResponse; + } + + @Override + protected NoSuchTableException noSuchEntityException( + String platformInstance, TableIdentifier tableIdentifier) { + return new NoSuchTableException( + "No such table %s", fullTableName(platformInstance, tableIdentifier)); + } +} diff --git a/metadata-service/iceberg-catalog/src/main/java/io/datahubproject/iceberg/catalog/rest/secure/IcebergViewApiController.java b/metadata-service/iceberg-catalog/src/main/java/io/datahubproject/iceberg/catalog/rest/secure/IcebergViewApiController.java new file mode 100644 index 00000000000000..6a40a8012bb003 --- /dev/null +++ b/metadata-service/iceberg-catalog/src/main/java/io/datahubproject/iceberg/catalog/rest/secure/IcebergViewApiController.java @@ -0,0 +1,213 @@ +package io.datahubproject.iceberg.catalog.rest.secure; + +import static io.datahubproject.iceberg.catalog.Utils.*; + +import io.datahubproject.iceberg.catalog.DataHubIcebergWarehouse; +import io.datahubproject.iceberg.catalog.DataOperation; +import io.datahubproject.metadata.context.OperationContext; +import jakarta.servlet.http.HttpServletRequest; +import lombok.extern.slf4j.Slf4j; +import org.apache.iceberg.catalog.Namespace; +import org.apache.iceberg.catalog.TableIdentifier; +import org.apache.iceberg.exceptions.NoSuchViewException; +import org.apache.iceberg.rest.CatalogHandlers; +import org.apache.iceberg.rest.requests.CreateViewRequest; +import org.apache.iceberg.rest.requests.RenameTableRequest; +import org.apache.iceberg.rest.requests.UpdateTableRequest; +import org.apache.iceberg.rest.responses.ListTablesResponse; +import org.apache.iceberg.rest.responses.LoadViewResponse; +import org.springframework.http.MediaType; +import org.springframework.web.bind.annotation.*; + +@Slf4j +@RestController +@RequestMapping("/iceberg") +public class IcebergViewApiController extends AbstractIcebergController { + + @PostMapping( + value = "/v1/{prefix}/namespaces/{namespace}/views", + produces = MediaType.APPLICATION_JSON_VALUE, + consumes = MediaType.APPLICATION_JSON_VALUE) + public LoadViewResponse createView( + HttpServletRequest request, + @PathVariable("prefix") String platformInstance, + @PathVariable("namespace") String namespace, + @RequestBody CreateViewRequest createViewRequest) { + log.info( + "CREATE VIEW REQUEST in {}.{}, body {}", platformInstance, namespace, createViewRequest); + + OperationContext operationContext = opContext(request); + authorize(operationContext, platformInstance, DataOperation.MANAGE_VIEWS, false); + + DataHubIcebergWarehouse warehouse = warehouse(platformInstance, operationContext); + + LoadViewResponse createViewResponse = + catalogOperation( + warehouse, + operationContext, + catalog -> { + // 
ensure namespace exists + Namespace ns = namespaceFromString(namespace); + catalog.loadNamespaceMetadata(ns); + return CatalogHandlers.createView(catalog, ns, createViewRequest); + }); + + log.info("CREATE VIEW RESPONSE {}", createViewResponse); + return createViewResponse; + } + + @PostMapping( + value = "/v1/{prefix}/namespaces/{namespace}/views/{view}", + produces = MediaType.APPLICATION_JSON_VALUE, + consumes = MediaType.APPLICATION_JSON_VALUE) + public LoadViewResponse updateView( + HttpServletRequest request, + @PathVariable("prefix") String platformInstance, + @PathVariable("namespace") String namespace, + @PathVariable("view") String view, + @RequestBody UpdateTableRequest updateViewRequest) { + log.info( + "UPDATE VIEW REQUEST {}.{}.{}, body {} ", + platformInstance, + namespace, + view, + updateViewRequest); + + OperationContext operationContext = opContext(request); + DataHubIcebergWarehouse warehouse = warehouse(platformInstance, operationContext); + authorize( + operationContext, + warehouse, + tableIdFromString(namespace, view), + DataOperation.MANAGE_VIEWS, + false); + + LoadViewResponse updateViewResponse = + catalogOperation( + warehouse, + operationContext, + catalog -> + CatalogHandlers.updateView( + catalog, tableIdFromString(namespace, view), updateViewRequest)); + + log.info("UPDATE VIEW RESPONSE {}", updateViewResponse); + return updateViewResponse; + } + + @GetMapping( + value = "/v1/{prefix}/namespaces/{namespace}/views/{view}", + produces = MediaType.APPLICATION_JSON_VALUE) + public LoadViewResponse loadView( + HttpServletRequest request, + @PathVariable("prefix") String platformInstance, + @PathVariable("namespace") String namespace, + @PathVariable("view") String view) { + log.info("GET VIEW REQUEST {}.{}.{}", platformInstance, namespace, view); + + OperationContext operationContext = opContext(request); + DataHubIcebergWarehouse warehouse = warehouse(platformInstance, operationContext); + + authorize( + operationContext, + warehouse, + tableIdFromString(namespace, view), + DataOperation.READ_ONLY, + false); + + LoadViewResponse getViewResponse = + catalogOperation( + warehouse, + operationContext, + catalog -> CatalogHandlers.loadView(catalog, tableIdFromString(namespace, view))); + log.info("LOAD VIEW RESPONSE {}", getViewResponse); + return getViewResponse; + } + + @DeleteMapping(value = "/v1/{prefix}/namespaces/{namespace}/views/{view}") + public void dropView( + HttpServletRequest request, + @PathVariable("prefix") String platformInstance, + @PathVariable("namespace") String namespace, + @PathVariable("view") String view) { + log.info("DROP VIEW REQUEST {}.{}.{}", platformInstance, namespace, view); + + OperationContext operationContext = opContext(request); + authorize(operationContext, platformInstance, DataOperation.MANAGE_VIEWS, false); + DataHubIcebergWarehouse warehouse = warehouse(platformInstance, operationContext); + + catalogOperation( + warehouse, + operationContext, + catalog -> { + CatalogHandlers.dropView(catalog, tableIdFromString(namespace, view)); + return null; + }); + log.info("DROPPED VIEW {}", tableIdFromString(namespace, view)); + } + + @PostMapping( + value = "/v1/{prefix}/views/rename", + produces = MediaType.APPLICATION_JSON_VALUE, + consumes = MediaType.APPLICATION_JSON_VALUE) + public void renameView( + HttpServletRequest request, + @PathVariable("prefix") String platformInstance, + @RequestBody RenameTableRequest renameTableRequest) { + log.info( + "RENAME VIEW REQUEST in platformInstance {}, body {}", + platformInstance, + 
renameTableRequest); + + OperationContext operationContext = opContext(request); + authorize(operationContext, platformInstance, DataOperation.MANAGE_VIEWS, false); + + DataHubIcebergWarehouse warehouse = warehouse(platformInstance, operationContext); + + catalogOperation( + warehouse, + operationContext, + catalog -> { + CatalogHandlers.renameView(catalog, renameTableRequest); + return null; + }); + + log.info( + "RENAMED VIEW {} to {} ", renameTableRequest.source(), renameTableRequest.destination()); + } + + @GetMapping( + value = "/v1/{prefix}/namespaces/{namespace}/views", + produces = MediaType.APPLICATION_JSON_VALUE) + public ListTablesResponse listViews( + HttpServletRequest request, + @PathVariable("prefix") String platformInstance, + @PathVariable("namespace") String namespace, + @RequestParam(value = "pageToken", required = false) String pageToken, + @RequestParam(value = "pageSize", required = false) Integer pageSize) { + log.info("LIST VIEWS REQUEST for {}.{}", platformInstance, namespace); + + OperationContext operationContext = opContext(request); + authorize(operationContext, platformInstance, DataOperation.LIST, false); + DataHubIcebergWarehouse warehouse = warehouse(platformInstance, operationContext); + + ListTablesResponse listTablesResponse = + catalogOperation( + warehouse, + operationContext, + catalog -> { + // ensure namespace exists + Namespace ns = namespaceFromString(namespace); + catalog.loadNamespaceMetadata(ns); + return CatalogHandlers.listViews(catalog, ns); + }); + log.info("LIST VIEWS RESPONSE {}", listTablesResponse); + return listTablesResponse; + } + + @Override + protected NoSuchViewException noSuchEntityException( + String platformInstance, TableIdentifier tableIdentifier) { + return new NoSuchViewException( + "No such view %s", fullTableName(platformInstance, tableIdentifier)); + } +} diff --git a/metadata-service/iceberg-catalog/src/test/java/io/datahubproject/iceberg/catalog/CachingCredentialProviderTest.java b/metadata-service/iceberg-catalog/src/test/java/io/datahubproject/iceberg/catalog/CachingCredentialProviderTest.java new file mode 100644 index 00000000000000..e8d2c10e1d9cff --- /dev/null +++ b/metadata-service/iceberg-catalog/src/test/java/io/datahubproject/iceberg/catalog/CachingCredentialProviderTest.java @@ -0,0 +1,107 @@ +package io.datahubproject.iceberg.catalog; + +import static org.testng.Assert.*; + +import com.linkedin.metadata.authorization.PoliciesConfig; +import io.datahubproject.iceberg.catalog.credentials.CachingCredentialProvider; +import io.datahubproject.iceberg.catalog.credentials.CredentialProvider; +import io.datahubproject.iceberg.catalog.credentials.S3CredentialProvider; +import java.util.Map; +import java.util.Set; +import org.mockito.Mock; +import org.mockito.MockitoAnnotations; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; +import software.amazon.awssdk.services.sts.StsClient; +import software.amazon.awssdk.services.sts.StsClientBuilder; + +public class CachingCredentialProviderTest { + + private CredentialProvider.StorageProviderCredentials storageProviderCreds; + private CredentialProvider.CredentialsCacheKey cacheKey; + + @Mock private StsClient stsClient; + + @Mock private StsClientBuilder stsClientBuilder; + + @BeforeMethod + public void setUp() { + + MockitoAnnotations.openMocks(this); + + storageProviderCreds = + new CredentialProvider.StorageProviderCredentials( + "testClientId", + "testClientSecret", + "arn:aws:iam::123456789012:role/test-role", + "us-east-1", + null); + 
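+    // The cache key below spans platform instance, privilege and table locations; the tests
+    // rely on an identical key hitting the cache and a changed key forcing a fresh load
+    // from the backing provider.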
+ cacheKey = + new CredentialProvider.CredentialsCacheKey( + "testPlatform", + PoliciesConfig.DATA_READ_ONLY_PRIVILEGE, + Set.of("s3://test-bucket/path/to/data")); + } + + @Test + public void testCredentialCaching() { + TestableS3CredentialProvider backingProvider = new TestableS3CredentialProvider(); + CachingCredentialProvider provider = new CachingCredentialProvider(backingProvider); + + Map firstResult = provider.getCredentials(cacheKey, storageProviderCreds); + int firstLoadCount = backingProvider.getLoadCount(); + + Map secondResult = provider.getCredentials(cacheKey, storageProviderCreds); + int secondLoadCount = backingProvider.getLoadCount(); + + assertEquals(firstLoadCount, 1, "First call should load credentials"); + assertEquals(secondLoadCount, 1, "Second call should use cached credentials"); + assertSame(firstResult, secondResult, "Should return same cached credential map"); + } + + @Test + public void testDifferentKeysGetDifferentCredentials() { + TestableS3CredentialProvider backingProvider = new TestableS3CredentialProvider(); + CachingCredentialProvider provider = new CachingCredentialProvider(backingProvider); + + CredentialProvider.CredentialsCacheKey secondKey = + new CredentialProvider.CredentialsCacheKey( + "differentPlatform", + PoliciesConfig.DATA_READ_ONLY_PRIVILEGE, + Set.of("s3://test-bucket/path/to/data")); + + Map firstResult = provider.getCredentials(cacheKey, storageProviderCreds); + int firstLoadCount = backingProvider.getLoadCount(); + + Map secondResult = provider.getCredentials(secondKey, storageProviderCreds); + int secondLoadCount = backingProvider.getLoadCount(); + + assertEquals(firstLoadCount, 1, "First call should load credentials"); + assertEquals(secondLoadCount, 2, "Different key should trigger new credential load"); + assertNotSame(firstResult, secondResult, "Different keys should get different credential maps"); + } + + /** + * Test implementation that allows us to verify the number of credential loads without relying on + * implementation details. 
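+   * Each load returns a distinct map (values are suffixed with the load count), so the tests
+   * above can detect cache hits via reference identity.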
+ */ + private static class TestableS3CredentialProvider extends S3CredentialProvider { + private int loadCount = 0; + + @Override + public Map getCredentials( + CredentialsCacheKey key, StorageProviderCredentials storageProviderCredentials) { + loadCount++; + return Map.of( + "client.region", storageProviderCredentials.region, + "s3.access-key-id", "TESTACCESSKEY" + loadCount, // Make each load unique + "s3.secret-access-key", "TESTSECRETKEY" + loadCount, + "s3.session-token", "TESTSESSIONTOKEN" + loadCount); + } + + public int getLoadCount() { + return loadCount; + } + } +} diff --git a/metadata-service/iceberg-catalog/src/test/java/io/datahubproject/iceberg/catalog/DataHubIcebergWarehouseTest.java b/metadata-service/iceberg-catalog/src/test/java/io/datahubproject/iceberg/catalog/DataHubIcebergWarehouseTest.java new file mode 100644 index 00000000000000..cacca17e2c479b --- /dev/null +++ b/metadata-service/iceberg-catalog/src/test/java/io/datahubproject/iceberg/catalog/DataHubIcebergWarehouseTest.java @@ -0,0 +1,428 @@ +package io.datahubproject.iceberg.catalog; + +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.Mockito.*; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertNotNull; +import static org.testng.Assert.assertTrue; + +import com.linkedin.common.AuditStamp; +import com.linkedin.common.FabricType; +import com.linkedin.common.urn.DataPlatformUrn; +import com.linkedin.common.urn.DatasetUrn; +import com.linkedin.common.urn.Urn; +import com.linkedin.data.template.RecordTemplate; +import com.linkedin.dataplatforminstance.IcebergWarehouseInfo; +import com.linkedin.dataset.IcebergCatalogInfo; +import com.linkedin.entity.EnvelopedAspect; +import com.linkedin.metadata.entity.EntityService; +import com.linkedin.mxe.MetadataChangeProposal; +import com.linkedin.platformresource.PlatformResourceInfo; +import com.linkedin.secret.DataHubSecretValue; +import com.linkedin.util.Pair; +import io.datahubproject.iceberg.catalog.credentials.CredentialProvider; +import io.datahubproject.metadata.context.OperationContext; +import io.datahubproject.metadata.services.SecretService; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.Set; +import org.apache.iceberg.catalog.TableIdentifier; +import org.apache.iceberg.exceptions.NoSuchTableException; +import org.apache.iceberg.exceptions.NotFoundException; +import org.mockito.Mock; +import org.mockito.MockitoAnnotations; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +public class DataHubIcebergWarehouseTest { + + @Mock private EntityService entityService; + + @Mock private SecretService secretService; + + @Mock private OperationContext operationContext; + + private IcebergWarehouseInfo icebergWarehouse; + + @Mock private RecordTemplate warehouseAspect; + + @BeforeMethod + public void setup() { + MockitoAnnotations.openMocks(this); + } + + @Test + public void testGetStorageProviderCredentials() throws Exception { + String platformInstance = "test-platform"; + String clientId = "testClientId"; + String clientSecret = "testClientSecret"; + String role = "testRole"; + String dataRoot = "s3://data-root/test/"; + String region = "us-east-1"; + + Urn clientIdUrn = Urn.createFromString("urn:li:secret:clientId"); + Urn clientSecretUrn = Urn.createFromString("urn:li:secret:clientSecret"); + + icebergWarehouse = new 
IcebergWarehouseInfo(); + icebergWarehouse.setClientId(clientIdUrn); + icebergWarehouse.setClientSecret(clientSecretUrn); + icebergWarehouse.setDataRoot(dataRoot); + icebergWarehouse.setRegion(region); + icebergWarehouse.setRole(role); + + when(entityService.getLatestAspect( + any(), + any(), + eq(DataHubIcebergWarehouse.DATAPLATFORM_INSTANCE_ICEBERG_WAREHOUSE_ASPECT_NAME))) + .thenReturn(warehouseAspect); + when(warehouseAspect.data()).thenReturn(icebergWarehouse.data()); + + when(secretService.decrypt(eq(clientId))).thenReturn("decrypted-" + clientId); + when(secretService.decrypt(eq(clientSecret))).thenReturn("decrypted-" + clientSecret); + + DataHubSecretValue clientIdValue = new DataHubSecretValue(); + clientIdValue.setValue(clientId); + + DataHubSecretValue clientSecretValue = new DataHubSecretValue(); + clientSecretValue.setValue(clientSecret); + + Map> aspectsMap = new HashMap<>(); + aspectsMap.put(clientIdUrn, Arrays.asList(clientIdValue)); + aspectsMap.put(clientSecretUrn, Arrays.asList(clientSecretValue)); + + when(entityService.getLatestAspects( + eq(operationContext), + eq(Set.of(clientIdUrn, clientSecretUrn)), + eq(Set.of("dataHubSecretValue")), + eq(false))) + .thenReturn(aspectsMap); + + DataHubIcebergWarehouse warehouse = + DataHubIcebergWarehouse.of( + platformInstance, entityService, secretService, operationContext); + + CredentialProvider.StorageProviderCredentials credentials = + warehouse.getStorageProviderCredentials(); + + assertNotNull(credentials); + assertEquals(credentials.clientId, "decrypted-" + clientId); + assertEquals(credentials.clientSecret, "decrypted-" + clientSecret); + assertEquals(credentials.role, role); + assertEquals(credentials.region, region); + } + + @Test( + dependsOnMethods = { + "testGetStorageProviderCredentials" + }) // Dependency for icebergWarehouse setup + public void testOf_Success() throws Exception { + String platformInstance = "test-platform"; + when(entityService.getLatestAspect( + any(OperationContext.class), + any(Urn.class), + eq(DataHubIcebergWarehouse.DATAPLATFORM_INSTANCE_ICEBERG_WAREHOUSE_ASPECT_NAME))) + .thenReturn(warehouseAspect); + when(warehouseAspect.data()).thenReturn(icebergWarehouse.data()); + + DataHubIcebergWarehouse warehouse = + DataHubIcebergWarehouse.of( + platformInstance, entityService, secretService, operationContext); + + assertNotNull(warehouse); + assertEquals(warehouse.getPlatformInstance(), platformInstance); + } + + @Test( + expectedExceptions = NotFoundException.class, + expectedExceptionsMessageRegExp = "Unknown warehouse non-existent-platform") + public void testOf_WarehouseNotFound() throws Exception { + String platformInstance = "non-existent-platform"; + when(entityService.getLatestAspect( + any(OperationContext.class), + any(Urn.class), + eq(DataHubIcebergWarehouse.DATAPLATFORM_INSTANCE_ICEBERG_WAREHOUSE_ASPECT_NAME))) + .thenReturn(null); + + DataHubIcebergWarehouse.of(platformInstance, entityService, secretService, operationContext); + } + + @Test + public void testGetDataRoot() throws Exception { + String platformInstance = "test-platform"; + String dataRoot = "s3://test-bucket/"; + + when(entityService.getLatestAspect( + any(), + any(), + eq(DataHubIcebergWarehouse.DATAPLATFORM_INSTANCE_ICEBERG_WAREHOUSE_ASPECT_NAME))) + .thenReturn(warehouseAspect); + when(warehouseAspect.data()) + .thenReturn(new IcebergWarehouseInfo().setDataRoot(dataRoot).data()); + + DataHubIcebergWarehouse warehouse = + DataHubIcebergWarehouse.of( + platformInstance, entityService, secretService, operationContext); + 
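+    // getDataRoot() should return the root path exactly as configured on the warehouse aspect.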
+ String result = warehouse.getDataRoot(); + + assertEquals(result, dataRoot); + } + + @Test + public void testGetDatasetUrn() throws Exception { + String platformInstance = "test-platform"; + TableIdentifier tableId = TableIdentifier.of("db", "table"); + Urn resourceUrn = + Urn.createFromString("urn:li:platformResource:iceberg.test-platform.db.table"); + DatasetUrn expectedDatasetUrn = + new DatasetUrn( + DataPlatformUrn.createFromString("urn:li:dataPlatform:iceberg"), + "uuid", + FabricType.PROD); + + PlatformResourceInfo resourceInfo = new PlatformResourceInfo(); + resourceInfo.setPrimaryKey(expectedDatasetUrn.toString()); + + when(entityService.getLatestAspect( + any(), + any(), + eq(DataHubIcebergWarehouse.DATAPLATFORM_INSTANCE_ICEBERG_WAREHOUSE_ASPECT_NAME))) + .thenReturn(warehouseAspect); + when(warehouseAspect.data()).thenReturn(new IcebergWarehouseInfo().data()); + when(entityService.getLatestAspect( + eq(operationContext), eq(resourceUrn), eq("platformResourceInfo"))) + .thenReturn(resourceInfo); + + DataHubIcebergWarehouse warehouse = + DataHubIcebergWarehouse.of( + platformInstance, entityService, secretService, operationContext); + + Optional result = warehouse.getDatasetUrn(tableId); + + assertTrue(result.isPresent()); + assertEquals(result.get(), expectedDatasetUrn); + } + + @Test + public void testGetIcebergMetadata() throws Exception { + String platformInstance = "test-platform"; + TableIdentifier tableId = TableIdentifier.of("db", "table"); + DatasetUrn datasetUrn = + new DatasetUrn( + DataPlatformUrn.createFromString("urn:li:dataPlatform:iceberg"), + "uuid", + FabricType.PROD); + + IcebergCatalogInfo expectedMetadata = + new IcebergCatalogInfo().setMetadataPointer("s3://bucket/path"); + + when(entityService.getLatestAspect( + any(), + any(), + eq(DataHubIcebergWarehouse.DATAPLATFORM_INSTANCE_ICEBERG_WAREHOUSE_ASPECT_NAME))) + .thenReturn(warehouseAspect); + when(warehouseAspect.data()).thenReturn(new IcebergWarehouseInfo().data()); + + // Mock getDatasetUrn behavior + PlatformResourceInfo resourceInfo = new PlatformResourceInfo(); + resourceInfo.setPrimaryKey(datasetUrn.toString()); + when(entityService.getLatestAspect(any(), any(), eq("platformResourceInfo"))) + .thenReturn(resourceInfo); + + when(entityService.getLatestAspect( + eq(operationContext), + eq(datasetUrn), + eq(DataHubIcebergWarehouse.DATASET_ICEBERG_METADATA_ASPECT_NAME))) + .thenReturn(expectedMetadata); + + DataHubIcebergWarehouse warehouse = + DataHubIcebergWarehouse.of( + platformInstance, entityService, secretService, operationContext); + + IcebergCatalogInfo result = warehouse.getIcebergMetadata(tableId); + + assertNotNull(result); + assertEquals(result.getMetadataPointer(), expectedMetadata.getMetadataPointer()); + } + + @Test + public void testGetIcebergMetadataEnveloped() throws Exception { + String platformInstance = "test-platform"; + TableIdentifier tableId = TableIdentifier.of("db", "table"); + DatasetUrn datasetUrn = + new DatasetUrn( + DataPlatformUrn.createFromString("urn:li:dataPlatform:iceberg"), + "uuid", + FabricType.PROD); + + EnvelopedAspect expectedEnvelopedAspect = mock(EnvelopedAspect.class); + + when(entityService.getLatestAspect( + any(), + any(), + eq(DataHubIcebergWarehouse.DATAPLATFORM_INSTANCE_ICEBERG_WAREHOUSE_ASPECT_NAME))) + .thenReturn(warehouseAspect); + when(warehouseAspect.data()).thenReturn(new IcebergWarehouseInfo().data()); + + // Mock getDatasetUrn behavior + PlatformResourceInfo resourceInfo = new PlatformResourceInfo(); + 
resourceInfo.setPrimaryKey(datasetUrn.toString()); + when(entityService.getLatestAspect(any(), any(), eq("platformResourceInfo"))) + .thenReturn(resourceInfo); + + when(entityService.getLatestEnvelopedAspect( + eq(operationContext), + eq("dataset"), + eq(datasetUrn), + eq(DataHubIcebergWarehouse.DATASET_ICEBERG_METADATA_ASPECT_NAME))) + .thenReturn(expectedEnvelopedAspect); + + DataHubIcebergWarehouse warehouse = + DataHubIcebergWarehouse.of( + platformInstance, entityService, secretService, operationContext); + + Pair result = warehouse.getIcebergMetadataEnveloped(tableId); + + assertNotNull(result); + assertEquals(result.getFirst(), expectedEnvelopedAspect); + assertEquals(result.getSecond(), datasetUrn); + } + + @Test + public void testDeleteDataset() throws Exception { + String platformInstance = "test-platform"; + TableIdentifier tableId = TableIdentifier.of("db", "table"); + Urn resourceUrn = + Urn.createFromString("urn:li:platformResource:iceberg.test-platform.db.table"); + DatasetUrn datasetUrn = + new DatasetUrn( + DataPlatformUrn.createFromString("urn:li:dataPlatform:iceberg"), + "uuid", + FabricType.PROD); + + when(entityService.getLatestAspect( + any(), + any(), + eq(DataHubIcebergWarehouse.DATAPLATFORM_INSTANCE_ICEBERG_WAREHOUSE_ASPECT_NAME))) + .thenReturn(warehouseAspect); + when(warehouseAspect.data()).thenReturn(new IcebergWarehouseInfo().data()); + when(entityService.exists(eq(operationContext), eq(resourceUrn))).thenReturn(true); + + // Mock getDatasetUrn behavior + PlatformResourceInfo resourceInfo = new PlatformResourceInfo(); + resourceInfo.setPrimaryKey(datasetUrn.toString()); + when(entityService.getLatestAspect(any(), any(), eq("platformResourceInfo"))) + .thenReturn(resourceInfo); + + DataHubIcebergWarehouse warehouse = + DataHubIcebergWarehouse.of( + platformInstance, entityService, secretService, operationContext); + + boolean result = warehouse.deleteDataset(tableId); + + assertTrue(result); + verify(entityService).deleteUrn(eq(operationContext), eq(resourceUrn)); + verify(entityService).deleteUrn(eq(operationContext), eq(datasetUrn)); + } + + @Test + public void testCreateDataset() throws Exception { + String platformInstance = "test-platform"; + TableIdentifier tableId = TableIdentifier.of("db", "table"); + AuditStamp auditStamp = + new AuditStamp() + .setTime(System.currentTimeMillis()) + .setActor(Urn.createFromString("urn:li:corpuser:testUser")); + + when(entityService.getLatestAspect(any(), any(), eq("icebergWarehouseInfo"))) + .thenReturn(warehouseAspect); + IcebergWarehouseInfo warehouse = new IcebergWarehouseInfo().setEnv(FabricType.PROD); + when(warehouseAspect.data()).thenReturn(warehouse.data()); + + DataHubIcebergWarehouse icebergWarehouse = + DataHubIcebergWarehouse.of( + platformInstance, entityService, secretService, operationContext); + + DatasetUrn result = icebergWarehouse.createDataset(tableId, false, auditStamp); + + assertNotNull(result); + assertEquals(result.getPlatformEntity(), Urn.createFromString("urn:li:dataPlatform:iceberg")); + assertEquals(result.getOriginEntity(), FabricType.PROD); + assertTrue(result.getDatasetNameEntity().startsWith(platformInstance + ".")); + + verify(entityService) + .ingestProposal( + eq(operationContext), any(MetadataChangeProposal.class), eq(auditStamp), eq(false)); + } + + @Test + public void testRenameDataset() throws Exception { + String platformInstance = "test-platform"; + TableIdentifier fromTableId = TableIdentifier.of("db", "oldTable"); + TableIdentifier toTableId = TableIdentifier.of("db", 
"newTable"); + DatasetUrn existingDatasetUrn = + new DatasetUrn( + DataPlatformUrn.createFromString("urn:li:dataPlatform:iceberg"), + "test-dataset", + FabricType.PROD); + AuditStamp auditStamp = + new AuditStamp() + .setTime(System.currentTimeMillis()) + .setActor(Urn.createFromString("urn:li:corpuser:testUser")); + + when(entityService.getLatestAspect( + any(), + any(), + eq(DataHubIcebergWarehouse.DATAPLATFORM_INSTANCE_ICEBERG_WAREHOUSE_ASPECT_NAME))) + .thenReturn(warehouseAspect); + when(warehouseAspect.data()).thenReturn(new IcebergWarehouseInfo().data()); + + // Mock getDatasetUrn behavior for source table + PlatformResourceInfo resourceInfo = new PlatformResourceInfo(); + resourceInfo.setPrimaryKey(existingDatasetUrn.toString()); + when(entityService.getLatestAspect(any(), any(), eq("platformResourceInfo"))) + .thenReturn(resourceInfo); + + DataHubIcebergWarehouse warehouse = + DataHubIcebergWarehouse.of( + platformInstance, entityService, secretService, operationContext); + + DatasetUrn result = warehouse.renameDataset(fromTableId, toTableId, false, auditStamp); + + assertNotNull(result); + assertEquals(result, existingDatasetUrn); + + verify(entityService) + .ingestProposal( + eq(operationContext), any(MetadataChangeProposal.class), eq(auditStamp), eq(false)); + verify(entityService).deleteUrn(eq(operationContext), any(Urn.class)); + } + + @Test(expectedExceptions = NoSuchTableException.class) + public void testRenameDataset_SourceTableNotFound() throws Exception { + String platformInstance = "test-platform"; + TableIdentifier fromTableId = TableIdentifier.of("db", "oldTable"); + TableIdentifier toTableId = TableIdentifier.of("db", "newTable"); + AuditStamp auditStamp = + new AuditStamp() + .setTime(System.currentTimeMillis()) + .setActor(Urn.createFromString("urn:li:corpuser:testUser")); + + when(entityService.getLatestAspect(any(), any(), eq("icebergWarehouseInfo"))) + .thenReturn(warehouseAspect); + when(warehouseAspect.data()).thenReturn(new IcebergWarehouseInfo().data()); + + // Mock empty response for getDatasetUrn + when(entityService.getLatestAspect(any(), any(), eq("platformResourceInfo"))).thenReturn(null); + + DataHubIcebergWarehouse warehouse = + DataHubIcebergWarehouse.of( + platformInstance, entityService, secretService, operationContext); + + warehouse.renameDataset(fromTableId, toTableId, false, auditStamp); + } +} diff --git a/metadata-service/iceberg-catalog/src/test/java/io/datahubproject/iceberg/catalog/DataHubRestCatalogTest.java b/metadata-service/iceberg-catalog/src/test/java/io/datahubproject/iceberg/catalog/DataHubRestCatalogTest.java new file mode 100644 index 00000000000000..ae333b0ccbd65c --- /dev/null +++ b/metadata-service/iceberg-catalog/src/test/java/io/datahubproject/iceberg/catalog/DataHubRestCatalogTest.java @@ -0,0 +1,394 @@ +package io.datahubproject.iceberg.catalog; + +import static com.linkedin.metadata.Constants.CONTAINER_PROPERTIES_ASPECT_NAME; +import static io.datahubproject.iceberg.catalog.DataHubRestCatalog.*; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.Mockito.*; +import static org.testng.Assert.*; + +import com.linkedin.common.AuditStamp; +import com.linkedin.common.urn.Urn; +import com.linkedin.container.ContainerProperties; +import com.linkedin.data.template.RecordTemplate; +import com.linkedin.data.template.StringMap; +import com.linkedin.dataset.DatasetProperties; +import com.linkedin.metadata.entity.EntityService; +import 
com.linkedin.metadata.search.EntitySearchService; +import com.linkedin.metadata.search.SearchEntity; +import com.linkedin.metadata.search.SearchEntityArray; +import com.linkedin.metadata.search.SearchResult; +import com.linkedin.mxe.MetadataChangeProposal; +import io.datahubproject.iceberg.catalog.credentials.CredentialProvider; +import io.datahubproject.metadata.context.OperationContext; +import java.util.*; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import org.apache.iceberg.catalog.Namespace; +import org.apache.iceberg.catalog.TableIdentifier; +import org.apache.iceberg.exceptions.NoSuchNamespaceException; +import org.apache.iceberg.rest.requests.UpdateNamespacePropertiesRequest; +import org.apache.iceberg.rest.responses.UpdateNamespacePropertiesResponse; +import org.mockito.ArgumentCaptor; +import org.mockito.Mock; +import org.mockito.MockitoAnnotations; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +public class DataHubRestCatalogTest { + + @Mock private EntityService entityService; + + @Mock private EntitySearchService searchService; + + @Mock private OperationContext operationContext; + + @Mock private DataHubIcebergWarehouse warehouse; + + @Mock private CredentialProvider credentialProvider; + + private DataHubRestCatalog catalog; + + @BeforeMethod + public void setup() { + MockitoAnnotations.openMocks(this); + when(warehouse.getPlatformInstance()).thenReturn("test-platform"); + String warehouseRoot = "s3://data/warehouse/"; + when(warehouse.getDataRoot()).thenReturn(warehouseRoot); + catalog = + new DataHubRestCatalog( + entityService, searchService, operationContext, warehouse, credentialProvider); + } + + @Test + public void testCreateNamespace_SingleLevel() throws Exception { + Namespace namespace = Namespace.of("db1"); + Map properties = Map.of(); + + catalog.createNamespace(namespace, properties); + + ArgumentCaptor mcpCaptor = + ArgumentCaptor.forClass(MetadataChangeProposal.class); + verify(entityService, times(3)) + .ingestProposal( + eq(operationContext), mcpCaptor.capture(), any(AuditStamp.class), eq(false)); + + List mcps = mcpCaptor.getAllValues(); + + MetadataChangeProposal subTypesMcp = mcps.get(0); + assertEquals(subTypesMcp.getAspectName(), "subTypes"); + + MetadataChangeProposal containerPropertiesMcp = mcps.get(1); + assertEquals(containerPropertiesMcp.getAspectName(), "containerProperties"); + } + + @Test + public void testCreateNamespace_MultiLevel() throws Exception { + Namespace namespace = Namespace.of("db1", "schema1"); + Map properties = Map.of(); + + when(entityService.exists(eq(operationContext), any(Urn.class))).thenReturn(true); + + catalog.createNamespace(namespace, properties); + + ArgumentCaptor mcpCaptor = + ArgumentCaptor.forClass(MetadataChangeProposal.class); + verify(entityService, times(4)) + .ingestProposal( + eq(operationContext), mcpCaptor.capture(), any(AuditStamp.class), eq(false)); + + List mcps = mcpCaptor.getAllValues(); + MetadataChangeProposal containerMcp = mcps.get(0); + assertEquals(containerMcp.getAspectName(), "container"); + } + + @Test(expectedExceptions = NoSuchNamespaceException.class) + public void testCreateNamespace_MultiLevel_ParentMissing() throws Exception { + Namespace namespace = Namespace.of("db1", "schema1"); + Map properties = Map.of(); + + when(entityService.exists(eq(operationContext), any(Urn.class))).thenReturn(false); + + // Act - should throw exception + catalog.createNamespace(namespace, properties); + } + + @Test + public void 
testLoadNamespaceMetadata_Exists() throws Exception { + Namespace namespace = Namespace.of("db1", "schema1"); + ContainerProperties containerProperties = + new ContainerProperties().setCustomProperties(new StringMap()); + Urn urn = Urn.createFromString("urn:li:container:iceberg__test-platform.db1.schema1"); + when(entityService.getLatestAspect( + eq(operationContext), eq(urn), eq(CONTAINER_PROPERTIES_ASPECT_NAME))) + .thenReturn(containerProperties); + + Map metadata = catalog.loadNamespaceMetadata(namespace); + + assertNotNull(metadata); + assertTrue(metadata.isEmpty()); + verify(entityService) + .getLatestAspect(eq(operationContext), eq(urn), eq(CONTAINER_PROPERTIES_ASPECT_NAME)); + } + + @Test(expectedExceptions = NoSuchNamespaceException.class) + public void testLoadNamespaceMetadata_NotExists() throws Exception { + Namespace namespace = Namespace.of("db1", "schema1"); + Urn urn = Urn.createFromString("urn:li:container:iceberg__test-platform.db1.schema1"); + when(entityService.getLatestAspect( + eq(operationContext), eq(urn), eq(CONTAINER_PROPERTIES_ASPECT_NAME))) + .thenReturn(null); + + // Act & Assert - should throw exception + catalog.loadNamespaceMetadata(namespace); + } + + @Test + public void testDropTable() throws Exception { + TableIdentifier tableIdentifier = TableIdentifier.of("db1", "table1"); + when(warehouse.deleteDataset(eq(tableIdentifier))).thenReturn(true); + + boolean result = catalog.dropTable(tableIdentifier, false); + + assertTrue(result); + verify(warehouse).deleteDataset(eq(tableIdentifier)); + } + + @Test(expectedExceptions = UnsupportedOperationException.class) + public void testDropTable_WithPurgeThrows() { + TableIdentifier tableIdentifier = TableIdentifier.of("db1", "table1"); + + catalog.dropTable(tableIdentifier, true); + } + + @Test + public void testDropTable_NonExistentTable() throws Exception { + TableIdentifier tableIdentifier = TableIdentifier.of("db1", "table1"); + when(entityService.exists(eq(operationContext), (Urn) any())).thenReturn(false); + + boolean result = catalog.dropTable(tableIdentifier, false); + + // Assert + assertFalse(result); + verify(entityService, never()).deleteUrn(any(), any()); + } + + @Test + public void testDefaultWarehouseLocation() { + TableIdentifier tableIdentifier = TableIdentifier.of("db1", "table1"); + + String location = catalog.defaultWarehouseLocation(tableIdentifier); + + assertEquals(location, "s3://data/warehouse/db1/table1"); + } + + @Test + public void testDefaultWarehouseLocationWithoutTrailingSlash() { + String warehouseRoot = "s3://data/warehouse"; + when(warehouse.getDataRoot()).thenReturn(warehouseRoot); + DataHubRestCatalog testCatalog = + new DataHubRestCatalog( + entityService, searchService, operationContext, warehouse, credentialProvider); + String warehouseLocation = + testCatalog.defaultWarehouseLocation(TableIdentifier.of("db1", "table1")); + assertEquals(warehouseLocation, "s3://data/warehouse/db1/table1"); + } + + @Test + public void testListNamespaces_EmptyNamespace() throws Exception { + // Test for root level namespace listing + Namespace emptyNamespace = Namespace.empty(); + SearchResult mockResult = mock(SearchResult.class); + List entitiesList = + Arrays.asList( + createSearchEntity("urn:li:container:iceberg__ns1"), + createSearchEntity("urn:li:container:iceberg__ns2")); + SearchEntityArray entities = new SearchEntityArray(); + entities.addAll(entitiesList); + when(mockResult.getEntities()).thenReturn(entities); + when(mockResult.getNumEntities()).thenReturn(2); + 
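+    // Root-level namespace listing is backed by a single container search; stub it to return
+    // the two mock container urns above.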
when(searchService.search( + eq(operationContext), any(), eq("*"), any(), any(), eq(0), eq(PAGE_SIZE))) + .thenReturn(mockResult); + + List result = catalog.listNamespaces(emptyNamespace); + + assertEquals(result.size(), 2); + assertEquals(result.get(0), Namespace.of("ns1")); + assertEquals(result.get(1), Namespace.of("ns2")); + } + + @Test + public void testListNamespaces_NestedNamespace() throws Exception { + Namespace parentNamespace = Namespace.of("parent"); + SearchResult mockResult = mock(SearchResult.class); + List entitiesList = + Arrays.asList( + createSearchEntity("urn:li:container:iceberg__parent.ns1"), + createSearchEntity("urn:li:container:iceberg__parent.ns2")); + SearchEntityArray entities = new SearchEntityArray(); + entities.addAll(entitiesList); + when(mockResult.getEntities()).thenReturn(entities); + when(mockResult.getNumEntities()).thenReturn(2); + when(searchService.search( + eq(operationContext), any(), eq("*"), any(), any(), eq(0), eq(PAGE_SIZE))) + .thenReturn(mockResult); + + List result = catalog.listNamespaces(parentNamespace); + + assertEquals(result.size(), 2); + assertEquals(result.get(0), Namespace.of("parent", "ns1")); + assertEquals(result.get(1), Namespace.of("parent", "ns2")); + } + + @Test + public void testDropNamespace() throws Exception { + Namespace namespace = Namespace.of("db1"); + boolean result = catalog.dropNamespace(namespace); + assertFalse(result); // Current implementation always returns false + } + + @Test + public void testListTables() throws Exception { + Namespace namespace = Namespace.of("ns1"); + List entitiesList = + Arrays.asList( + createSearchEntity("urn:li:dataset:iceberg__ns1.table1"), + createSearchEntity("urn:li:dataset:iceberg__ns1.table2")); + SearchEntityArray entities = new SearchEntityArray(); + entities.addAll(entitiesList); + SearchResult searchResult = new SearchResult(); + searchResult.setEntities(entities); + searchResult.setNumEntities(2); + + // Mock aspect retrieval + Map> aspects = new HashMap<>(); + for (SearchEntity entity : entities) { + DatasetProperties props = + new DatasetProperties() + .setQualifiedName( + "warehouse.ns1.table" + + entity + .getEntity() + .toString() + .charAt(entity.getEntity().toString().length() - 1)); + aspects.put(entity.getEntity(), Arrays.asList(props)); + } + when(entityService.getLatestAspects(eq(operationContext), any(), any(), eq(false))) + .thenReturn(aspects); + when(searchService.search( + eq(operationContext), any(), any(), any(), any(), eq(0), eq(PAGE_SIZE))) + .thenReturn(searchResult); + + List result = catalog.listTables(namespace); + + assertEquals(result.size(), 2); + assertEquals(result.get(0), TableIdentifier.of("ns1", "table1")); + assertEquals(result.get(1), TableIdentifier.of("ns1", "table2")); + } + + @Test + public void testListViews() throws Exception { + Namespace namespace = Namespace.of("ns1"); + List entitiesList = + Arrays.asList( + createSearchEntity("urn:li:dataset:iceberg__ns1.view1"), + createSearchEntity("urn:li:dataset:iceberg__ns1.view2")); + SearchEntityArray entities = new SearchEntityArray(); + entities.addAll(entitiesList); + SearchResult searchResult = new SearchResult(); + searchResult.setEntities(entities); + searchResult.setNumEntities(2); + + // Mock aspect retrieval + Map> aspects = new HashMap<>(); + for (SearchEntity entity : entities) { + DatasetProperties props = + new DatasetProperties() + .setQualifiedName( + "warehouse.ns1.view" + + entity + .getEntity() + .toString() + .charAt(entity.getEntity().toString().length() - 1)); + 
aspects.put(entity.getEntity(), Arrays.asList(props)); + } + when(entityService.getLatestAspects(eq(operationContext), any(), any(), eq(false))) + .thenReturn(aspects); + + when(searchService.search( + eq(operationContext), any(), any(), any(), any(), eq(0), eq(PAGE_SIZE))) + .thenReturn(searchResult); + + List result = catalog.listViews(namespace); + + assertEquals(result.size(), 2); + assertEquals(result.get(0), TableIdentifier.of("ns1", "view1")); + assertEquals(result.get(1), TableIdentifier.of("ns1", "view2")); + } + + @Test + public void testDropView() throws Exception { + TableIdentifier viewIdentifier = TableIdentifier.of("ns1", "view1"); + when(warehouse.deleteDataset(eq(viewIdentifier))).thenReturn(true); + + boolean result = catalog.dropView(viewIdentifier); + + assertTrue(result); + verify(warehouse).deleteDataset(eq(viewIdentifier)); + } + + @Test + public void testUpdateNamespaceProperties() throws Exception { + Namespace namespace = Namespace.of("ns1"); + Map existingProps = new HashMap<>(); + existingProps.put("existing1", "value1"); + existingProps.put("toRemove1", "value2"); + + when(entityService.getLatestAspect( + eq(operationContext), any(), eq(CONTAINER_PROPERTIES_ASPECT_NAME))) + .thenReturn(new ContainerProperties().setCustomProperties(new StringMap(existingProps))); + + UpdateNamespacePropertiesRequest request = + UpdateNamespacePropertiesRequest.builder() + .update("new1", "newValue1") + .remove("toRemove1") + .build(); + + UpdateNamespacePropertiesResponse response = + catalog.updateNamespaceProperties(namespace, request); + + assertTrue(response.removed().contains("toRemove1")); + assertTrue(response.updated().contains("new1")); + assertTrue(response.missing().isEmpty()); + + ArgumentCaptor mcpCaptor = + ArgumentCaptor.forClass(MetadataChangeProposal.class); + verify(entityService, atLeastOnce()) + .ingestProposal( + eq(operationContext), mcpCaptor.capture(), any(AuditStamp.class), eq(false)); + + // Verify the final properties + ContainerProperties expectedProps = + new ContainerProperties() + .setName("ns1") + .setCustomProperties( + new StringMap( + Map.of( + "existing1", "value1", + "new1", "newValue1"))); + + List mcps = mcpCaptor.getAllValues(); + MetadataChangeProposal finalMcp = mcps.get(mcps.size() - 1); + assertEquals(finalMcp.getAspectName(), "containerProperties"); + // Note: You might need to add more specific verification of the serialized aspect + } + + // Helper method for creating mock search entities + private SearchEntity createSearchEntity(String urn) throws Exception { + SearchEntity entity = new SearchEntity(); + entity.setEntity(Urn.createFromString(urn)); + return entity; + } +} diff --git a/metadata-service/iceberg-catalog/src/test/java/io/datahubproject/iceberg/catalog/S3CredentialProviderTest.java b/metadata-service/iceberg-catalog/src/test/java/io/datahubproject/iceberg/catalog/S3CredentialProviderTest.java new file mode 100644 index 00000000000000..a4915d83d423f7 --- /dev/null +++ b/metadata-service/iceberg-catalog/src/test/java/io/datahubproject/iceberg/catalog/S3CredentialProviderTest.java @@ -0,0 +1,126 @@ +package io.datahubproject.iceberg.catalog; + +import static org.mockito.Mockito.*; +import static org.testng.Assert.*; + +import com.linkedin.metadata.authorization.PoliciesConfig; +import io.datahubproject.iceberg.catalog.credentials.CredentialProvider; +import io.datahubproject.iceberg.catalog.credentials.S3CredentialProvider; +import java.util.Map; +import java.util.Set; +import 
org.apache.iceberg.exceptions.BadRequestException; +import org.mockito.Mock; +import org.mockito.MockedStatic; +import org.mockito.MockitoAnnotations; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; +import software.amazon.awssdk.auth.credentials.StaticCredentialsProvider; +import software.amazon.awssdk.regions.Region; +import software.amazon.awssdk.services.sts.StsClient; +import software.amazon.awssdk.services.sts.StsClientBuilder; +import software.amazon.awssdk.services.sts.model.AssumeRoleRequest; +import software.amazon.awssdk.services.sts.model.AssumeRoleResponse; +import software.amazon.awssdk.services.sts.model.Credentials; + +public class S3CredentialProviderTest { + + private CredentialProvider.StorageProviderCredentials storageProviderCreds; + private CredentialProvider.CredentialsCacheKey cacheKey; + + @Mock private StsClient stsClient; + + private S3CredentialProvider credentialProvider; + + @Mock private StsClientBuilder stsClientBuilder; + + @BeforeMethod + public void setUp() { + + MockitoAnnotations.openMocks(this); + + storageProviderCreds = + new CredentialProvider.StorageProviderCredentials( + "testClientId", + "testClientSecret", + "arn:aws:iam::123456789012:role/test-role", + "us-east-1", + null); + + cacheKey = + new CredentialProvider.CredentialsCacheKey( + "testPlatform", + PoliciesConfig.DATA_READ_ONLY_PRIVILEGE, + Set.of("s3://test-bucket/path/to/data")); + + credentialProvider = new S3CredentialProvider(); + } + + @Test + public void testGetCredentials() { + StsClientBuilder builderMock = mock(StsClientBuilder.class); + StsClient clientMock = mock(StsClient.class); + + // Mock the builder chain + when(builderMock.region(any(Region.class))).thenReturn(builderMock); + when(builderMock.credentialsProvider(any(StaticCredentialsProvider.class))) + .thenReturn(builderMock); + when(builderMock.build()).thenReturn(clientMock); + + AssumeRoleResponse assumeRoleResponse = + AssumeRoleResponse.builder() + .credentials( + Credentials.builder() + .accessKeyId("testAccessId-temp") + .secretAccessKey("testSecretKey-temp") + .sessionToken("testSessionToken-temp") + .expiration(java.time.Instant.now().plusSeconds(900)) + .build()) + .build(); + when(clientMock.assumeRole(any(AssumeRoleRequest.class))).thenReturn(assumeRoleResponse); + + try (MockedStatic<StsClient> stsClientMockedStatic = mockStatic(StsClient.class)) { + // Mock the static builder() method + stsClientMockedStatic.when(StsClient::builder).thenReturn(builderMock); + + Map<String, String> creds = credentialProvider.getCredentials(cacheKey, storageProviderCreds); + assertNotNull(creds); + assertEquals(creds.get("client.region"), Region.of("us-east-1").id()); + assertEquals(creds.get("s3.access-key-id"), "testAccessId-temp"); + assertEquals(creds.get("s3.secret-access-key"), "testSecretKey-temp"); + assertEquals(creds.get("s3.session-token"), "testSessionToken-temp"); + } + } + + @Test(expectedExceptions = IllegalStateException.class) + public void testGetWithUnsupportedPrivilege() { + S3CredentialProvider provider = new S3CredentialProvider(); + CredentialProvider.CredentialsCacheKey keyWithUnsupportedPrivilege = + new CredentialProvider.CredentialsCacheKey( + "testPlatform", + PoliciesConfig.DATA_MANAGE_TABLES_PRIVILEGE, + Set.of("s3://test-bucket/path/to/data")); + + 
provider.getCredentials(keyWithUnsupportedPrivilege, storageProviderCreds); + } + + @Test(expectedExceptions = BadRequestException.class) + public void testGetWithEmptyLocations() { + S3CredentialProvider provider = new S3CredentialProvider(); + CredentialProvider.CredentialsCacheKey keyWithEmptyLocations = + new CredentialProvider.CredentialsCacheKey( + "testPlatform", PoliciesConfig.DATA_READ_ONLY_PRIVILEGE, Set.of()); + + provider.getCredentials(keyWithEmptyLocations, storageProviderCreds); + } +} diff --git a/metadata-service/iceberg-catalog/src/test/java/io/datahubproject/iceberg/catalog/UtilsTest.java b/metadata-service/iceberg-catalog/src/test/java/io/datahubproject/iceberg/catalog/UtilsTest.java new file mode 100644 index 00000000000000..f7eb654c59f6a4 --- /dev/null +++ b/metadata-service/iceberg-catalog/src/test/java/io/datahubproject/iceberg/catalog/UtilsTest.java @@ -0,0 +1,143 @@ +package io.datahubproject.iceberg.catalog; + +import static org.mockito.Mockito.*; +import static org.testng.Assert.*; + +import com.linkedin.common.AuditStamp; +import com.linkedin.common.urn.DataPlatformUrn; +import com.linkedin.common.urn.Urn; +import com.linkedin.events.metadata.ChangeType; +import com.linkedin.metadata.Constants; +import com.linkedin.mxe.MetadataChangeProposal; +import java.util.Map; +import java.util.Set; +import org.apache.iceberg.TableMetadata; +import org.apache.iceberg.TableProperties; +import org.apache.iceberg.catalog.Namespace; +import org.apache.iceberg.catalog.TableIdentifier; +import org.mockito.Mock; +import org.mockito.MockitoAnnotations; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +public class UtilsTest { + + @Mock private TableMetadata mockTableMetadata; + + @BeforeMethod + public void setup() { + MockitoAnnotations.initMocks(this); + } + + @Test + public void testAuditStamp() { + AuditStamp stamp = Utils.auditStamp(); + assertNotNull(stamp); + assertEquals(stamp.getActor().toString(), Constants.SYSTEM_ACTOR); + assertTrue(stamp.getTime() > 0); + } + + @Test + public void testPlatformInstanceMcp() { + String platformInstance = "testInstance"; + String entityType = "dataset"; + Urn urn = Utils.platformUrn(); + + MetadataChangeProposal mcp = Utils.platformInstanceMcp(platformInstance, urn, entityType); + + assertNotNull(mcp); + assertEquals(mcp.getEntityUrn(), urn); + assertEquals(mcp.getEntityType(), entityType); + assertEquals(mcp.getAspectName(), Constants.DATA_PLATFORM_INSTANCE_ASPECT_NAME); + assertEquals(mcp.getChangeType(), ChangeType.UPSERT); + } + + @Test + public void testPlatformUrn() { + DataPlatformUrn urn = Utils.platformUrn(); + assertNotNull(urn); + assertEquals(urn.getPlatformNameEntity(), "iceberg"); + assertEquals(urn.getEntityType(), "dataPlatform"); + } + + @Test + public void testContainerUrn() { + String platformInstance = "testInstance"; + Namespace namespace = Namespace.of("db", "schema"); + + Urn containerUrn = Utils.containerUrn(platformInstance, namespace); + + assertNotNull(containerUrn); + assertEquals(containerUrn.toString(), "urn:li:container:iceberg__testInstance.db.schema"); + } + + @Test + public void testLocations() { + String mainLocation = "s3://bucket/main"; + String writeDataLocation = "s3://bucket/main/data"; + String writeMetadataLocation = "s3://bucket/main/metadata"; + + Map properties = mock(Map.class); + when(properties.containsKey(TableProperties.WRITE_DATA_LOCATION)).thenReturn(true); + when(properties.containsKey(TableProperties.WRITE_METADATA_LOCATION)).thenReturn(true); + 
when(properties.get(TableProperties.WRITE_DATA_LOCATION)).thenReturn(writeDataLocation); + when(properties.get(TableProperties.WRITE_METADATA_LOCATION)).thenReturn(writeMetadataLocation); + + when(mockTableMetadata.location()).thenReturn(mainLocation); + when(mockTableMetadata.properties()).thenReturn(properties); + + Set locations = Utils.locations(mockTableMetadata); + + assertEquals(locations.size(), 3); + assertTrue(locations.contains(mainLocation)); + assertTrue(locations.contains(writeDataLocation)); + assertTrue(locations.contains(writeMetadataLocation)); + } + + @Test + public void testEmptyLocations() { + String mainLocation = "s3://bucket/main"; + String writeDataLocation = "s3://bucket/main/data"; + String writeMetadataLocation = "s3://bucket/main/metadata"; + + Map properties = mock(Map.class); + when(properties.containsKey(TableProperties.WRITE_DATA_LOCATION)).thenReturn(false); + when(properties.containsKey(TableProperties.WRITE_METADATA_LOCATION)).thenReturn(false); + when(properties.get(TableProperties.WRITE_DATA_LOCATION)).thenReturn(writeDataLocation); + when(properties.get(TableProperties.WRITE_METADATA_LOCATION)).thenReturn(writeMetadataLocation); + + when(mockTableMetadata.location()).thenReturn(mainLocation); + when(mockTableMetadata.properties()).thenReturn(properties); + + Set locations = Utils.locations(mockTableMetadata); + + assertEquals(locations.size(), 1); + assertTrue(locations.contains(mainLocation)); + } + + @Test + public void testNamespaceFromString() { + String namespaceStr = "db\u001fschema"; // Note, separator is \u001f + Namespace namespace = Utils.namespaceFromString(namespaceStr); + + assertNotNull(namespace); + assertEquals(namespace.levels(), new String[] {"db", "schema"}); + } + + @Test + public void testTableIdFromString() { + String namespace = "db\u001fschema"; + String table = "mytable"; + + TableIdentifier tableId = Utils.tableIdFromString(namespace, table); + + assertNotNull(tableId); + assertEquals(tableId.toString(), "db.schema.mytable"); + } + + @Test + public void testParentDir() { + String fileLocation = "s3://bucket/path/to/file.txt"; + String parentDir = Utils.parentDir(fileLocation); + assertEquals(parentDir, "s3://bucket/path/to"); + } +} diff --git a/metadata-service/iceberg-catalog/src/test/java/io/datahubproject/iceberg/catalog/rest/common/IcebergExceptionHandlerAdviceTest.java b/metadata-service/iceberg-catalog/src/test/java/io/datahubproject/iceberg/catalog/rest/common/IcebergExceptionHandlerAdviceTest.java new file mode 100644 index 00000000000000..01aa1136812f9b --- /dev/null +++ b/metadata-service/iceberg-catalog/src/test/java/io/datahubproject/iceberg/catalog/rest/common/IcebergExceptionHandlerAdviceTest.java @@ -0,0 +1,147 @@ +package io.datahubproject.iceberg.catalog.rest.common; + +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertNotNull; + +import org.apache.iceberg.exceptions.*; +import org.apache.iceberg.rest.responses.ErrorResponse; +import org.springframework.http.HttpStatus; +import org.springframework.http.ResponseEntity; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +public class IcebergExceptionHandlerAdviceTest { + + private IcebergExceptionHandlerAdvice exceptionHandler; + private static final String TEST_ERROR_MESSAGE = "Test error message"; + + @BeforeMethod + public void setUp() { + exceptionHandler = new IcebergExceptionHandlerAdvice(); + } + + @Test + public void testHandleAlreadyExistsException() { + // Arrange + AlreadyExistsException 
exception = new AlreadyExistsException(TEST_ERROR_MESSAGE); + + // Act + ResponseEntity response = exceptionHandler.handle(exception); + ErrorResponse errorResponse = (ErrorResponse) response.getBody(); + + // Assert + assertEquals(response.getStatusCode(), HttpStatus.CONFLICT); + assertErrorResponse( + errorResponse, HttpStatus.CONFLICT.value(), TEST_ERROR_MESSAGE, "AlreadyExistsException"); + } + + @Test + public void testHandleNoSuchNamespaceException() { + // Arrange + NoSuchNamespaceException exception = new NoSuchNamespaceException(TEST_ERROR_MESSAGE); + + // Act + ResponseEntity response = exceptionHandler.handle(exception); + ErrorResponse errorResponse = (ErrorResponse) response.getBody(); + + // Assert + assertEquals(response.getStatusCode(), HttpStatus.NOT_FOUND); + assertErrorResponse( + errorResponse, + HttpStatus.NOT_FOUND.value(), + TEST_ERROR_MESSAGE, + "NoSuchNamespaceException"); + } + + @Test + public void testHandleNoSuchTableException() { + // Arrange + NoSuchTableException exception = new NoSuchTableException(TEST_ERROR_MESSAGE); + + // Act + ResponseEntity response = exceptionHandler.handle(exception); + ErrorResponse errorResponse = (ErrorResponse) response.getBody(); + + // Assert + assertEquals(response.getStatusCode(), HttpStatus.NOT_FOUND); + assertErrorResponse( + errorResponse, HttpStatus.NOT_FOUND.value(), TEST_ERROR_MESSAGE, "NoSuchTableException"); + } + + @Test + public void testHandleNoSuchViewException() { + // Arrange + NoSuchViewException exception = new NoSuchViewException(TEST_ERROR_MESSAGE); + + // Act + ResponseEntity response = exceptionHandler.handle(exception); + ErrorResponse errorResponse = (ErrorResponse) response.getBody(); + + // Assert + assertEquals(response.getStatusCode(), HttpStatus.NOT_FOUND); + assertErrorResponse( + errorResponse, HttpStatus.NOT_FOUND.value(), TEST_ERROR_MESSAGE, "NoSuchViewException"); + } + + @Test + public void testHandleNotFoundException() { + // Arrange + NotFoundException exception = new NotFoundException(TEST_ERROR_MESSAGE); + + // Act + ResponseEntity response = exceptionHandler.handle(exception); + ErrorResponse errorResponse = (ErrorResponse) response.getBody(); + + // Assert + assertEquals(response.getStatusCode(), HttpStatus.NOT_FOUND); + assertErrorResponse( + errorResponse, HttpStatus.NOT_FOUND.value(), TEST_ERROR_MESSAGE, "NotFoundException"); + } + + @Test + public void testHandleForbiddenException() { + // Arrange + ForbiddenException exception = new ForbiddenException(TEST_ERROR_MESSAGE); + + // Act + ResponseEntity response = exceptionHandler.handle(exception); + ErrorResponse errorResponse = (ErrorResponse) response.getBody(); + + // Assert + assertEquals(response.getStatusCode(), HttpStatus.FORBIDDEN); + assertErrorResponse( + errorResponse, HttpStatus.FORBIDDEN.value(), TEST_ERROR_MESSAGE, "ForbiddenException"); + } + + @Test + public void testHandleBadRequestException() { + // Arrange + BadRequestException exception = new BadRequestException(TEST_ERROR_MESSAGE); + + // Act + ResponseEntity response = exceptionHandler.handle(exception); + ErrorResponse errorResponse = (ErrorResponse) response.getBody(); + + // Assert + assertEquals(response.getStatusCode(), HttpStatus.BAD_REQUEST); + assertErrorResponse( + errorResponse, HttpStatus.BAD_REQUEST.value(), TEST_ERROR_MESSAGE, "BadRequestException"); + } + + @Test(expectedExceptions = RuntimeException.class) + public void testHandleGenericException() throws Exception { + // Arrange + RuntimeException exception = new 
RuntimeException(TEST_ERROR_MESSAGE); + + // Act & Assert + exceptionHandler.handle(exception); + } + + private void assertErrorResponse( + ErrorResponse errorResponse, int expectedCode, String expectedMessage, String expectedType) { + assertNotNull(errorResponse, "Error response should not be null"); + assertEquals(errorResponse.code(), expectedCode, "Response code should match"); + assertEquals(errorResponse.message(), expectedMessage, "Error message should match"); + assertEquals(errorResponse.type(), expectedType, "Error type should match"); + } +} diff --git a/metadata-service/iceberg-catalog/src/test/java/io/datahubproject/iceberg/catalog/rest/open/PublicIcebergApiControllerTest.java b/metadata-service/iceberg-catalog/src/test/java/io/datahubproject/iceberg/catalog/rest/open/PublicIcebergApiControllerTest.java new file mode 100644 index 00000000000000..b224ce28771a9b --- /dev/null +++ b/metadata-service/iceberg-catalog/src/test/java/io/datahubproject/iceberg/catalog/rest/open/PublicIcebergApiControllerTest.java @@ -0,0 +1,141 @@ +package io.datahubproject.iceberg.catalog.rest.open; + +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.Mockito.*; +import static org.testng.Assert.*; + +import com.fasterxml.jackson.databind.JsonNode; +import com.linkedin.common.GlobalTags; +import com.linkedin.common.TagAssociation; +import com.linkedin.common.TagAssociationArray; +import com.linkedin.common.urn.TagUrn; +import com.linkedin.dataset.IcebergCatalogInfo; +import com.linkedin.metadata.Constants; +import com.linkedin.platformresource.PlatformResourceInfo; +import io.datahubproject.iceberg.catalog.DataHubIcebergWarehouse; +import io.datahubproject.iceberg.catalog.rest.secure.AbstractControllerTest; +import java.io.IOException; +import java.io.InputStream; +import org.apache.iceberg.TableMetadata; +import org.apache.iceberg.TableMetadataParser; +import org.apache.iceberg.exceptions.NotFoundException; +import org.apache.iceberg.rest.responses.ConfigResponse; +import org.apache.iceberg.rest.responses.LoadTableResponse; +import org.apache.iceberg.util.JsonUtil; +import org.mockito.MockedStatic; +import org.mockito.Mockito; +import org.springframework.test.util.ReflectionTestUtils; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +public class PublicIcebergApiControllerTest + extends AbstractControllerTest { + + private static final String PUBLIC_TAG = "public-read"; + + @BeforeMethod + @Override + public void setup() throws Exception { + super.setup(); + ReflectionTestUtils.setField(controller, "isPublicReadEnabled", true); + ReflectionTestUtils.setField(controller, "publiclyReadableTag", PUBLIC_TAG); + } + + @Test(expectedExceptions = NotFoundException.class) + public void testGetConfigPublicReadDisabled() { + ReflectionTestUtils.setField(controller, "isPublicReadEnabled", false); + controller.getConfig("test-warehouse"); + } + + @Test(expectedExceptions = NotFoundException.class) + public void testGetConfigPublicReadDisabledWithEmptyTags() { + ReflectionTestUtils.setField(controller, "isPublicReadEnabled", true); + ReflectionTestUtils.setField(controller, "publiclyReadableTag", null); + controller.getConfig("test-warehouse"); + } + + @Test + public void testGetConfigValidWarehouse() { + // String warehouseName = "test-warehouse"; + ConfigResponse response = controller.getConfig(TEST_PLATFORM); + + assertNotNull(response, "Config response should not be null"); + 
assertNotNull(response.overrides(), "Overrides map should not be null"); + assertTrue(response.overrides().containsKey("prefix"), "Overrides should contain 'prefix' key"); + assertEquals( + response.overrides().get("prefix"), + TEST_PLATFORM, + "Warehouse name should match in the config override"); + } + + // @Test Seems to fail only in CI due to resource path. Disabling to unblock + public void testLoadTableWithPublicTag() throws Exception { + // Setup public tag + TagUrn publicTagUrn = TagUrn.createFromString("urn:li:tag:" + PUBLIC_TAG); + GlobalTags tags = new GlobalTags(); + TagAssociation tagAssociation = new TagAssociation(); + tagAssociation.setTag(publicTagUrn); + TagAssociationArray tagAssociations = new TagAssociationArray(); + tagAssociations.add(tagAssociation); + tags.setTags(tagAssociations); + + // Mock entity service response + Mockito.when(entityService.getLatestAspect(any(), any(), eq(Constants.GLOBAL_TAGS_ASPECT_NAME))) + .thenReturn(tags); + + IcebergCatalogInfo metadata = new IcebergCatalogInfo(); + metadata.setMetadataPointer(TEST_METADATA_LOCATION); + metadata.setView(false); + Mockito.when( + entityService.getLatestAspect( + any(), any(), eq(DataHubIcebergWarehouse.DATASET_ICEBERG_METADATA_ASPECT_NAME))) + .thenReturn(metadata); + + PlatformResourceInfo platformResourceInfo = new PlatformResourceInfo(); + platformResourceInfo.setPrimaryKey( + "urn:li:dataset:(urn:li:dataPlatform:iceberg," + TEST_PLATFORM + "._uuid__,PROD)"); + Mockito.when( + entityService.getLatestAspect( + any(), any(), eq(Constants.PLATFORM_RESOURCE_INFO_ASPECT_NAME))) + .thenReturn(platformResourceInfo); + + TableMetadata sampleMetadata = loadSampleMetadata(); + try (MockedStatic tableMetadataParserMock = + mockStatic(TableMetadataParser.class)) { + tableMetadataParserMock + .when(() -> TableMetadataParser.read(any(), anyString())) + .thenReturn(sampleMetadata); + + LoadTableResponse response = + controller.loadTable(TEST_PLATFORM, TEST_NAMESPACE, TEST_TABLE, null, null); + + assertNotNull(response, "Load table response should not be null"); + } + } + + TableMetadata loadSampleMetadata() { + try { + InputStream is = getClass().getClassLoader().getResourceAsStream("sample.metadata.json"); + return TableMetadataParser.fromJson( + TEST_METADATA_LOCATION, JsonUtil.mapper().readValue(is, JsonNode.class)); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + @Test(expectedExceptions = org.apache.iceberg.exceptions.NoSuchTableException.class) + public void testLoadTableWithoutPublicTag() throws Exception { + // Mock entity service response with empty tags + GlobalTags tags = new GlobalTags(); + Mockito.when(entityService.getLatestAspect(any(), any(), eq(Constants.GLOBAL_TAGS_ASPECT_NAME))) + .thenReturn(tags); + + controller.loadTable(TEST_PLATFORM, TEST_NAMESPACE, TEST_TABLE, null, null); + } + + @Override + protected PublicIcebergApiController newController() { + return new PublicIcebergApiController(); + } +} diff --git a/metadata-service/iceberg-catalog/src/test/java/io/datahubproject/iceberg/catalog/rest/secure/AbstractControllerTest.java b/metadata-service/iceberg-catalog/src/test/java/io/datahubproject/iceberg/catalog/rest/secure/AbstractControllerTest.java new file mode 100644 index 00000000000000..15ca1dd2b59693 --- /dev/null +++ b/metadata-service/iceberg-catalog/src/test/java/io/datahubproject/iceberg/catalog/rest/secure/AbstractControllerTest.java @@ -0,0 +1,161 @@ +package io.datahubproject.iceberg.catalog.rest.secure; + +import static org.mockito.ArgumentMatchers.any; 
+import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.Mockito.*; + +import com.datahub.authentication.Actor; +import com.datahub.authentication.ActorType; +import com.datahub.authentication.Authentication; +import com.datahub.authentication.AuthenticationContext; +import com.datahub.authorization.AuthorizationRequest; +import com.datahub.authorization.AuthorizationResult; +import com.datahub.plugins.auth.authorization.Authorizer; +import com.google.common.net.HttpHeaders; +import com.linkedin.common.urn.Urn; +import com.linkedin.data.template.RecordTemplate; +import com.linkedin.dataplatforminstance.IcebergWarehouseInfo; +import com.linkedin.metadata.entity.EntityService; +import com.linkedin.metadata.search.EntitySearchService; +import com.linkedin.secret.DataHubSecretValue; +import io.datahubproject.iceberg.catalog.DataHubIcebergWarehouse; +import io.datahubproject.iceberg.catalog.Utils; +import io.datahubproject.iceberg.catalog.credentials.CredentialProvider; +import io.datahubproject.metadata.context.OperationContext; +import io.datahubproject.metadata.services.SecretService; +import io.datahubproject.test.metadata.context.TestOperationContexts; +import jakarta.servlet.http.HttpServletRequest; +import java.lang.reflect.Field; +import java.util.*; +import org.mockito.Mock; +import org.mockito.MockitoAnnotations; +import org.testng.annotations.BeforeMethod; + +public abstract class AbstractControllerTest { + protected static final String TEST_PLATFORM = "test-platform"; + protected static final String TEST_USER = "test-user"; + protected static final String TEST_CREDENTIALS = "test-credentials"; + + protected static final String TEST_NAMESPACE = "test_namespace"; + protected static final String TEST_TABLE = "test_table"; + protected static final String TEST_METADATA_LOCATION = + "s3://test-location/test_table/sample.metadata.json"; + + @Mock protected EntityService entityService; + @Mock protected CredentialProvider credentialProvider; + @Mock protected Authorizer authorizer; + @Mock protected HttpServletRequest request; + @Mock protected SecretService secretService; + @Mock protected EntitySearchService entitySearchService; + + private OperationContext systemOperationContext; + private Authentication authentication; + private Actor actor; + private IcebergWarehouseInfo icebergWarehouse; + @Mock private RecordTemplate warehouseAspect; + protected T controller; + + @BeforeMethod + public void setup() throws Exception { + MockitoAnnotations.openMocks(this); + when(request.getHeader(HttpHeaders.X_FORWARDED_FOR)).thenReturn("1.2.3.4"); + setupAuthentication(); + setupController(); + setupWarehouseConfiguration(); + onSetup(); + } + + private void setupAuthentication() { + actor = new Actor(ActorType.USER, TEST_USER); + authentication = new Authentication(actor, TEST_CREDENTIALS, Collections.emptyMap()); + AuthenticationContext.setAuthentication(authentication); + } + + private void setupController() throws Exception { + controller = newController(); + systemOperationContext = + TestOperationContexts.systemContext(null, null, null, null, null, null, null, null); + + // Inject dependencies + injectControllerDependencies(); + setupDefaultAuthorization(true); + } + + private void injectControllerDependencies() throws Exception { + injectField("entityService", entityService); + injectField("secretService", secretService); + injectField("authorizer", authorizer); + injectField("systemOperationContext", systemOperationContext); + injectField("cachingCredentialProvider", 
credentialProvider); + } + + protected void setupDefaultAuthorization(boolean isAuthorized) { + AuthorizationResult.Type resultType = + isAuthorized ? AuthorizationResult.Type.ALLOW : AuthorizationResult.Type.DENY; + String message = isAuthorized ? "Authorized" : "Not authorized"; + + when(authorizer.authorize(any(AuthorizationRequest.class))) + .thenReturn(new AuthorizationResult(mock(AuthorizationRequest.class), resultType, message)); + } + + private void injectField(String fieldName, Object value) throws Exception { + Field field = AbstractIcebergController.class.getDeclaredField(fieldName); + field.setAccessible(true); + field.set(controller, value); + } + + private IcebergWarehouseInfo createTestWarehouse() throws Exception { + IcebergWarehouseInfo warehouse = new IcebergWarehouseInfo(); + warehouse.setClientId(Urn.createFromString("urn:li:secret:clientId")); + warehouse.setClientSecret(Urn.createFromString("urn:li:secret:clientSecret")); + warehouse.setDataRoot("s3://data-root/test/"); + warehouse.setRegion("us-east-1"); + warehouse.setRole("testRole"); + return warehouse; + } + + private void setupWarehouseConfiguration() throws Exception { + // Configure warehouse settings + icebergWarehouse = createTestWarehouse(); + setupWarehouseMocks(); + setupSecretValues(); + } + + private void setupWarehouseMocks() { + when(entityService.getLatestAspect( + any(), + eq(Utils.platformInstanceUrn(TEST_PLATFORM)), + eq(DataHubIcebergWarehouse.DATAPLATFORM_INSTANCE_ICEBERG_WAREHOUSE_ASPECT_NAME))) + .thenReturn(warehouseAspect); + when(warehouseAspect.data()).thenReturn(icebergWarehouse.data()); + } + + private void setupSecretValues() throws Exception { + + String clientId = "testClientId"; + String clientSecret = "testClientSecret"; + when(secretService.decrypt(clientId)).thenReturn("decrypt-" + clientId); + when(secretService.decrypt(clientSecret)).thenReturn("decrypt-" + clientSecret); + + DataHubSecretValue clientIdValue = new DataHubSecretValue(); + clientIdValue.setValue("testClientId"); + + DataHubSecretValue clientSecretValue = new DataHubSecretValue(); + clientSecretValue.setValue("testClientSecret"); + + Map> aspectsMap = new HashMap<>(); + aspectsMap.put(icebergWarehouse.getClientId(), Arrays.asList(clientIdValue)); + aspectsMap.put(icebergWarehouse.getClientSecret(), Arrays.asList(clientSecretValue)); + + when(entityService.getLatestAspects( + any(), + eq(Set.of(icebergWarehouse.getClientId(), icebergWarehouse.getClientSecret())), + eq(Set.of("dataHubSecretValue")), + eq(false))) + .thenReturn(aspectsMap); + } + + protected void onSetup() {} + + protected abstract T newController(); +} diff --git a/metadata-service/iceberg-catalog/src/test/java/io/datahubproject/iceberg/catalog/rest/secure/IcebergConfigApiControllerTest.java b/metadata-service/iceberg-catalog/src/test/java/io/datahubproject/iceberg/catalog/rest/secure/IcebergConfigApiControllerTest.java new file mode 100644 index 00000000000000..6d46fee474b9ff --- /dev/null +++ b/metadata-service/iceberg-catalog/src/test/java/io/datahubproject/iceberg/catalog/rest/secure/IcebergConfigApiControllerTest.java @@ -0,0 +1,56 @@ +package io.datahubproject.iceberg.catalog.rest.secure; + +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.eq; +import static org.testng.Assert.*; + +import io.datahubproject.iceberg.catalog.DataHubIcebergWarehouse; +import org.apache.iceberg.exceptions.NotFoundException; +import org.apache.iceberg.rest.responses.ConfigResponse; +import org.mockito.MockedStatic; +import 
org.mockito.Mockito; +import org.testng.annotations.Test; + +public class IcebergConfigApiControllerTest + extends AbstractControllerTest { + + @Test(expectedExceptions = NotFoundException.class) + public void testGetConfigNonExistentWarehouse() { + String warehouseName = "test-warehouse"; + + try (MockedStatic warehouseMock = + Mockito.mockStatic(DataHubIcebergWarehouse.class)) { + warehouseMock + .when(() -> DataHubIcebergWarehouse.of(eq(warehouseName), any(), any(), any())) + .thenThrow(new NotFoundException("")); + controller.getConfig(request, warehouseName); + } + } + + @Test + public void testGetConfigValidWarehouse() { + String warehouseName = "test-warehouse"; + + try (MockedStatic warehouseMock = + Mockito.mockStatic(DataHubIcebergWarehouse.class)) { + warehouseMock + .when(() -> DataHubIcebergWarehouse.of(eq(warehouseName), any(), any(), any())) + .thenReturn(null); + ConfigResponse response = controller.getConfig(request, warehouseName); + + assertNotNull(response, "Config response should not be null"); + assertNotNull(response.overrides(), "Overrides map should not be null"); + assertTrue( + response.overrides().containsKey("prefix"), "Overrides should contain 'prefix' key"); + assertEquals( + response.overrides().get("prefix"), + warehouseName, + "Warehouse name should match in the config override"); + } + } + + @Override + protected IcebergConfigApiController newController() { + return new IcebergConfigApiController(); + } +} diff --git a/metadata-service/iceberg-catalog/src/test/java/io/datahubproject/iceberg/catalog/rest/secure/IcebergNamespaceApiControllerTest.java b/metadata-service/iceberg-catalog/src/test/java/io/datahubproject/iceberg/catalog/rest/secure/IcebergNamespaceApiControllerTest.java new file mode 100644 index 00000000000000..42b1e24fc65dec --- /dev/null +++ b/metadata-service/iceberg-catalog/src/test/java/io/datahubproject/iceberg/catalog/rest/secure/IcebergNamespaceApiControllerTest.java @@ -0,0 +1,92 @@ +package io.datahubproject.iceberg.catalog.rest.secure; + +import static com.linkedin.metadata.Constants.CONTAINER_PROPERTIES_ASPECT_NAME; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.Mockito.*; +import static org.testng.Assert.*; + +import com.linkedin.common.urn.Urn; +import com.linkedin.container.ContainerProperties; +import io.datahubproject.iceberg.catalog.Utils; +import io.datahubproject.metadata.context.OperationContext; +import org.apache.iceberg.catalog.Namespace; +import org.apache.iceberg.exceptions.ForbiddenException; +import org.apache.iceberg.exceptions.NoSuchNamespaceException; +import org.apache.iceberg.rest.requests.CreateNamespaceRequest; +import org.apache.iceberg.rest.responses.*; +import org.testng.annotations.*; + +public class IcebergNamespaceApiControllerTest + extends AbstractControllerTest { + + private static final String TEST_PLATFORM = "test-platform"; + private Namespace namespace; + private String namespaceString; + + @Override + public void onSetup() { + setupNamespace(); + } + + @Override + protected IcebergNamespaceApiController newController() { + return new IcebergNamespaceApiController(); + } + + private void setupNamespace() { + Namespace namespaceParent = Namespace.of("db"); + namespace = Namespace.of("db", "schema"); + namespaceString = "db\u001fschema"; + + Urn parentContainerUrn = Utils.containerUrn(TEST_PLATFORM, namespaceParent); + Urn containerUrn = Utils.containerUrn(TEST_PLATFORM, namespace); + + 
doReturn(true).when(entityService).exists(any(OperationContext.class), eq(parentContainerUrn)); + + ContainerProperties containerProperties = new ContainerProperties(); + when(entityService.getLatestAspect( + any(OperationContext.class), eq(containerUrn), eq(CONTAINER_PROPERTIES_ASPECT_NAME))) + .thenReturn(containerProperties); + } + + @Test + public void testGetNamespace() throws Exception { + GetNamespaceResponse response = + controller.getNamespace(request, TEST_PLATFORM, namespaceString); + + assertNotNull(response); + assertEquals(response.namespace(), namespace); + } + + @Test + public void testCreateNamespace() throws Exception { + CreateNamespaceRequest createRequest = + CreateNamespaceRequest.builder().withNamespace(namespace).build(); + + CreateNamespaceResponse response = + controller.createNamespace(request, TEST_PLATFORM, createRequest); + + assertNotNull(response); + assertEquals(response.namespace(), namespace); + } + + @Test(expectedExceptions = ForbiddenException.class) + public void testCreateNamespaceUnauthorized() throws Exception { + setupDefaultAuthorization(false); + CreateNamespaceRequest createRequest = + CreateNamespaceRequest.builder().withNamespace(namespace).build(); + + controller.createNamespace(request, TEST_PLATFORM, createRequest); + } + + @Test(expectedExceptions = NoSuchNamespaceException.class) + public void testGetNamespaceNonexistent() throws Exception { + String missingNamespaceString = "db\u001fschema2"; + Namespace missingNamespace = Namespace.of("db", "schema2"); + Urn containerUrn = Utils.containerUrn(TEST_PLATFORM, missingNamespace); + + doReturn(false).when(entityService).exists(any(OperationContext.class), eq(containerUrn)); + controller.getNamespace(request, TEST_PLATFORM, missingNamespaceString); + } +} diff --git a/metadata-service/iceberg-catalog/src/tests/resources/iceberg-warehouse1.yaml b/metadata-service/iceberg-catalog/src/tests/resources/iceberg-warehouse1.yaml deleted file mode 100644 index e69de29bb2d1d6..00000000000000 diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/config/GlobalControllerExceptionHandler.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/config/GlobalControllerExceptionHandler.java index b78121573811f6..4809b483b74de1 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/config/GlobalControllerExceptionHandler.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/config/GlobalControllerExceptionHandler.java @@ -22,7 +22,8 @@ import org.springframework.web.servlet.mvc.support.DefaultHandlerExceptionResolver; @Slf4j -@ControllerAdvice +@ControllerAdvice( + basePackages = {"io.datahubproject.openapi", "com.datahub.graphql", "com.datahub.auth"}) public class GlobalControllerExceptionHandler extends DefaultHandlerExceptionResolver { @PostConstruct diff --git a/metadata-service/war/src/main/java/com/linkedin/gms/servlet/IcebergCatalogServletConfig.java b/metadata-service/war/src/main/java/com/linkedin/gms/servlet/IcebergCatalogServletConfig.java index 8f45395832e368..135bf21a237cf5 100644 --- a/metadata-service/war/src/main/java/com/linkedin/gms/servlet/IcebergCatalogServletConfig.java +++ b/metadata-service/war/src/main/java/com/linkedin/gms/servlet/IcebergCatalogServletConfig.java @@ -5,6 +5,6 @@ import org.springframework.web.servlet.config.annotation.EnableWebMvc; @EnableWebMvc -@ComponentScan(basePackages = {"com.datahub.iceberg.catalog"}) +@ComponentScan(basePackages = 
{"io.datahubproject.iceberg.catalog.rest"}) @Configuration public class IcebergCatalogServletConfig {} diff --git a/metadata-utils/src/main/java/com/linkedin/metadata/authorization/PoliciesConfig.java b/metadata-utils/src/main/java/com/linkedin/metadata/authorization/PoliciesConfig.java index 1f18bf74ec6029..6e4b7de7b3d2b7 100644 --- a/metadata-utils/src/main/java/com/linkedin/metadata/authorization/PoliciesConfig.java +++ b/metadata-utils/src/main/java/com/linkedin/metadata/authorization/PoliciesConfig.java @@ -408,9 +408,7 @@ public class PoliciesConfig { public static final Privilege DATA_READ_ONLY_PRIVILEGE = Privilege.of( - "DATA_READ_ONLY", - "Read only data-access", - "The ability to read the data in a dataset."); + "DATA_READ_ONLY", "Read only data-access", "The ability to read the data in a dataset."); public static final Privilege DATA_READ_WRITE_PRIVILEGE = Privilege.of( @@ -430,6 +428,12 @@ public class PoliciesConfig { "Manage namespaces", "The ability to create and drop namespaces."); + public static final Privilege DATA_LIST_ENTITIES_PRIVILEGE = + Privilege.of( + "DATA_LIST_ENTITIES", + "List tables, views & namespaces", + "The ability to list tables, views and namespaces."); + // Tag Privileges public static final Privilege EDIT_TAG_COLOR_PRIVILEGE = Privilege.of("EDIT_TAG_COLOR", "Edit Tag Color", "The ability to change the color of a Tag."); @@ -798,7 +802,8 @@ public class PoliciesConfig { ImmutableList.of( DATA_MANAGE_VIEWS_PRIVILEGE, DATA_MANAGE_TABLES_PRIVILEGE, - DATA_MANAGE_NAMESPACES_PRIVILEGE)); + DATA_MANAGE_NAMESPACES_PRIVILEGE, + DATA_LIST_ENTITIES_PRIVILEGE)); public static final List ENTITY_RESOURCE_PRIVILEGES = ImmutableList.of( From a559a027add41aed02af04d4a4c518da5b9ea7c0 Mon Sep 17 00:00:00 2001 From: David Leifker Date: Wed, 29 Jan 2025 16:42:04 -0600 Subject: [PATCH 3/3] fix(json-converter): fix iceberg json converter --- metadata-ingestion/src/datahub/entrypoints.py | 14 +++- .../rest/common/IcebergJsonConverter.java | 83 +++++++++++++++++++ .../rest/common/IcebergSpringWebConfig.java | 22 +---- .../config/SpringWebSchemaRegistryConfig.java | 17 +--- metadata-service/war/build.gradle | 1 + .../java/com/linkedin/gms/ServletConfig.java | 16 ++++ 6 files changed, 114 insertions(+), 39 deletions(-) create mode 100644 metadata-service/iceberg-catalog/src/main/java/io/datahubproject/iceberg/catalog/rest/common/IcebergJsonConverter.java diff --git a/metadata-ingestion/src/datahub/entrypoints.py b/metadata-ingestion/src/datahub/entrypoints.py index cdcbffc5c5c2bd..5ae465bbc89eea 100644 --- a/metadata-ingestion/src/datahub/entrypoints.py +++ b/metadata-ingestion/src/datahub/entrypoints.py @@ -20,7 +20,6 @@ from datahub.cli.env_utils import get_boolean_env_variable from datahub.cli.exists_cli import exists from datahub.cli.get_cli import get -from datahub.cli.iceberg_cli import iceberg from datahub.cli.ingest_cli import ingest from datahub.cli.migrate import migrate from datahub.cli.put_cli import put @@ -183,7 +182,18 @@ def init(use_password: bool = False) -> None: datahub.add_command(datacontract) datahub.add_command(assertions) datahub.add_command(container) -datahub.add_command(iceberg) + +try: + from datahub.cli.iceberg_cli import iceberg + + datahub.add_command(iceberg) +except ImportError as e: + logger.debug(f"Failed to load datahub iceberg command: {e}") + datahub.add_command( + make_shim_command( + "iceberg", "run `pip install 'acryl-datahub[iceberg-catalog]'`" + ) + ) try: from datahub.cli.lite_cli import lite diff --git 
a/metadata-service/iceberg-catalog/src/main/java/io/datahubproject/iceberg/catalog/rest/common/IcebergJsonConverter.java b/metadata-service/iceberg-catalog/src/main/java/io/datahubproject/iceberg/catalog/rest/common/IcebergJsonConverter.java new file mode 100644 index 00000000000000..4936f09fc42c3e --- /dev/null +++ b/metadata-service/iceberg-catalog/src/main/java/io/datahubproject/iceberg/catalog/rest/common/IcebergJsonConverter.java @@ -0,0 +1,83 @@ +package io.datahubproject.iceberg.catalog.rest.common; + +import com.fasterxml.jackson.databind.ObjectMapper; +import java.lang.reflect.GenericArrayType; +import java.lang.reflect.ParameterizedType; +import java.lang.reflect.Type; +import java.lang.reflect.WildcardType; +import javax.annotation.Nonnull; +import org.springframework.http.MediaType; +import org.springframework.http.converter.json.MappingJackson2HttpMessageConverter; + +public class IcebergJsonConverter extends MappingJackson2HttpMessageConverter { + private static final String ICEBERG_PACKAGE_PREFIX = "org.apache.iceberg."; + + public IcebergJsonConverter(ObjectMapper objectMapper) { + super(objectMapper); + } + + @Override + protected boolean supports(@Nonnull Class<?> clazz) { + return isClassInPackage(clazz); + } + + @Override + public boolean canRead(@Nonnull Type type, Class<?> contextClass, MediaType mediaType) { + return hasTypeInPackage(type) && super.canRead(type, contextClass, mediaType); + } + + @Override + public boolean canWrite(@Nonnull Class<?> clazz, MediaType mediaType) { + return isClassInPackage(clazz) && super.canWrite(clazz, mediaType); + } + + private boolean hasTypeInPackage(Type type) { + if (type instanceof Class) { + return isClassInPackage((Class<?>) type); + } + + if (type instanceof ParameterizedType) { + ParameterizedType paramType = (ParameterizedType) type; + + // Check raw type + Type rawType = paramType.getRawType(); + if (rawType instanceof Class && isClassInPackage((Class<?>) rawType)) { + return true; + } + + // Recursively check type arguments + for (Type typeArg : paramType.getActualTypeArguments()) { + if (hasTypeInPackage(typeArg)) { + return true; + } + } + } + + if (type instanceof WildcardType) { + WildcardType wildcardType = (WildcardType) type; + // Check upper bounds + for (Type bound : wildcardType.getUpperBounds()) { + if (hasTypeInPackage(bound)) { + return true; + } + } + // Check lower bounds + for (Type bound : wildcardType.getLowerBounds()) { + if (hasTypeInPackage(bound)) { + return true; + } + } + } + + if (type instanceof GenericArrayType) { + GenericArrayType arrayType = (GenericArrayType) type; + return hasTypeInPackage(arrayType.getGenericComponentType()); + } + + return false; + } + + private static boolean isClassInPackage(@Nonnull Class<?> clazz) { + return clazz.getName().startsWith(ICEBERG_PACKAGE_PREFIX); + } +} diff --git a/metadata-service/iceberg-catalog/src/main/java/io/datahubproject/iceberg/catalog/rest/common/IcebergSpringWebConfig.java b/metadata-service/iceberg-catalog/src/main/java/io/datahubproject/iceberg/catalog/rest/common/IcebergSpringWebConfig.java index 954cc512da8cca..79ee41417d878f 100644 --- a/metadata-service/iceberg-catalog/src/main/java/io/datahubproject/iceberg/catalog/rest/common/IcebergSpringWebConfig.java +++ b/metadata-service/iceberg-catalog/src/main/java/io/datahubproject/iceberg/catalog/rest/common/IcebergSpringWebConfig.java @@ -1,34 +1,14 @@ package io.datahubproject.iceberg.catalog.rest.common; -import com.fasterxml.jackson.annotation.JsonAutoDetect; -import 
com.fasterxml.jackson.annotation.PropertyAccessor; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.fasterxml.jackson.databind.PropertyNamingStrategies; import io.datahubproject.iceberg.catalog.credentials.CachingCredentialProvider; import io.datahubproject.iceberg.catalog.credentials.CredentialProvider; import io.datahubproject.iceberg.catalog.credentials.S3CredentialProvider; -import java.util.List; -import org.apache.iceberg.rest.RESTSerializers; import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Configuration; import org.springframework.http.converter.*; -import org.springframework.http.converter.json.MappingJackson2HttpMessageConverter; -import org.springframework.web.servlet.config.annotation.WebMvcConfigurer; @Configuration -public class IcebergSpringWebConfig implements WebMvcConfigurer { - @Override - public void extendMessageConverters(List> converters) { - for (HttpMessageConverter converter : converters) { - if (converter instanceof MappingJackson2HttpMessageConverter jsonConverter) { - ObjectMapper objectMapper = jsonConverter.getObjectMapper(); - objectMapper.setVisibility(PropertyAccessor.FIELD, JsonAutoDetect.Visibility.ANY); - objectMapper.setPropertyNamingStrategy(new PropertyNamingStrategies.KebabCaseStrategy()); - RESTSerializers.registerAll(objectMapper); - break; - } - } - } +public class IcebergSpringWebConfig { @Bean public CredentialProvider credentialProvider() { diff --git a/metadata-service/schema-registry-servlet/src/main/java/io/datahubproject/openapi/schema/registry/config/SpringWebSchemaRegistryConfig.java b/metadata-service/schema-registry-servlet/src/main/java/io/datahubproject/openapi/schema/registry/config/SpringWebSchemaRegistryConfig.java index c2200eef6e60f5..56b4e575301799 100644 --- a/metadata-service/schema-registry-servlet/src/main/java/io/datahubproject/openapi/schema/registry/config/SpringWebSchemaRegistryConfig.java +++ b/metadata-service/schema-registry-servlet/src/main/java/io/datahubproject/openapi/schema/registry/config/SpringWebSchemaRegistryConfig.java @@ -8,7 +8,6 @@ import io.swagger.v3.oas.annotations.info.Info; import io.swagger.v3.oas.annotations.servers.Server; import jakarta.servlet.http.HttpServletRequest; -import java.util.List; import lombok.extern.slf4j.Slf4j; import org.springframework.beans.factory.annotation.Qualifier; import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty; @@ -16,13 +15,7 @@ import org.springframework.context.annotation.ComponentScan; import org.springframework.context.annotation.Configuration; import org.springframework.core.annotation.Order; -import org.springframework.http.converter.ByteArrayHttpMessageConverter; -import org.springframework.http.converter.FormHttpMessageConverter; -import org.springframework.http.converter.HttpMessageConverter; -import org.springframework.http.converter.StringHttpMessageConverter; -import org.springframework.http.converter.json.MappingJackson2HttpMessageConverter; import org.springframework.web.servlet.config.annotation.EnableWebMvc; -import org.springframework.web.servlet.config.annotation.WebMvcConfigurer; @Slf4j @EnableWebMvc @@ -35,7 +28,7 @@ havingValue = InternalSchemaRegistryFactory.TYPE) @Configuration @ComponentScan(basePackages = {"io.datahubproject.openapi.schema.registry"}) -public class SpringWebSchemaRegistryConfig implements WebMvcConfigurer { +public class SpringWebSchemaRegistryConfig { @Bean public SchemaRegistryController schemaRegistryController( @@ -44,12 +37,4 @@ 
public SchemaRegistryController schemaRegistryController( @Qualifier("schemaRegistryService") SchemaRegistryService schemaRegistryService) { return new SchemaRegistryController(objectMapper, request, schemaRegistryService); } - - @Override - public void configureMessageConverters(List> messageConverters) { - messageConverters.add(new StringHttpMessageConverter()); - messageConverters.add(new ByteArrayHttpMessageConverter()); - messageConverters.add(new FormHttpMessageConverter()); - messageConverters.add(new MappingJackson2HttpMessageConverter()); - } } diff --git a/metadata-service/war/build.gradle b/metadata-service/war/build.gradle index b173a1dd607f3a..59e0f3d80a24ae 100644 --- a/metadata-service/war/build.gradle +++ b/metadata-service/war/build.gradle @@ -24,6 +24,7 @@ dependencies { implementation project(':metadata-service:openapi-analytics-servlet') implementation project(':metadata-service:schema-registry-servlet') implementation project(':metadata-service:iceberg-catalog') + implementation 'org.apache.iceberg:iceberg-core:1.6.1' runtimeOnly project(':metadata-jobs:mce-consumer') runtimeOnly project(':metadata-jobs:mae-consumer') runtimeOnly project(':metadata-jobs:pe-consumer') diff --git a/metadata-service/war/src/main/java/com/linkedin/gms/ServletConfig.java b/metadata-service/war/src/main/java/com/linkedin/gms/ServletConfig.java index fbd8d72b76c8f2..e09b38a4f1dbb4 100644 --- a/metadata-service/war/src/main/java/com/linkedin/gms/ServletConfig.java +++ b/metadata-service/war/src/main/java/com/linkedin/gms/ServletConfig.java @@ -7,16 +7,21 @@ import com.datahub.gms.servlet.Config; import com.datahub.gms.servlet.ConfigSearchExport; import com.datahub.gms.servlet.HealthCheck; +import com.fasterxml.jackson.annotation.JsonAutoDetect; import com.fasterxml.jackson.annotation.JsonInclude; +import com.fasterxml.jackson.annotation.PropertyAccessor; import com.fasterxml.jackson.core.StreamReadConstraints; import com.fasterxml.jackson.databind.DeserializationFeature; import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.PropertyNamingStrategies; import com.linkedin.r2.transport.http.server.RAPJakartaServlet; import com.linkedin.restli.server.RestliHandlerServlet; +import io.datahubproject.iceberg.catalog.rest.common.IcebergJsonConverter; import io.datahubproject.openapi.converter.StringToChangeCategoryConverter; import java.util.List; import javax.annotation.Nonnull; import lombok.extern.slf4j.Slf4j; +import org.apache.iceberg.rest.RESTSerializers; import org.springframework.beans.factory.annotation.Value; import org.springframework.boot.web.servlet.FilterRegistrationBean; import org.springframework.boot.web.servlet.ServletRegistrationBean; @@ -129,6 +134,7 @@ public void configureMessageConverters(List> messageConv messageConverters.add(new StringHttpMessageConverter()); messageConverters.add(new ByteArrayHttpMessageConverter()); messageConverters.add(new FormHttpMessageConverter()); + messageConverters.add(createIcebergMessageConverter()); ObjectMapper objectMapper = new ObjectMapper(); int maxSize = @@ -145,6 +151,16 @@ public void configureMessageConverters(List> messageConv messageConverters.add(jsonConverter); } + private HttpMessageConverter createIcebergMessageConverter() { + ObjectMapper objectMapper = new ObjectMapper(); + MappingJackson2HttpMessageConverter jsonConverter = new IcebergJsonConverter(objectMapper); + + objectMapper.setVisibility(PropertyAccessor.FIELD, JsonAutoDetect.Visibility.ANY); + objectMapper.setPropertyNamingStrategy(new 
PropertyNamingStrategies.KebabCaseStrategy()); + RESTSerializers.registerAll(objectMapper); + return jsonConverter; + } + @Override public void addFormatters(FormatterRegistry registry) { registry.addConverter(new StringToChangeCategoryConverter());