From 8415fc214b1b9d0f99eafbffc01f3e46043f54dc Mon Sep 17 00:00:00 2001 From: david-leifker <114954101+david-leifker@users.noreply.github.com> Date: Mon, 8 Jan 2024 14:20:03 -0600 Subject: [PATCH 01/16] feat(entity-registry): entity registry plugins (#9538) --- build.gradle | 3 +- .../linkedin/metadata/aspect/plugins/config | 1 + .../linkedin/datahub/graphql/TestUtils.java | 71 ++-- .../BatchUpdateSoftDeletedResolverTest.java | 8 +- .../BatchUpdateDeprecationResolverTest.java | 8 +- .../domain/BatchSetDomainResolverTest.java | 8 +- .../embed/UpdateEmbedResolverTest.java | 9 +- .../owner/AddOwnersResolverTest.java | 8 +- .../owner/BatchAddOwnersResolverTest.java | 8 +- .../owner/BatchRemoveOwnersResolverTest.java | 8 +- .../resolvers/tag/AddTagsResolverTest.java | 9 +- .../tag/BatchAddTagsResolverTest.java | 23 +- .../tag/BatchRemoveTagsResolverTest.java | 18 +- .../resolvers/term/AddTermsResolverTest.java | 29 +- .../term/BatchAddTermsResolverTest.java | 8 +- .../term/BatchRemoveTermsResolverTest.java | 8 +- .../restorebackup/RestoreStorageStep.java | 2 +- .../upgrade/restoreindices/SendMAEStep.java | 2 +- entity-registry/build.gradle | 3 + .../metadata/aspect/batch/AspectsBatch.java | 100 +++++ .../metadata/aspect/batch/BatchItem.java | 66 +++ .../metadata/aspect/batch/MCLBatchItem.java | 58 +++ .../metadata/aspect/batch/MCPBatchItem.java | 46 ++ .../metadata/aspect/batch/PatchItem.java | 26 ++ .../metadata/aspect/batch/SystemAspect.java | 25 ++ .../metadata/aspect/batch/UpsertItem.java | 24 ++ .../aspect/plugins/PluginFactory.java | 269 ++++++++++++ .../metadata/aspect/plugins/PluginSpec.java | 56 +++ .../plugins/config/AspectPluginConfig.java | 50 +++ .../plugins/config/PluginConfiguration.java | 33 ++ .../aspect/plugins/hooks/MCLSideEffect.java | 38 ++ .../aspect/plugins/hooks/MCPSideEffect.java | 36 ++ .../aspect/plugins/hooks/MutationHook.java | 68 +++ .../validation/AspectPayloadValidator.java | 83 ++++ .../plugins/validation/AspectRetriever.java | 13 + .../validation/AspectValidationException.java | 12 + .../metadata/models/DataSchemaFactory.java | 68 +-- .../models/registry/ConfigEntityRegistry.java | 21 +- .../models/registry/EntityRegistry.java | 107 +++++ .../models/registry/MergedEntityRegistry.java | 20 + .../models/registry/PatchEntityRegistry.java | 8 + .../registry/PluginEntityRegistryLoader.java | 5 + .../models/registry/config/Entities.java | 2 + .../config/EntityRegistryLoadResult.java | 17 + .../metadata/aspect/plugins/PluginsTest.java | 211 ++++++++++ .../plugins/hooks/MCLSideEffectTest.java | 69 +++ .../plugins/hooks/MCPSideEffectTest.java | 67 +++ .../plugins/hooks/MutationPluginTest.java | 76 ++++ .../validation/ValidatorPluginTest.java | 97 +++++ .../registry/PatchEntityRegistryTest.java | 19 +- .../test-entity-registry-plugins-1.yml | 67 +++ .../test-entity-registry-plugins-2.yml | 45 ++ .../test-entity-registry-plugins-3.yml | 38 ++ .../metadata/client/JavaEntityClient.java | 16 +- .../client/SystemJavaEntityClient.java | 3 +- .../linkedin/metadata/entity/AspectDao.java | 2 +- .../metadata/entity/EntityAspect.java | 77 ++++ .../metadata/entity/EntityServiceImpl.java | 392 ++++++++++-------- .../linkedin/metadata/entity/EntityUtils.java | 11 +- .../entity/cassandra/CassandraAspectDao.java | 26 +- .../cassandra/CassandraRetentionService.java | 23 +- .../metadata/entity/ebean/EbeanAspectDao.java | 2 +- .../entity/ebean/EbeanRetentionService.java | 23 +- .../entity/ebean/batch/AspectsBatchImpl.java | 143 +++++++ .../entity/ebean/batch/MCLBatchItemImpl.java | 157 
+++++++ .../MCPPatchBatchItem.java} | 77 ++-- .../MCPUpsertBatchItem.java} | 137 ++++-- .../ebean/transactions/AspectsBatchImpl.java | 71 ---- .../EntityRegistryUrnValidator.java | 12 +- .../entity/validation/ValidationUtils.java | 34 +- .../service/UpdateIndicesService.java | 89 ++-- .../metadata/AspectIngestionUtils.java | 45 +- .../entity/CassandraEntityServiceTest.java | 2 +- .../entity/EbeanEntityServiceTest.java | 56 ++- .../metadata/entity/EntityServiceTest.java | 190 +++++---- .../TimeseriesAspectServiceTestBase.java | 1 + .../kafka/hook/UpdateIndicesHookTest.java | 12 + .../test/resources/test-entity-registry.yml | 9 + metadata-models-custom/README.md | 246 +++++++++++ metadata-models-custom/build.gradle | 21 +- .../registry/entity-registry.yaml | 36 +- .../CustomDataQualityRulesMCLSideEffect.java | 72 ++++ .../CustomDataQualityRulesMCPSideEffect.java | 33 ++ .../hooks/CustomDataQualityRulesMutator.java | 45 ++ .../CustomDataQualityRulesValidator.java | 70 ++++ .../com/mycompany/dq/DataQualityRuleEvent.pdl | 44 ++ .../token/StatefulTokenService.java | 14 +- .../factory/entity/EntityServiceFactory.java | 3 +- .../entity/JavaEntityClientFactory.java | 3 +- .../entity/RetentionServiceFactory.java | 3 +- .../linkedin/metadata/boot/UpgradeStep.java | 3 +- .../IngestDataPlatformInstancesStep.java | 25 +- .../boot/steps/IngestDataPlatformsStep.java | 35 +- .../boot/steps/IngestOwnershipTypesStep.java | 9 +- .../boot/steps/IngestPoliciesStep.java | 13 +- .../metadata/boot/steps/IngestRolesStep.java | 13 +- .../steps/RestoreColumnLineageIndices.java | 4 +- .../boot/steps/RestoreDbtSiblingsIndices.java | 3 +- .../IngestDataPlatformInstancesStepTest.java | 10 +- .../delegates/EntityApiDelegateImpl.java | 8 +- .../openapi/entities/EntitiesController.java | 22 +- .../entities/PlatformEntitiesController.java | 17 +- .../openapi/util/MappingUtil.java | 17 +- .../java/entities/EntitiesControllerTest.java | 2 +- .../src/test/java/mock/MockEntityService.java | 3 +- .../linkedin/entity/client/EntityClient.java | 12 +- .../entity/client/SystemEntityClient.java | 12 +- .../resources/entity/AspectResource.java | 54 ++- .../resources/entity/EntityResource.java | 2 +- .../metadata/resources/operations/Utils.java | 2 +- .../resources/entity/AspectResourceTest.java | 13 +- .../linkedin/metadata/entity/AspectUtils.java | 42 +- .../metadata/entity/DeleteEntityService.java | 2 +- .../metadata/entity/EntityService.java | 26 +- .../metadata/entity/IngestResult.java | 4 +- .../metadata/entity/RetentionService.java | 20 +- .../metadata/entity/UpdateAspectResult.java | 4 +- .../transactions/AbstractBatchItem.java | 94 ----- .../entity/transactions/AspectsBatch.java | 26 -- .../metadata/utils/SystemMetadataUtils.java | 6 + 120 files changed, 3877 insertions(+), 1000 deletions(-) create mode 120000 buildSrc/src/main/java/com/linkedin/metadata/aspect/plugins/config create mode 100644 entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/AspectsBatch.java create mode 100644 entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/BatchItem.java create mode 100644 entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/MCLBatchItem.java create mode 100644 entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/MCPBatchItem.java create mode 100644 entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/PatchItem.java create mode 100644 entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/SystemAspect.java create mode 100644 
entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/UpsertItem.java create mode 100644 entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/PluginFactory.java create mode 100644 entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/PluginSpec.java create mode 100644 entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/config/AspectPluginConfig.java create mode 100644 entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/config/PluginConfiguration.java create mode 100644 entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/hooks/MCLSideEffect.java create mode 100644 entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/hooks/MCPSideEffect.java create mode 100644 entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/hooks/MutationHook.java create mode 100644 entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/validation/AspectPayloadValidator.java create mode 100644 entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/validation/AspectRetriever.java create mode 100644 entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/validation/AspectValidationException.java create mode 100644 entity-registry/src/test/java/com/linkedin/metadata/aspect/plugins/PluginsTest.java create mode 100644 entity-registry/src/test/java/com/linkedin/metadata/aspect/plugins/hooks/MCLSideEffectTest.java create mode 100644 entity-registry/src/test/java/com/linkedin/metadata/aspect/plugins/hooks/MCPSideEffectTest.java create mode 100644 entity-registry/src/test/java/com/linkedin/metadata/aspect/plugins/hooks/MutationPluginTest.java create mode 100644 entity-registry/src/test/java/com/linkedin/metadata/aspect/plugins/validation/ValidatorPluginTest.java create mode 100644 entity-registry/src/test/resources/test-entity-registry-plugins-1.yml create mode 100644 entity-registry/src/test/resources/test-entity-registry-plugins-2.yml create mode 100644 entity-registry/src/test/resources/test-entity-registry-plugins-3.yml create mode 100644 metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/batch/AspectsBatchImpl.java create mode 100644 metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/batch/MCLBatchItemImpl.java rename metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/{transactions/PatchBatchItem.java => batch/MCPPatchBatchItem.java} (71%) rename metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/{transactions/UpsertBatchItem.java => batch/MCPUpsertBatchItem.java} (52%) delete mode 100644 metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/transactions/AspectsBatchImpl.java create mode 100644 metadata-models-custom/src/main/java/com/linkedin/metadata/aspect/plugins/hooks/CustomDataQualityRulesMCLSideEffect.java create mode 100644 metadata-models-custom/src/main/java/com/linkedin/metadata/aspect/plugins/hooks/CustomDataQualityRulesMCPSideEffect.java create mode 100644 metadata-models-custom/src/main/java/com/linkedin/metadata/aspect/plugins/hooks/CustomDataQualityRulesMutator.java create mode 100644 metadata-models-custom/src/main/java/com/linkedin/metadata/aspect/plugins/validation/CustomDataQualityRulesValidator.java create mode 100644 metadata-models-custom/src/main/pegasus/com/mycompany/dq/DataQualityRuleEvent.pdl delete mode 100644 metadata-service/services/src/main/java/com/linkedin/metadata/entity/transactions/AbstractBatchItem.java delete mode 100644 
metadata-service/services/src/main/java/com/linkedin/metadata/entity/transactions/AspectsBatch.java diff --git a/build.gradle b/build.gradle index 4680598165d28..ee9e530753271 100644 --- a/build.gradle +++ b/build.gradle @@ -231,7 +231,8 @@ project.ext.externalDependency = [ 'common': 'commons-io:commons-io:2.7', 'jline':'jline:jline:1.4.1', 'jetbrains':' org.jetbrains.kotlin:kotlin-stdlib:1.6.0', - 'annotationApi': 'javax.annotation:javax.annotation-api:1.3.2' + 'annotationApi': 'javax.annotation:javax.annotation-api:1.3.2', + 'classGraph': 'io.github.classgraph:classgraph:4.8.165', ] allprojects { diff --git a/buildSrc/src/main/java/com/linkedin/metadata/aspect/plugins/config b/buildSrc/src/main/java/com/linkedin/metadata/aspect/plugins/config new file mode 120000 index 0000000000000..087629f8ac1df --- /dev/null +++ b/buildSrc/src/main/java/com/linkedin/metadata/aspect/plugins/config @@ -0,0 +1 @@ +../../../../../../../../../entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/config \ No newline at end of file diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/TestUtils.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/TestUtils.java index 69cd73ecd7d68..de507eda8cdef 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/TestUtils.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/TestUtils.java @@ -1,5 +1,7 @@ package com.linkedin.datahub.graphql; +import static org.mockito.Mockito.mock; + import com.datahub.authentication.Actor; import com.datahub.authentication.ActorType; import com.datahub.authentication.Authentication; @@ -10,7 +12,8 @@ import com.linkedin.common.urn.UrnUtils; import com.linkedin.data.schema.annotation.PathSpecBasedSchemaAnnotationVisitor; import com.linkedin.metadata.entity.EntityService; -import com.linkedin.metadata.entity.ebean.transactions.AspectsBatchImpl; +import com.linkedin.metadata.entity.ebean.batch.AspectsBatchImpl; +import com.linkedin.metadata.entity.ebean.batch.MCPUpsertBatchItem; import com.linkedin.metadata.models.registry.ConfigEntityRegistry; import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.mxe.MetadataChangeProposal; @@ -19,13 +22,14 @@ public class TestUtils { - public static EntityService getMockEntityService() { + public static EntityService getMockEntityService() { PathSpecBasedSchemaAnnotationVisitor.class .getClassLoader() .setClassAssertionStatus(PathSpecBasedSchemaAnnotationVisitor.class.getName(), false); EntityRegistry registry = new ConfigEntityRegistry(TestUtils.class.getResourceAsStream("/test-entity-registry.yaml")); - EntityService mockEntityService = Mockito.mock(EntityService.class); + EntityService mockEntityService = + (EntityService) Mockito.mock(EntityService.class); Mockito.when(mockEntityService.getEntityRegistry()).thenReturn(registry); return mockEntityService; } @@ -35,11 +39,11 @@ public static QueryContext getMockAllowContext() { } public static QueryContext getMockAllowContext(String actorUrn) { - QueryContext mockContext = Mockito.mock(QueryContext.class); + QueryContext mockContext = mock(QueryContext.class); Mockito.when(mockContext.getActorUrn()).thenReturn(actorUrn); - Authorizer mockAuthorizer = Mockito.mock(Authorizer.class); - AuthorizationResult result = Mockito.mock(AuthorizationResult.class); + Authorizer mockAuthorizer = mock(Authorizer.class); + AuthorizationResult result = mock(AuthorizationResult.class); 
Mockito.when(result.getType()).thenReturn(AuthorizationResult.Type.ALLOW); Mockito.when(mockAuthorizer.authorize(Mockito.any())).thenReturn(result); @@ -52,11 +56,11 @@ public static QueryContext getMockAllowContext(String actorUrn) { } public static QueryContext getMockAllowContext(String actorUrn, AuthorizationRequest request) { - QueryContext mockContext = Mockito.mock(QueryContext.class); + QueryContext mockContext = mock(QueryContext.class); Mockito.when(mockContext.getActorUrn()).thenReturn(actorUrn); - Authorizer mockAuthorizer = Mockito.mock(Authorizer.class); - AuthorizationResult result = Mockito.mock(AuthorizationResult.class); + Authorizer mockAuthorizer = mock(Authorizer.class); + AuthorizationResult result = mock(AuthorizationResult.class); Mockito.when(result.getType()).thenReturn(AuthorizationResult.Type.ALLOW); Mockito.when(mockAuthorizer.authorize(Mockito.eq(request))).thenReturn(result); @@ -73,11 +77,11 @@ public static QueryContext getMockDenyContext() { } public static QueryContext getMockDenyContext(String actorUrn) { - QueryContext mockContext = Mockito.mock(QueryContext.class); + QueryContext mockContext = mock(QueryContext.class); Mockito.when(mockContext.getActorUrn()).thenReturn(actorUrn); - Authorizer mockAuthorizer = Mockito.mock(Authorizer.class); - AuthorizationResult result = Mockito.mock(AuthorizationResult.class); + Authorizer mockAuthorizer = mock(Authorizer.class); + AuthorizationResult result = mock(AuthorizationResult.class); Mockito.when(result.getType()).thenReturn(AuthorizationResult.Type.DENY); Mockito.when(mockAuthorizer.authorize(Mockito.any())).thenReturn(result); @@ -90,11 +94,11 @@ public static QueryContext getMockDenyContext(String actorUrn) { } public static QueryContext getMockDenyContext(String actorUrn, AuthorizationRequest request) { - QueryContext mockContext = Mockito.mock(QueryContext.class); + QueryContext mockContext = mock(QueryContext.class); Mockito.when(mockContext.getActorUrn()).thenReturn(actorUrn); - Authorizer mockAuthorizer = Mockito.mock(Authorizer.class); - AuthorizationResult result = Mockito.mock(AuthorizationResult.class); + Authorizer mockAuthorizer = mock(Authorizer.class); + AuthorizationResult result = mock(AuthorizationResult.class); Mockito.when(result.getType()).thenReturn(AuthorizationResult.Type.DENY); Mockito.when(mockAuthorizer.authorize(Mockito.eq(request))).thenReturn(result); @@ -107,32 +111,44 @@ public static QueryContext getMockDenyContext(String actorUrn, AuthorizationRequ } public static void verifyIngestProposal( - EntityService mockService, int numberOfInvocations, MetadataChangeProposal proposal) { + EntityService mockService, + int numberOfInvocations, + MetadataChangeProposal proposal) { verifyIngestProposal(mockService, numberOfInvocations, List.of(proposal)); } public static void verifyIngestProposal( - EntityService mockService, int numberOfInvocations, List proposals) { + EntityService mockService, + int numberOfInvocations, + List proposals) { AspectsBatchImpl batch = - AspectsBatchImpl.builder().mcps(proposals, mockService.getEntityRegistry()).build(); + AspectsBatchImpl.builder() + .mcps( + proposals, + mock(AuditStamp.class), + mockService.getEntityRegistry(), + mockService.getSystemEntityClient()) + .build(); Mockito.verify(mockService, Mockito.times(numberOfInvocations)) - .ingestProposal(Mockito.eq(batch), Mockito.any(AuditStamp.class), Mockito.eq(false)); + .ingestProposal(Mockito.eq(batch), Mockito.eq(false)); } public static void verifySingleIngestProposal( - EntityService 
mockService, int numberOfInvocations, MetadataChangeProposal proposal) { + EntityService mockService, + int numberOfInvocations, + MetadataChangeProposal proposal) { Mockito.verify(mockService, Mockito.times(numberOfInvocations)) .ingestProposal(Mockito.eq(proposal), Mockito.any(AuditStamp.class), Mockito.eq(false)); } - public static void verifyIngestProposal(EntityService mockService, int numberOfInvocations) { + public static void verifyIngestProposal( + EntityService mockService, int numberOfInvocations) { Mockito.verify(mockService, Mockito.times(numberOfInvocations)) - .ingestProposal( - Mockito.any(AspectsBatchImpl.class), Mockito.any(AuditStamp.class), Mockito.eq(false)); + .ingestProposal(Mockito.any(AspectsBatchImpl.class), Mockito.eq(false)); } public static void verifySingleIngestProposal( - EntityService mockService, int numberOfInvocations) { + EntityService mockService, int numberOfInvocations) { Mockito.verify(mockService, Mockito.times(numberOfInvocations)) .ingestProposal( Mockito.any(MetadataChangeProposal.class), @@ -140,12 +156,9 @@ public static void verifySingleIngestProposal( Mockito.eq(false)); } - public static void verifyNoIngestProposal(EntityService mockService) { + public static void verifyNoIngestProposal(EntityService mockService) { Mockito.verify(mockService, Mockito.times(0)) - .ingestProposal( - Mockito.any(AspectsBatchImpl.class), - Mockito.any(AuditStamp.class), - Mockito.anyBoolean()); + .ingestProposal(Mockito.any(AspectsBatchImpl.class), Mockito.anyBoolean()); } private TestUtils() {} diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/delete/BatchUpdateSoftDeletedResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/delete/BatchUpdateSoftDeletedResolverTest.java index 49ccc751d35f6..56b01be29e163 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/delete/BatchUpdateSoftDeletedResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/delete/BatchUpdateSoftDeletedResolverTest.java @@ -5,7 +5,6 @@ import static org.testng.Assert.*; import com.google.common.collect.ImmutableList; -import com.linkedin.common.AuditStamp; import com.linkedin.common.Status; import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; @@ -15,7 +14,7 @@ import com.linkedin.datahub.graphql.resolvers.mutate.MutationUtils; import com.linkedin.metadata.Constants; import com.linkedin.metadata.entity.EntityService; -import com.linkedin.metadata.entity.ebean.transactions.AspectsBatchImpl; +import com.linkedin.metadata.entity.ebean.batch.AspectsBatchImpl; import com.linkedin.mxe.MetadataChangeProposal; import graphql.schema.DataFetchingEnvironment; import java.util.List; @@ -184,10 +183,7 @@ public void testGetEntityClientException() throws Exception { Mockito.doThrow(RuntimeException.class) .when(mockService) - .ingestProposal( - Mockito.any(AspectsBatchImpl.class), - Mockito.any(AuditStamp.class), - Mockito.anyBoolean()); + .ingestProposal(Mockito.any(AspectsBatchImpl.class), Mockito.anyBoolean()); BatchUpdateSoftDeletedResolver resolver = new BatchUpdateSoftDeletedResolver(mockService); diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/deprecation/BatchUpdateDeprecationResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/deprecation/BatchUpdateDeprecationResolverTest.java index 8c3620fa978a9..be7f200a6b9d7 100644 --- 
a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/deprecation/BatchUpdateDeprecationResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/deprecation/BatchUpdateDeprecationResolverTest.java @@ -5,7 +5,6 @@ import static org.testng.Assert.*; import com.google.common.collect.ImmutableList; -import com.linkedin.common.AuditStamp; import com.linkedin.common.Deprecation; import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; @@ -16,7 +15,7 @@ import com.linkedin.datahub.graphql.resolvers.mutate.MutationUtils; import com.linkedin.metadata.Constants; import com.linkedin.metadata.entity.EntityService; -import com.linkedin.metadata.entity.ebean.transactions.AspectsBatchImpl; +import com.linkedin.metadata.entity.ebean.batch.AspectsBatchImpl; import com.linkedin.mxe.MetadataChangeProposal; import graphql.schema.DataFetchingEnvironment; import java.util.List; @@ -217,10 +216,7 @@ public void testGetEntityClientException() throws Exception { Mockito.doThrow(RuntimeException.class) .when(mockService) - .ingestProposal( - Mockito.any(AspectsBatchImpl.class), - Mockito.any(AuditStamp.class), - Mockito.anyBoolean()); + .ingestProposal(Mockito.any(AspectsBatchImpl.class), Mockito.anyBoolean()); BatchUpdateDeprecationResolver resolver = new BatchUpdateDeprecationResolver(mockService); diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/domain/BatchSetDomainResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/domain/BatchSetDomainResolverTest.java index d5ba88066e846..32f0d30e7751a 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/domain/BatchSetDomainResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/domain/BatchSetDomainResolverTest.java @@ -5,7 +5,6 @@ import static org.testng.Assert.*; import com.google.common.collect.ImmutableList; -import com.linkedin.common.AuditStamp; import com.linkedin.common.UrnArray; import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; @@ -18,7 +17,7 @@ import com.linkedin.events.metadata.ChangeType; import com.linkedin.metadata.Constants; import com.linkedin.metadata.entity.EntityService; -import com.linkedin.metadata.entity.ebean.transactions.AspectsBatchImpl; +import com.linkedin.metadata.entity.ebean.batch.AspectsBatchImpl; import com.linkedin.metadata.utils.GenericRecordUtils; import com.linkedin.mxe.MetadataChangeProposal; import graphql.schema.DataFetchingEnvironment; @@ -311,10 +310,7 @@ public void testGetEntityClientException() throws Exception { Mockito.doThrow(RuntimeException.class) .when(mockService) - .ingestProposal( - Mockito.any(AspectsBatchImpl.class), - Mockito.any(AuditStamp.class), - Mockito.anyBoolean()); + .ingestProposal(Mockito.any(AspectsBatchImpl.class), Mockito.anyBoolean()); BatchSetDomainResolver resolver = new BatchSetDomainResolver(mockService); diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/embed/UpdateEmbedResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/embed/UpdateEmbedResolverTest.java index 45a17744a2697..241951319c75e 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/embed/UpdateEmbedResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/embed/UpdateEmbedResolverTest.java @@ -7,7 +7,6 @@ import 
com.datahub.authentication.Authentication; import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; -import com.linkedin.common.AuditStamp; import com.linkedin.common.Embed; import com.linkedin.common.urn.CorpuserUrn; import com.linkedin.common.urn.Urn; @@ -19,7 +18,7 @@ import com.linkedin.entity.client.EntityClient; import com.linkedin.metadata.Constants; import com.linkedin.metadata.entity.EntityService; -import com.linkedin.metadata.entity.ebean.transactions.AspectsBatchImpl; +import com.linkedin.metadata.entity.ebean.batch.AspectsBatchImpl; import com.linkedin.mxe.MetadataChangeProposal; import com.linkedin.r2.RemoteInvocationException; import graphql.schema.DataFetchingEnvironment; @@ -142,8 +141,7 @@ public void testGetFailureEntityDoesNotExist() throws Exception { assertThrows(CompletionException.class, () -> resolver.get(mockEnv).join()); Mockito.verify(mockService, Mockito.times(0)) - .ingestProposal( - Mockito.any(AspectsBatchImpl.class), Mockito.any(AuditStamp.class), Mockito.eq(false)); + .ingestProposal(Mockito.any(AspectsBatchImpl.class), Mockito.eq(false)); ; } @@ -161,8 +159,7 @@ public void testGetUnauthorized() throws Exception { assertThrows(CompletionException.class, () -> resolver.get(mockEnv).join()); Mockito.verify(mockService, Mockito.times(0)) - .ingestProposal( - Mockito.any(AspectsBatchImpl.class), Mockito.any(AuditStamp.class), Mockito.eq(false)); + .ingestProposal(Mockito.any(AspectsBatchImpl.class), Mockito.eq(false)); } @Test diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/owner/AddOwnersResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/owner/AddOwnersResolverTest.java index 74f88f95fc171..5e199f2c6b2c7 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/owner/AddOwnersResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/owner/AddOwnersResolverTest.java @@ -4,7 +4,6 @@ import static org.testng.Assert.*; import com.google.common.collect.ImmutableList; -import com.linkedin.common.AuditStamp; import com.linkedin.common.Owner; import com.linkedin.common.OwnerArray; import com.linkedin.common.Ownership; @@ -21,7 +20,7 @@ import com.linkedin.datahub.graphql.resolvers.mutate.util.OwnerUtils; import com.linkedin.metadata.Constants; import com.linkedin.metadata.entity.EntityService; -import com.linkedin.metadata.entity.ebean.transactions.AspectsBatchImpl; +import com.linkedin.metadata.entity.ebean.batch.AspectsBatchImpl; import graphql.schema.DataFetchingEnvironment; import java.util.concurrent.CompletionException; import org.mockito.Mockito; @@ -399,10 +398,7 @@ public void testGetEntityClientException() throws Exception { Mockito.doThrow(RuntimeException.class) .when(mockService) - .ingestProposal( - Mockito.any(AspectsBatchImpl.class), - Mockito.any(AuditStamp.class), - Mockito.anyBoolean()); + .ingestProposal(Mockito.any(AspectsBatchImpl.class), Mockito.anyBoolean()); AddOwnersResolver resolver = new AddOwnersResolver(Mockito.mock(EntityService.class)); diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/owner/BatchAddOwnersResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/owner/BatchAddOwnersResolverTest.java index 92a789530d6e4..92960f45232b5 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/owner/BatchAddOwnersResolverTest.java +++ 
b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/owner/BatchAddOwnersResolverTest.java @@ -4,7 +4,6 @@ import static org.testng.Assert.*; import com.google.common.collect.ImmutableList; -import com.linkedin.common.AuditStamp; import com.linkedin.common.Owner; import com.linkedin.common.OwnerArray; import com.linkedin.common.Ownership; @@ -20,7 +19,7 @@ import com.linkedin.datahub.graphql.resolvers.mutate.util.OwnerUtils; import com.linkedin.metadata.Constants; import com.linkedin.metadata.entity.EntityService; -import com.linkedin.metadata.entity.ebean.transactions.AspectsBatchImpl; +import com.linkedin.metadata.entity.ebean.batch.AspectsBatchImpl; import graphql.schema.DataFetchingEnvironment; import java.util.concurrent.CompletionException; import org.mockito.Mockito; @@ -337,10 +336,7 @@ public void testGetEntityClientException() throws Exception { Mockito.doThrow(RuntimeException.class) .when(mockService) - .ingestProposal( - Mockito.any(AspectsBatchImpl.class), - Mockito.any(AuditStamp.class), - Mockito.anyBoolean()); + .ingestProposal(Mockito.any(AspectsBatchImpl.class), Mockito.anyBoolean()); BatchAddOwnersResolver resolver = new BatchAddOwnersResolver(mockService); diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/owner/BatchRemoveOwnersResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/owner/BatchRemoveOwnersResolverTest.java index 7cef90ffee512..10c95c1bac648 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/owner/BatchRemoveOwnersResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/owner/BatchRemoveOwnersResolverTest.java @@ -4,7 +4,6 @@ import static org.testng.Assert.*; import com.google.common.collect.ImmutableList; -import com.linkedin.common.AuditStamp; import com.linkedin.common.Owner; import com.linkedin.common.OwnerArray; import com.linkedin.common.Ownership; @@ -17,7 +16,7 @@ import com.linkedin.datahub.graphql.resolvers.mutate.BatchRemoveOwnersResolver; import com.linkedin.metadata.Constants; import com.linkedin.metadata.entity.EntityService; -import com.linkedin.metadata.entity.ebean.transactions.AspectsBatchImpl; +import com.linkedin.metadata.entity.ebean.batch.AspectsBatchImpl; import graphql.schema.DataFetchingEnvironment; import java.util.concurrent.CompletionException; import org.mockito.Mockito; @@ -204,10 +203,7 @@ public void testGetEntityClientException() throws Exception { Mockito.doThrow(RuntimeException.class) .when(mockService) - .ingestProposal( - Mockito.any(AspectsBatchImpl.class), - Mockito.any(AuditStamp.class), - Mockito.anyBoolean()); + .ingestProposal(Mockito.any(AspectsBatchImpl.class), Mockito.anyBoolean()); BatchRemoveOwnersResolver resolver = new BatchRemoveOwnersResolver(mockService); diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/tag/AddTagsResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/tag/AddTagsResolverTest.java index 340802cde467b..2468cef0e1216 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/tag/AddTagsResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/tag/AddTagsResolverTest.java @@ -5,7 +5,6 @@ import static org.testng.Assert.*; import com.google.common.collect.ImmutableList; -import com.linkedin.common.AuditStamp; import com.linkedin.common.GlobalTags; import 
com.linkedin.common.TagAssociation; import com.linkedin.common.TagAssociationArray; @@ -17,7 +16,8 @@ import com.linkedin.datahub.graphql.resolvers.mutate.AddTagsResolver; import com.linkedin.datahub.graphql.resolvers.mutate.MutationUtils; import com.linkedin.metadata.entity.EntityService; -import com.linkedin.metadata.entity.ebean.transactions.AspectsBatchImpl; +import com.linkedin.metadata.entity.ebean.batch.AspectsBatchImpl; +import com.linkedin.metadata.entity.ebean.batch.MCPUpsertBatchItem; import com.linkedin.mxe.MetadataChangeProposal; import graphql.schema.DataFetchingEnvironment; import java.util.concurrent.CompletionException; @@ -210,12 +210,11 @@ public void testGetUnauthorized() throws Exception { @Test public void testGetEntityClientException() throws Exception { - EntityService mockService = getMockEntityService(); + EntityService mockService = getMockEntityService(); Mockito.doThrow(RuntimeException.class) .when(mockService) - .ingestProposal( - Mockito.any(AspectsBatchImpl.class), Mockito.any(AuditStamp.class), Mockito.eq(false)); + .ingestProposal(Mockito.any(AspectsBatchImpl.class), Mockito.eq(false)); AddTagsResolver resolver = new AddTagsResolver(Mockito.mock(EntityService.class)); diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/tag/BatchAddTagsResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/tag/BatchAddTagsResolverTest.java index 71354627b1145..c174d917748eb 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/tag/BatchAddTagsResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/tag/BatchAddTagsResolverTest.java @@ -5,7 +5,6 @@ import static org.testng.Assert.*; import com.google.common.collect.ImmutableList; -import com.linkedin.common.AuditStamp; import com.linkedin.common.GlobalTags; import com.linkedin.common.TagAssociation; import com.linkedin.common.TagAssociationArray; @@ -19,7 +18,7 @@ import com.linkedin.datahub.graphql.resolvers.mutate.MutationUtils; import com.linkedin.metadata.Constants; import com.linkedin.metadata.entity.EntityService; -import com.linkedin.metadata.entity.ebean.transactions.AspectsBatchImpl; +import com.linkedin.metadata.entity.ebean.batch.AspectsBatchImpl; import com.linkedin.mxe.MetadataChangeProposal; import graphql.schema.DataFetchingEnvironment; import java.util.List; @@ -197,10 +196,7 @@ public void testGetFailureTagDoesNotExist() throws Exception { assertThrows(CompletionException.class, () -> resolver.get(mockEnv).join()); Mockito.verify(mockService, Mockito.times(0)) - .ingestProposal( - Mockito.any(AspectsBatchImpl.class), - Mockito.any(AuditStamp.class), - Mockito.anyBoolean()); + .ingestProposal(Mockito.any(AspectsBatchImpl.class), Mockito.anyBoolean()); } @Test @@ -240,10 +236,7 @@ public void testGetFailureResourceDoesNotExist() throws Exception { assertThrows(CompletionException.class, () -> resolver.get(mockEnv).join()); Mockito.verify(mockService, Mockito.times(0)) - .ingestProposal( - Mockito.any(AspectsBatchImpl.class), - Mockito.any(AuditStamp.class), - Mockito.anyBoolean()); + .ingestProposal(Mockito.any(AspectsBatchImpl.class), Mockito.anyBoolean()); } @Test @@ -266,10 +259,7 @@ public void testGetUnauthorized() throws Exception { assertThrows(CompletionException.class, () -> resolver.get(mockEnv).join()); Mockito.verify(mockService, Mockito.times(0)) - .ingestProposal( - Mockito.any(AspectsBatchImpl.class), - Mockito.any(AuditStamp.class), - 
Mockito.anyBoolean()); + .ingestProposal(Mockito.any(AspectsBatchImpl.class), Mockito.anyBoolean()); } @Test @@ -278,10 +268,7 @@ public void testGetEntityClientException() throws Exception { Mockito.doThrow(RuntimeException.class) .when(mockService) - .ingestProposal( - Mockito.any(AspectsBatchImpl.class), - Mockito.any(AuditStamp.class), - Mockito.anyBoolean()); + .ingestProposal(Mockito.any(AspectsBatchImpl.class), Mockito.anyBoolean()); BatchAddTagsResolver resolver = new BatchAddTagsResolver(mockService); diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/tag/BatchRemoveTagsResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/tag/BatchRemoveTagsResolverTest.java index 8cd10afee293e..ba75b41388587 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/tag/BatchRemoveTagsResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/tag/BatchRemoveTagsResolverTest.java @@ -5,7 +5,6 @@ import static org.testng.Assert.*; import com.google.common.collect.ImmutableList; -import com.linkedin.common.AuditStamp; import com.linkedin.common.GlobalTags; import com.linkedin.common.TagAssociation; import com.linkedin.common.TagAssociationArray; @@ -20,7 +19,7 @@ import com.linkedin.events.metadata.ChangeType; import com.linkedin.metadata.Constants; import com.linkedin.metadata.entity.EntityService; -import com.linkedin.metadata.entity.ebean.transactions.AspectsBatchImpl; +import com.linkedin.metadata.entity.ebean.batch.AspectsBatchImpl; import com.linkedin.metadata.utils.GenericRecordUtils; import com.linkedin.mxe.MetadataChangeProposal; import graphql.schema.DataFetchingEnvironment; @@ -199,10 +198,7 @@ public void testGetFailureResourceDoesNotExist() throws Exception { assertThrows(CompletionException.class, () -> resolver.get(mockEnv).join()); Mockito.verify(mockService, Mockito.times(0)) - .ingestProposal( - Mockito.any(AspectsBatchImpl.class), - Mockito.any(AuditStamp.class), - Mockito.anyBoolean()); + .ingestProposal(Mockito.any(AspectsBatchImpl.class), Mockito.anyBoolean()); } @Test @@ -225,10 +221,7 @@ public void testGetUnauthorized() throws Exception { assertThrows(CompletionException.class, () -> resolver.get(mockEnv).join()); Mockito.verify(mockService, Mockito.times(0)) - .ingestProposal( - Mockito.any(AspectsBatchImpl.class), - Mockito.any(AuditStamp.class), - Mockito.anyBoolean()); + .ingestProposal(Mockito.any(AspectsBatchImpl.class), Mockito.anyBoolean()); } @Test @@ -237,10 +230,7 @@ public void testGetEntityClientException() throws Exception { Mockito.doThrow(RuntimeException.class) .when(mockService) - .ingestProposal( - Mockito.any(AspectsBatchImpl.class), - Mockito.any(AuditStamp.class), - Mockito.anyBoolean()); + .ingestProposal(Mockito.any(AspectsBatchImpl.class), Mockito.anyBoolean()); BatchRemoveTagsResolver resolver = new BatchRemoveTagsResolver(mockService); diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/term/AddTermsResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/term/AddTermsResolverTest.java index cb827a42333b2..397bb533ff871 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/term/AddTermsResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/term/AddTermsResolverTest.java @@ -4,7 +4,6 @@ import static org.testng.Assert.*; import 
com.google.common.collect.ImmutableList; -import com.linkedin.common.AuditStamp; import com.linkedin.common.GlossaryTermAssociation; import com.linkedin.common.GlossaryTermAssociationArray; import com.linkedin.common.GlossaryTerms; @@ -16,7 +15,7 @@ import com.linkedin.datahub.graphql.resolvers.mutate.AddTermsResolver; import com.linkedin.metadata.Constants; import com.linkedin.metadata.entity.EntityService; -import com.linkedin.metadata.entity.ebean.transactions.AspectsBatchImpl; +import com.linkedin.metadata.entity.ebean.batch.AspectsBatchImpl; import graphql.schema.DataFetchingEnvironment; import java.util.concurrent.CompletionException; import org.mockito.Mockito; @@ -58,8 +57,7 @@ public void testGetSuccessNoExistingTerms() throws Exception { // Unable to easily validate exact payload due to the injected timestamp Mockito.verify(mockService, Mockito.times(1)) - .ingestProposal( - Mockito.any(AspectsBatchImpl.class), Mockito.any(AuditStamp.class), Mockito.eq(false)); + .ingestProposal(Mockito.any(AspectsBatchImpl.class), Mockito.eq(false)); Mockito.verify(mockService, Mockito.times(1)) .exists(Mockito.eq(Urn.createFromString(TEST_TERM_1_URN))); @@ -105,8 +103,7 @@ public void testGetSuccessExistingTerms() throws Exception { // Unable to easily validate exact payload due to the injected timestamp Mockito.verify(mockService, Mockito.times(1)) - .ingestProposal( - Mockito.any(AspectsBatchImpl.class), Mockito.any(AuditStamp.class), Mockito.eq(false)); + .ingestProposal(Mockito.any(AspectsBatchImpl.class), Mockito.eq(false)); Mockito.verify(mockService, Mockito.times(1)) .exists(Mockito.eq(Urn.createFromString(TEST_TERM_1_URN))); @@ -141,10 +138,7 @@ public void testGetFailureTermDoesNotExist() throws Exception { assertThrows(CompletionException.class, () -> resolver.get(mockEnv).join()); Mockito.verify(mockService, Mockito.times(0)) - .ingestProposal( - Mockito.any(AspectsBatchImpl.class), - Mockito.any(AuditStamp.class), - Mockito.anyBoolean()); + .ingestProposal(Mockito.any(AspectsBatchImpl.class), Mockito.anyBoolean()); } @Test @@ -173,10 +167,7 @@ public void testGetFailureResourceDoesNotExist() throws Exception { assertThrows(CompletionException.class, () -> resolver.get(mockEnv).join()); Mockito.verify(mockService, Mockito.times(0)) - .ingestProposal( - Mockito.any(AspectsBatchImpl.class), - Mockito.any(AuditStamp.class), - Mockito.anyBoolean()); + .ingestProposal(Mockito.any(AspectsBatchImpl.class), Mockito.anyBoolean()); } @Test @@ -195,10 +186,7 @@ public void testGetUnauthorized() throws Exception { assertThrows(CompletionException.class, () -> resolver.get(mockEnv).join()); Mockito.verify(mockService, Mockito.times(0)) - .ingestProposal( - Mockito.any(AspectsBatchImpl.class), - Mockito.any(AuditStamp.class), - Mockito.anyBoolean()); + .ingestProposal(Mockito.any(AspectsBatchImpl.class), Mockito.anyBoolean()); } @Test @@ -207,10 +195,7 @@ public void testGetEntityClientException() throws Exception { Mockito.doThrow(RuntimeException.class) .when(mockService) - .ingestProposal( - Mockito.any(AspectsBatchImpl.class), - Mockito.any(AuditStamp.class), - Mockito.anyBoolean()); + .ingestProposal(Mockito.any(AspectsBatchImpl.class), Mockito.anyBoolean()); AddTermsResolver resolver = new AddTermsResolver(Mockito.mock(EntityService.class)); diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/term/BatchAddTermsResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/term/BatchAddTermsResolverTest.java index 
7df19fad52689..2c85e870dd6ac 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/term/BatchAddTermsResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/term/BatchAddTermsResolverTest.java @@ -4,7 +4,6 @@ import static org.testng.Assert.*; import com.google.common.collect.ImmutableList; -import com.linkedin.common.AuditStamp; import com.linkedin.common.GlossaryTermAssociation; import com.linkedin.common.GlossaryTermAssociationArray; import com.linkedin.common.GlossaryTerms; @@ -17,7 +16,7 @@ import com.linkedin.datahub.graphql.resolvers.mutate.BatchAddTermsResolver; import com.linkedin.metadata.Constants; import com.linkedin.metadata.entity.EntityService; -import com.linkedin.metadata.entity.ebean.transactions.AspectsBatchImpl; +import com.linkedin.metadata.entity.ebean.batch.AspectsBatchImpl; import graphql.schema.DataFetchingEnvironment; import java.util.concurrent.CompletionException; import org.mockito.Mockito; @@ -239,10 +238,7 @@ public void testGetEntityClientException() throws Exception { Mockito.doThrow(RuntimeException.class) .when(mockService) - .ingestProposal( - Mockito.any(AspectsBatchImpl.class), - Mockito.any(AuditStamp.class), - Mockito.anyBoolean()); + .ingestProposal(Mockito.any(AspectsBatchImpl.class), Mockito.anyBoolean()); BatchAddTermsResolver resolver = new BatchAddTermsResolver(mockService); diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/term/BatchRemoveTermsResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/term/BatchRemoveTermsResolverTest.java index 659ce40542a9c..c2520f4dfb712 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/term/BatchRemoveTermsResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/term/BatchRemoveTermsResolverTest.java @@ -4,7 +4,6 @@ import static org.testng.Assert.*; import com.google.common.collect.ImmutableList; -import com.linkedin.common.AuditStamp; import com.linkedin.common.GlossaryTermAssociation; import com.linkedin.common.GlossaryTermAssociationArray; import com.linkedin.common.GlossaryTerms; @@ -17,7 +16,7 @@ import com.linkedin.datahub.graphql.resolvers.mutate.BatchRemoveTermsResolver; import com.linkedin.metadata.Constants; import com.linkedin.metadata.entity.EntityService; -import com.linkedin.metadata.entity.ebean.transactions.AspectsBatchImpl; +import com.linkedin.metadata.entity.ebean.batch.AspectsBatchImpl; import graphql.schema.DataFetchingEnvironment; import java.util.concurrent.CompletionException; import org.mockito.Mockito; @@ -200,10 +199,7 @@ public void testGetEntityClientException() throws Exception { Mockito.doThrow(RuntimeException.class) .when(mockService) - .ingestProposal( - Mockito.any(AspectsBatchImpl.class), - Mockito.any(AuditStamp.class), - Mockito.anyBoolean()); + .ingestProposal(Mockito.any(AspectsBatchImpl.class), Mockito.anyBoolean()); BatchRemoveTermsResolver resolver = new BatchRemoveTermsResolver(mockService); diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/restorebackup/RestoreStorageStep.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/restorebackup/RestoreStorageStep.java index 5c4567c856d0e..5c4e8cdc47e34 100644 --- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/restorebackup/RestoreStorageStep.java +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/restorebackup/RestoreStorageStep.java 
@@ -39,7 +39,7 @@ public class RestoreStorageStep implements UpgradeStep { private static final int REPORT_BATCH_SIZE = 1000; private static final int DEFAULT_THREAD_POOL = 4; - private final EntityService _entityService; + private final EntityService _entityService; private final EntityRegistry _entityRegistry; private final Map>>> _backupReaders; diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/restoreindices/SendMAEStep.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/restoreindices/SendMAEStep.java index 574b1f08b5f54..bedf200a1c055 100644 --- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/restoreindices/SendMAEStep.java +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/restoreindices/SendMAEStep.java @@ -34,7 +34,7 @@ public class SendMAEStep implements UpgradeStep { private static final boolean DEFAULT_URN_BASED_PAGINATION = false; private final Database _server; - private final EntityService _entityService; + private final EntityService _entityService; public class KafkaJob implements Callable { UpgradeContext context; diff --git a/entity-registry/build.gradle b/entity-registry/build.gradle index 77cca24c0e723..315a29e305b77 100644 --- a/entity-registry/build.gradle +++ b/entity-registry/build.gradle @@ -8,6 +8,7 @@ dependencies { implementation spec.product.pegasus.generator api project(path: ':metadata-models') api project(path: ':metadata-models', configuration: "dataTemplate") + api externalDependency.classGraph implementation externalDependency.slf4jApi compileOnly externalDependency.lombok implementation externalDependency.guava @@ -30,6 +31,8 @@ dependencies { testImplementation externalDependency.testng testImplementation externalDependency.mockito testImplementation externalDependency.mockitoInline + testCompileOnly externalDependency.lombok + testImplementation externalDependency.classGraph } compileTestJava.dependsOn tasks.getByPath(':entity-registry:custom-test-model:modelDeploy') diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/AspectsBatch.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/AspectsBatch.java new file mode 100644 index 0000000000000..83e40b22a5e44 --- /dev/null +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/AspectsBatch.java @@ -0,0 +1,100 @@ +package com.linkedin.metadata.aspect.batch; + +import com.linkedin.metadata.aspect.plugins.validation.AspectRetriever; +import com.linkedin.metadata.models.registry.EntityRegistry; +import com.linkedin.mxe.SystemMetadata; +import com.linkedin.util.Pair; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; +import java.util.stream.Stream; +import javax.annotation.Nonnull; + +/** + * A batch of aspects in the context of either an MCP or MCL write path to a data store. The item is + * a record that encapsulates the change type, raw aspect and ancillary information like {@link + * SystemMetadata} and record/message created time + */ +public interface AspectsBatch { + List getItems(); + + /** + * Returns MCP items. Can be patch, upsert, etc. 
+ * + * @return batch items + */ + default List getMCPItems() { + return getItems().stream() + .filter(item -> item instanceof MCPBatchItem) + .map(item -> (MCPBatchItem) item) + .collect(Collectors.toList()); + } + + Pair>, List> toUpsertBatchItems( + Map> latestAspects, + EntityRegistry entityRegistry, + AspectRetriever aspectRetriever); + + default Stream applyMCPSideEffects( + List items, EntityRegistry entityRegistry, AspectRetriever aspectRetriever) { + return entityRegistry.getAllMCPSideEffects().stream() + .flatMap(mcpSideEffect -> mcpSideEffect.apply(items, entityRegistry, aspectRetriever)); + } + + default boolean containsDuplicateAspects() { + return getItems().stream() + .map(i -> String.format("%s_%s", i.getClass().getName(), i.hashCode())) + .distinct() + .count() + != getItems().size(); + } + + default Map> getUrnAspectsMap() { + return getItems().stream() + .map(aspect -> Map.entry(aspect.getUrn().toString(), aspect.getAspectName())) + .collect( + Collectors.groupingBy( + Map.Entry::getKey, Collectors.mapping(Map.Entry::getValue, Collectors.toSet()))); + } + + default Map> getNewUrnAspectsMap( + Map> existingMap, List items) { + Map> newItemsMap = + items.stream() + .map(aspect -> Map.entry(aspect.getUrn().toString(), aspect.getAspectName())) + .collect( + Collectors.groupingBy( + Map.Entry::getKey, + Collectors.mapping( + Map.Entry::getValue, Collectors.toCollection(HashSet::new)))); + + return newItemsMap.entrySet().stream() + .filter( + entry -> + !existingMap.containsKey(entry.getKey()) + || !existingMap.get(entry.getKey()).containsAll(entry.getValue())) + .peek( + entry -> { + if (existingMap.containsKey(entry.getKey())) { + entry.getValue().removeAll(existingMap.get(entry.getKey())); + } + }) + .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); + } + + default Map> merge( + @Nonnull Map> a, @Nonnull Map> b) { + return Stream.concat(a.entrySet().stream(), b.entrySet().stream()) + .flatMap( + entry -> + entry.getValue().entrySet().stream() + .map(innerEntry -> Pair.of(entry.getKey(), innerEntry))) + .collect( + Collectors.groupingBy( + Pair::getKey, + Collectors.mapping( + Pair::getValue, Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)))); + } +} diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/BatchItem.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/BatchItem.java new file mode 100644 index 0000000000000..a4c0624150532 --- /dev/null +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/BatchItem.java @@ -0,0 +1,66 @@ +package com.linkedin.metadata.aspect.batch; + +import com.linkedin.common.AuditStamp; +import com.linkedin.common.urn.Urn; +import com.linkedin.events.metadata.ChangeType; +import com.linkedin.metadata.models.AspectSpec; +import com.linkedin.metadata.models.EntitySpec; +import com.linkedin.mxe.SystemMetadata; +import javax.annotation.Nonnull; + +public interface BatchItem { + /** + * The urn associated with the aspect + * + * @return + */ + Urn getUrn(); + + /** + * Aspect's name + * + * @return the name + */ + @Nonnull + default String getAspectName() { + return getAspectSpec().getName(); + } + + /** + * System information + * + * @return the system metadata + */ + SystemMetadata getSystemMetadata(); + + /** + * Timestamp and actor + * + * @return the audit information + */ + AuditStamp getAuditStamp(); + + /** + * The type of change + * + * @return change type + */ + @Nonnull + ChangeType getChangeType(); + + /** + * The entity's schema + * + * @return 
entity specification + */ + @Nonnull + EntitySpec getEntitySpec(); + + /** + * The aspect's schema + * + * @return aspect's specification + */ + @Nonnull + AspectSpec getAspectSpec(); +} diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/MCLBatchItem.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/MCLBatchItem.java new file mode 100644 index 0000000000000..30e882705da45 --- /dev/null +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/MCLBatchItem.java @@ -0,0 +1,58 @@ +package com.linkedin.metadata.aspect.batch; + +import com.linkedin.common.AuditStamp; +import com.linkedin.common.urn.Urn; +import com.linkedin.data.template.RecordTemplate; +import com.linkedin.events.metadata.ChangeType; +import com.linkedin.mxe.MetadataChangeLog; +import com.linkedin.mxe.SystemMetadata; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; + +/** An item that represents a change that has been written to primary storage. */ +public interface MCLBatchItem extends BatchItem { + + @Nonnull + MetadataChangeLog getMetadataChangeLog(); + + @Override + default Urn getUrn() { + return getMetadataChangeLog().getEntityUrn(); + } + + @Nonnull + @Override + default String getAspectName() { + if (getMetadataChangeLog().getAspectName() != null) { + return getMetadataChangeLog().getAspectName(); + } else { + return getAspect().schema().getName(); + } + } + + @Override + default SystemMetadata getSystemMetadata() { + return getMetadataChangeLog().getSystemMetadata(); + } + + default SystemMetadata getPreviousSystemMetadata() { + return getMetadataChangeLog().getPreviousSystemMetadata(); + } + + @Nullable + RecordTemplate getPreviousAspect(); + + @Nonnull + RecordTemplate getAspect(); + + @Override + @Nonnull + default ChangeType getChangeType() { + return getMetadataChangeLog().getChangeType(); + } + + @Override + default AuditStamp getAuditStamp() { + return getMetadataChangeLog().getCreated(); + } +} diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/MCPBatchItem.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/MCPBatchItem.java new file mode 100644 index 0000000000000..bb5e0ac53934a --- /dev/null +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/MCPBatchItem.java @@ -0,0 +1,46 @@ +package com.linkedin.metadata.aspect.batch; + +import com.linkedin.events.metadata.ChangeType; +import com.linkedin.metadata.models.AspectSpec; +import com.linkedin.metadata.models.registry.template.AspectTemplateEngine; +import com.linkedin.mxe.MetadataChangeProposal; +import javax.annotation.Nullable; + +/** Represents a proposal to write to the primary data store which may be represented by an MCP */ +public abstract class MCPBatchItem implements BatchItem { + + @Nullable + public abstract MetadataChangeProposal getMetadataChangeProposal(); + + /** + * Validates that a change type is valid for the given aspect + * + * @param changeType + * @param aspectSpec + * @return + */ + protected static boolean isValidChangeType(ChangeType changeType, AspectSpec aspectSpec) { + if (aspectSpec.isTimeseries()) { + // Timeseries aspects only support UPSERT + return ChangeType.UPSERT.equals(changeType); + } else { + if (ChangeType.PATCH.equals(changeType)) { + return supportsPatch(aspectSpec); + } else { + return ChangeType.UPSERT.equals(changeType); + } + } + } + + protected static boolean supportsPatch(AspectSpec aspectSpec) { + // Limit initial support to defined templates + if 
(!AspectTemplateEngine.SUPPORTED_TEMPLATES.contains(aspectSpec.getName())) { + // Prevent unexpected behavior for aspects that do not currently have first-class patch support, + // specifically having array-based fields that require merging without specifying merge + // behavior can get into bad states + throw new UnsupportedOperationException( + "Aspect: " + aspectSpec.getName() + " does not currently support patch " + "operations."); + } + return true; + } +} diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/PatchItem.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/PatchItem.java new file mode 100644 index 0000000000000..f790c12ee5335 --- /dev/null +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/PatchItem.java @@ -0,0 +1,26 @@ +package com.linkedin.metadata.aspect.batch; + +import com.github.fge.jsonpatch.Patch; +import com.linkedin.data.template.RecordTemplate; +import com.linkedin.metadata.aspect.plugins.validation.AspectRetriever; +import com.linkedin.metadata.models.registry.EntityRegistry; + +/** + * A change proposal represented as a patch to an existing stored object in the primary data store. + */ +public abstract class PatchItem extends MCPBatchItem { + + /** + * Convert a Patch to an Upsert + * + * @param entityRegistry the entity registry + * @param recordTemplate the current value record template + * @return the upsert + */ + public abstract UpsertItem applyPatch( + EntityRegistry entityRegistry, + RecordTemplate recordTemplate, + AspectRetriever aspectRetriever); + + public abstract Patch getPatch(); +} diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/SystemAspect.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/SystemAspect.java new file mode 100644 index 0000000000000..88ac902ae52fe --- /dev/null +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/SystemAspect.java @@ -0,0 +1,25 @@ +package com.linkedin.metadata.aspect.batch; + +import com.linkedin.common.urn.Urn; +import com.linkedin.data.template.RecordTemplate; +import com.linkedin.metadata.models.registry.EntityRegistry; +import com.linkedin.mxe.SystemMetadata; +import java.sql.Timestamp; + +/** + * An aspect along with system metadata and creation timestamp. Represents an aspect as stored in + * primary storage.
+ */ +public interface SystemAspect { + Urn getUrn(); + + String getAspectName(); + + long getVersion(); + + RecordTemplate getRecordTemplate(EntityRegistry entityRegistry); + + SystemMetadata getSystemMetadata(); + + Timestamp getCreatedOn(); +} diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/UpsertItem.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/UpsertItem.java new file mode 100644 index 0000000000000..4e4d2a38799dc --- /dev/null +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/UpsertItem.java @@ -0,0 +1,24 @@ +package com.linkedin.metadata.aspect.batch; + +import com.linkedin.data.template.RecordTemplate; +import com.linkedin.metadata.aspect.plugins.validation.AspectRetriever; +import com.linkedin.metadata.aspect.plugins.validation.AspectValidationException; +import com.linkedin.metadata.models.registry.EntityRegistry; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; + +/** + * A proposal to write data to the primary datastore which includes system metadata and other + * related data stored along with the aspect + */ +public abstract class UpsertItem extends MCPBatchItem { + public abstract RecordTemplate getAspect(); + + public abstract SystemAspect toLatestEntityAspect(); + + public abstract void validatePreCommit( + @Nullable RecordTemplate previous, + @Nonnull EntityRegistry entityRegistry, + @Nonnull AspectRetriever aspectRetriever) + throws AspectValidationException; +} diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/PluginFactory.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/PluginFactory.java new file mode 100644 index 0000000000000..dd9bbcda8f4af --- /dev/null +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/PluginFactory.java @@ -0,0 +1,269 @@ +package com.linkedin.metadata.aspect.plugins; + +import com.linkedin.events.metadata.ChangeType; +import com.linkedin.metadata.aspect.plugins.config.AspectPluginConfig; +import com.linkedin.metadata.aspect.plugins.config.PluginConfiguration; +import com.linkedin.metadata.aspect.plugins.hooks.MCLSideEffect; +import com.linkedin.metadata.aspect.plugins.hooks.MCPSideEffect; +import com.linkedin.metadata.aspect.plugins.hooks.MutationHook; +import com.linkedin.metadata.aspect.plugins.validation.AspectPayloadValidator; +import com.linkedin.metadata.models.registry.config.EntityRegistryLoadResult; +import io.github.classgraph.ClassGraph; +import io.github.classgraph.ClassInfo; +import io.github.classgraph.MethodInfo; +import io.github.classgraph.ScanResult; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.function.Function; +import java.util.stream.Collectors; +import java.util.stream.IntStream; +import java.util.stream.Stream; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; +import lombok.Getter; +import lombok.extern.slf4j.Slf4j; + +@Slf4j +public class PluginFactory { + + public static PluginFactory withCustomClasspath( + @Nullable PluginConfiguration pluginConfiguration, @Nonnull List classLoaders) { + return new PluginFactory(pluginConfiguration, classLoaders); + } + + public static PluginFactory withConfig(@Nullable PluginConfiguration pluginConfiguration) { + return PluginFactory.withCustomClasspath(pluginConfiguration, List.of()); + } + + public static PluginFactory empty() { + return PluginFactory.withConfig(PluginConfiguration.EMPTY); + } + + public static PluginFactory merge(PluginFactory 
a, PluginFactory b) { + return PluginFactory.withCustomClasspath( + PluginConfiguration.merge(a.getPluginConfiguration(), b.getPluginConfiguration()), + Stream.concat(a.getClassLoaders().stream(), b.getClassLoaders().stream()) + .collect(Collectors.toList())); + } + + @Getter private final PluginConfiguration pluginConfiguration; + @Nonnull @Getter private final List classLoaders; + @Getter private final List aspectPayloadValidators; + @Getter private final List mutationHooks; + @Getter private final List mclSideEffects; + @Getter private final List mcpSideEffects; + + private final ClassGraph classGraph; + + public PluginFactory( + @Nullable PluginConfiguration pluginConfiguration, @Nonnull List classLoaders) { + this.classGraph = + new ClassGraph() + .enableRemoteJarScanning() + .enableExternalClasses() + .enableClassInfo() + .enableMethodInfo(); + + this.classLoaders = classLoaders; + + if (!this.classLoaders.isEmpty()) { + classLoaders.forEach(this.classGraph::addClassLoader); + } + + this.pluginConfiguration = + pluginConfiguration == null ? PluginConfiguration.EMPTY : pluginConfiguration; + this.aspectPayloadValidators = buildAspectPayloadValidators(this.pluginConfiguration); + this.mutationHooks = buildMutationHooks(this.pluginConfiguration); + this.mclSideEffects = buildMCLSideEffects(this.pluginConfiguration); + this.mcpSideEffects = buildMCPSideEffects(this.pluginConfiguration); + } + + /** + * Returns applicable {@link AspectPayloadValidator} implementations given the change type and + * entity/aspect information. + * + * @param changeType The type of change to be validated + * @param entityName The entity name + * @param aspectName The aspect name + * @return List of validator implementations + */ + @Nonnull + public List getAspectPayloadValidators( + @Nonnull ChangeType changeType, @Nonnull String entityName, @Nonnull String aspectName) { + return aspectPayloadValidators.stream() + .filter(plugin -> plugin.shouldApply(changeType, entityName, aspectName)) + .collect(Collectors.toList()); + } + + /** + * Return mutation hooks for {@link com.linkedin.data.template.RecordTemplate} + * + * @param changeType The type of change + * @param entityName The entity name + * @param aspectName The aspect name + * @return Mutation hooks + */ + @Nonnull + public List getMutationHooks( + @Nonnull ChangeType changeType, @Nonnull String entityName, @Nonnull String aspectName) { + return mutationHooks.stream() + .filter(plugin -> plugin.shouldApply(changeType, entityName, aspectName)) + .collect(Collectors.toList()); + } + + /** + * Returns the side effects to apply to {@link com.linkedin.mxe.MetadataChangeProposal}. Side + * effects can generate one or more additional MCPs during write operations. + * + * @param changeType The type of change + * @param entityName The entity name + * @param aspectName The aspect name + * @return MCP side effects + */ + @Nonnull + public List getMCPSideEffects( + @Nonnull ChangeType changeType, @Nonnull String entityName, @Nonnull String aspectName) { + return mcpSideEffects.stream() + .filter(plugin -> plugin.shouldApply(changeType, entityName, aspectName)) + .collect(Collectors.toList()); + } + + /** + * Returns the side effects to apply to {@link com.linkedin.mxe.MetadataChangeLog}. Side effects + * can generate one or more additional MCLs during write operations. 
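+ *
+ * <p>Usage sketch (hypothetical caller; the {@code pluginFactory} variable and the entity/aspect
+ * names are examples only):
+ *
+ * <pre>{@code
+ * List chartMclSideEffects =
+ *     pluginFactory.getMCLSideEffects(ChangeType.UPSERT, "chart", "chartInfo");
+ * }</pre>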
+ * + * @param changeType The type of change + * @param entityName The entity name + * @param aspectName The aspect name + * @return MCL side effects + */ + @Nonnull + public List getMCLSideEffects( + @Nonnull ChangeType changeType, @Nonnull String entityName, @Nonnull String aspectName) { + return mclSideEffects.stream() + .filter(plugin -> plugin.shouldApply(changeType, entityName, aspectName)) + .collect(Collectors.toList()); + } + + @Nonnull + public EntityRegistryLoadResult.PluginLoadResult getPluginLoadResult() { + return EntityRegistryLoadResult.PluginLoadResult.builder() + .validatorCount(aspectPayloadValidators.size()) + .mutationHookCount(mutationHooks.size()) + .mcpSideEffectCount(mcpSideEffects.size()) + .mclSideEffectCount(mclSideEffects.size()) + .validatorClasses( + aspectPayloadValidators.stream() + .map(cls -> cls.getClass().getName()) + .collect(Collectors.toSet())) + .mutationHookClasses( + mutationHooks.stream().map(cls -> cls.getClass().getName()).collect(Collectors.toSet())) + .mcpSideEffectClasses( + mcpSideEffects.stream() + .map(cls -> cls.getClass().getName()) + .collect(Collectors.toSet())) + .mclSideEffectClasses( + mclSideEffects.stream() + .map(cls -> cls.getClass().getName()) + .collect(Collectors.toSet())) + .build(); + } + + private List buildAspectPayloadValidators( + @Nullable PluginConfiguration pluginConfiguration) { + return pluginConfiguration == null + ? List.of() + : applyDisable( + build( + AspectPayloadValidator.class, + pluginConfiguration.getAspectPayloadValidators(), + "com.linkedin.metadata.aspect.plugins.validation")); + } + + private List buildMutationHooks(@Nullable PluginConfiguration pluginConfiguration) { + return pluginConfiguration == null + ? List.of() + : applyDisable( + build( + MutationHook.class, + pluginConfiguration.getMutationHooks(), + "com.linkedin.metadata.aspect.plugins.hooks")); + } + + private List buildMCLSideEffects( + @Nullable PluginConfiguration pluginConfiguration) { + return pluginConfiguration == null + ? List.of() + : applyDisable( + build( + MCLSideEffect.class, + pluginConfiguration.getMclSideEffects(), + "com.linkedin.metadata.aspect.plugins.hooks")); + } + + private List buildMCPSideEffects( + @Nullable PluginConfiguration pluginConfiguration) { + return pluginConfiguration == null + ? List.of() + : applyDisable( + build( + MCPSideEffect.class, + pluginConfiguration.getMcpSideEffects(), + "com.linkedin.metadata.aspect.plugins.hooks")); + } + + private List build( + Class baseClazz, List configs, String... 
packageNames) { + try (ScanResult scanResult = classGraph.acceptPackages(packageNames).scan()) { + + Map classMap = + scanResult.getSubclasses(baseClazz).stream() + .collect(Collectors.toMap(ClassInfo::getName, Function.identity())); + + return configs.stream() + .flatMap( + config -> { + try { + ClassInfo classInfo = classMap.get(config.getClassName()); + MethodInfo constructorMethod = classInfo.getConstructorInfo().get(0); + return Stream.of( + (T) constructorMethod.loadClassAndGetConstructor().newInstance(config)); + } catch (Exception e) { + log.error( + "Error constructing entity registry plugin class: {}", + config.getClassName(), + e); + return Stream.empty(); + } + }) + .collect(Collectors.toList()); + + } catch (Exception e) { + throw new IllegalArgumentException( + String.format("Failed to load entity registry plugins: %s.", baseClazz.getName()), e); + } + } + + @Nonnull + private static List applyDisable(@Nonnull List plugins) { + return IntStream.range(0, plugins.size()) + .mapToObj( + idx -> { + List subsequentPlugins = plugins.subList(idx + 1, plugins.size()); + T thisPlugin = plugins.get(idx); + AspectPluginConfig thisPluginConfig = thisPlugin.getConfig(); + + if (subsequentPlugins.stream() + .anyMatch( + otherPlugin -> thisPluginConfig.isDisabledBy(otherPlugin.getConfig()))) { + return null; + } + + return thisPlugin; + }) + .filter(Objects::nonNull) + .filter(p -> p.getConfig().isEnabled()) + .collect(Collectors.toList()); + } +} diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/PluginSpec.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/PluginSpec.java new file mode 100644 index 0000000000000..03a0473677fb8 --- /dev/null +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/PluginSpec.java @@ -0,0 +1,56 @@ +package com.linkedin.metadata.aspect.plugins; + +import com.linkedin.common.urn.Urn; +import com.linkedin.events.metadata.ChangeType; +import com.linkedin.metadata.aspect.plugins.config.AspectPluginConfig; +import com.linkedin.metadata.models.AspectSpec; +import javax.annotation.Nonnull; +import lombok.AllArgsConstructor; +import lombok.EqualsAndHashCode; + +@AllArgsConstructor +@EqualsAndHashCode +public abstract class PluginSpec { + protected static String ENTITY_WILDCARD = "*"; + + private final AspectPluginConfig aspectPluginConfig; + + protected AspectPluginConfig getConfig() { + return this.aspectPluginConfig; + } + + public boolean shouldApply( + @Nonnull ChangeType changeType, @Nonnull Urn entityUrn, @Nonnull AspectSpec aspectSpec) { + return shouldApply(changeType, entityUrn.getEntityType(), aspectSpec); + } + + public boolean shouldApply( + @Nonnull ChangeType changeType, @Nonnull String entityName, @Nonnull AspectSpec aspectSpec) { + return shouldApply(changeType, entityName, aspectSpec.getName()); + } + + public boolean shouldApply( + @Nonnull ChangeType changeType, @Nonnull String entityName, @Nonnull String aspectName) { + return getConfig().isEnabled() + && isChangeTypeSupported(changeType) + && isEntityAspectSupported(entityName, aspectName); + } + + protected boolean isEntityAspectSupported( + @Nonnull String entityName, @Nonnull String aspectName) { + return (ENTITY_WILDCARD.equals(entityName) + || getConfig().getSupportedEntityAspectNames().stream() + .anyMatch(supported -> supported.getEntityName().equals(entityName))) + && isAspectSupported(aspectName); + } + + protected boolean isAspectSupported(@Nonnull String aspectName) { + return 
getConfig().getSupportedEntityAspectNames().stream() + .anyMatch(supported -> supported.getAspectName().equals(aspectName)); + } + + protected boolean isChangeTypeSupported(@Nonnull ChangeType changeType) { + return getConfig().getSupportedOperations().stream() + .anyMatch(supported -> changeType.toString().equals(supported)); + } +} diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/config/AspectPluginConfig.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/config/AspectPluginConfig.java new file mode 100644 index 0000000000000..059f133ad2776 --- /dev/null +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/config/AspectPluginConfig.java @@ -0,0 +1,50 @@ +package com.linkedin.metadata.aspect.plugins.config; + +import java.util.List; +import javax.annotation.Nonnull; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +@Data +@NoArgsConstructor +@AllArgsConstructor +@Builder +public class AspectPluginConfig { + @Nonnull private String className; + private boolean enabled; + + @Nonnull private List supportedOperations; + @Nonnull private List supportedEntityAspectNames; + + @Data + @NoArgsConstructor + @AllArgsConstructor + @Builder + public static class EntityAspectName { + @Nonnull private String entityName; + @Nonnull private String aspectName; + } + + /** + * Used to determine if an earlier plugin is disabled by a subsequent plugin + * + * @param o the other plugin + * @return whether this plugin is disabled by the other plugin + */ + public boolean isDisabledBy(AspectPluginConfig o) { + return enabled && this.isEqualExcludingEnabled(o) && !o.enabled; + } + + private boolean isEqualExcludingEnabled(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + + AspectPluginConfig that = (AspectPluginConfig) o; + + if (!className.equals(that.className)) return false; + if (!supportedOperations.equals(that.supportedOperations)) return false; + return supportedEntityAspectNames.equals(that.supportedEntityAspectNames); + } +} diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/config/PluginConfiguration.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/config/PluginConfiguration.java new file mode 100644 index 0000000000000..a4d0678c130f3 --- /dev/null +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/config/PluginConfiguration.java @@ -0,0 +1,33 @@ +package com.linkedin.metadata.aspect.plugins.config; + +import java.util.List; +import java.util.stream.Collectors; +import java.util.stream.Stream; +import lombok.AllArgsConstructor; +import lombok.Data; +import lombok.NoArgsConstructor; + +@Data +@AllArgsConstructor +@NoArgsConstructor +public class PluginConfiguration { + private List aspectPayloadValidators = List.of(); + private List mutationHooks = List.of(); + private List mclSideEffects = List.of(); + private List mcpSideEffects = List.of(); + + public static PluginConfiguration EMPTY = new PluginConfiguration(); + + public static PluginConfiguration merge(PluginConfiguration a, PluginConfiguration b) { + return new PluginConfiguration( + Stream.concat( + a.getAspectPayloadValidators().stream(), b.getAspectPayloadValidators().stream()) + .collect(Collectors.toList()), + Stream.concat(a.getMutationHooks().stream(), b.getMutationHooks().stream()) + .collect(Collectors.toList()), + Stream.concat(a.getMclSideEffects().stream(),
b.getMclSideEffects().stream()) + .collect(Collectors.toList()), + Stream.concat(a.getMcpSideEffects().stream(), b.getMcpSideEffects().stream()) + .collect(Collectors.toList())); + } +} diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/hooks/MCLSideEffect.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/hooks/MCLSideEffect.java new file mode 100644 index 0000000000000..ef9786f8d711e --- /dev/null +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/hooks/MCLSideEffect.java @@ -0,0 +1,38 @@ +package com.linkedin.metadata.aspect.plugins.hooks; + +import com.linkedin.metadata.aspect.batch.MCLBatchItem; +import com.linkedin.metadata.aspect.plugins.PluginSpec; +import com.linkedin.metadata.aspect.plugins.config.AspectPluginConfig; +import com.linkedin.metadata.aspect.plugins.validation.AspectRetriever; +import com.linkedin.metadata.models.registry.EntityRegistry; +import java.util.List; +import java.util.stream.Stream; +import javax.annotation.Nonnull; + +/** Given an MCL produce additional MCLs for writing */ +public abstract class MCLSideEffect extends PluginSpec { + + public MCLSideEffect(AspectPluginConfig aspectPluginConfig) { + super(aspectPluginConfig); + } + + /** + * Given a list of MCLs, output additional MCLs + * + * @param input the input list of MCLs + * @return additional MCLs + */ + public final Stream apply( + @Nonnull List input, + @Nonnull EntityRegistry entityRegistry, + @Nonnull AspectRetriever aspectRetriever) { + return input.stream() + .filter(item -> shouldApply(item.getChangeType(), item.getUrn(), item.getAspectSpec())) + .flatMap(i -> applyMCLSideEffect(i, entityRegistry, aspectRetriever)); + } + + protected abstract Stream applyMCLSideEffect( + @Nonnull MCLBatchItem input, + @Nonnull EntityRegistry entityRegistry, + @Nonnull AspectRetriever aspectRetriever); +} diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/hooks/MCPSideEffect.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/hooks/MCPSideEffect.java new file mode 100644 index 0000000000000..fc1d1587d10fb --- /dev/null +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/hooks/MCPSideEffect.java @@ -0,0 +1,36 @@ +package com.linkedin.metadata.aspect.plugins.hooks; + +import com.linkedin.metadata.aspect.batch.UpsertItem; +import com.linkedin.metadata.aspect.plugins.PluginSpec; +import com.linkedin.metadata.aspect.plugins.config.AspectPluginConfig; +import com.linkedin.metadata.aspect.plugins.validation.AspectRetriever; +import com.linkedin.metadata.models.registry.EntityRegistry; +import java.util.List; +import java.util.stream.Stream; +import javax.annotation.Nonnull; + +/** Given an MCP produce additional MCPs to write */ +public abstract class MCPSideEffect extends PluginSpec { + + public MCPSideEffect(AspectPluginConfig aspectPluginConfig) { + super(aspectPluginConfig); + } + + /** + * Given the list of MCP upserts, output additional upserts + * + * @param input the input list of upserts + * @return additional upserts + */ + public final Stream apply( + List input, + EntityRegistry entityRegistry, + @Nonnull AspectRetriever aspectRetriever) { + return input.stream() + .filter(item -> shouldApply(item.getChangeType(), item.getUrn(), item.getAspectSpec())) + .flatMap(i -> applyMCPSideEffect(i, entityRegistry, aspectRetriever)); + } + + protected abstract Stream applyMCPSideEffect( + UpsertItem input, EntityRegistry entityRegistry, @Nonnull AspectRetriever aspectRetriever); +} diff --git
a/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/hooks/MutationHook.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/hooks/MutationHook.java new file mode 100644 index 0000000000000..730a494c03d7b --- /dev/null +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/hooks/MutationHook.java @@ -0,0 +1,68 @@ +package com.linkedin.metadata.aspect.plugins.hooks; + +import com.linkedin.common.AuditStamp; +import com.linkedin.data.template.RecordTemplate; +import com.linkedin.events.metadata.ChangeType; +import com.linkedin.metadata.aspect.plugins.PluginSpec; +import com.linkedin.metadata.aspect.plugins.config.AspectPluginConfig; +import com.linkedin.metadata.aspect.plugins.validation.AspectRetriever; +import com.linkedin.metadata.models.AspectSpec; +import com.linkedin.metadata.models.EntitySpec; +import com.linkedin.mxe.SystemMetadata; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; + +/** Applies changes to the RecordTemplate prior to write */ +public abstract class MutationHook extends PluginSpec { + + public MutationHook(AspectPluginConfig aspectPluginConfig) { + super(aspectPluginConfig); + } + + /** + * Mutating hook + * + * @param changeType Type of change to mutate + * @param entitySpec Entity specification + * @param aspectSpec Aspect specification + * @param oldAspectValue old aspect value if it exists + * @param newAspectValue the new aspect value + * @param oldSystemMetadata old system metadata if it exists + * @param newSystemMetadata the new system metadata + * @param auditStamp the audit stamp + */ + public final void applyMutation( + @Nonnull final ChangeType changeType, + @Nonnull EntitySpec entitySpec, + @Nonnull final AspectSpec aspectSpec, + @Nullable final RecordTemplate oldAspectValue, + @Nullable final RecordTemplate newAspectValue, + @Nullable final SystemMetadata oldSystemMetadata, + @Nullable final SystemMetadata newSystemMetadata, + @Nonnull AuditStamp auditStamp, + @Nonnull AspectRetriever aspectRetriever) { + if (shouldApply(changeType, entitySpec.getName(), aspectSpec)) { + mutate( + changeType, + entitySpec, + aspectSpec, + oldAspectValue, + newAspectValue, + oldSystemMetadata, + newSystemMetadata, + auditStamp, + aspectRetriever); + } + } + + protected abstract void mutate( + @Nonnull final ChangeType changeType, + @Nonnull EntitySpec entitySpec, + @Nonnull final AspectSpec aspectSpec, + @Nullable final RecordTemplate oldAspectValue, + @Nullable final RecordTemplate newAspectValue, + @Nullable final SystemMetadata oldSystemMetadata, + @Nullable final SystemMetadata newSystemMetadata, + @Nonnull AuditStamp auditStamp, + @Nonnull AspectRetriever aspectRetriever); +} diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/validation/AspectPayloadValidator.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/validation/AspectPayloadValidator.java new file mode 100644 index 0000000000000..656d017724571 --- /dev/null +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/validation/AspectPayloadValidator.java @@ -0,0 +1,83 @@ +package com.linkedin.metadata.aspect.plugins.validation; + +import com.linkedin.common.urn.Urn; +import com.linkedin.data.template.RecordTemplate; +import com.linkedin.events.metadata.ChangeType; +import com.linkedin.metadata.aspect.plugins.PluginSpec; +import com.linkedin.metadata.aspect.plugins.config.AspectPluginConfig; +import com.linkedin.metadata.models.AspectSpec; +import javax.annotation.Nonnull;
+import javax.annotation.Nullable; + +public abstract class AspectPayloadValidator extends PluginSpec { + + public AspectPayloadValidator(AspectPluginConfig aspectPluginConfig) { + super(aspectPluginConfig); + } + + /** + * Validate a proposal for the given change type for an aspect within the context of the given + * entity's urn. + * + * @param changeType The change type + * @param entityUrn The parent entity for the aspect + * @param aspectSpec The aspect's specification + * @param aspectPayload The aspect's payload + * @throws AspectValidationException if the aspect proposal is invalid + */ + public final void validateProposed( + @Nonnull ChangeType changeType, + @Nonnull Urn entityUrn, + @Nonnull AspectSpec aspectSpec, + @Nonnull RecordTemplate aspectPayload, + @Nonnull AspectRetriever aspectRetriever) + throws AspectValidationException { + if (shouldApply(changeType, entityUrn, aspectSpec)) { + validateProposedAspect(changeType, entityUrn, aspectSpec, aspectPayload, aspectRetriever); + } + } + + /** + * Validate the proposed aspect as it is about to be written, with the context of the previous + * version of the aspect (if it existed) + * + * @param changeType The change type + * @param entityUrn The parent entity for the aspect + * @param aspectSpec The aspect's specification + * @param previousAspect The previous version of the aspect if it exists + * @param proposedAspect The new version of the aspect + * @throws AspectValidationException if the aspect proposal is invalid + */ + public final void validatePreCommit( + @Nonnull ChangeType changeType, + @Nonnull Urn entityUrn, + @Nonnull AspectSpec aspectSpec, + @Nullable RecordTemplate previousAspect, + @Nonnull RecordTemplate proposedAspect, + AspectRetriever aspectRetriever) + throws AspectValidationException { + if (shouldApply(changeType, entityUrn, aspectSpec)) { + validatePreCommitAspect( + changeType, entityUrn, aspectSpec, previousAspect, proposedAspect, aspectRetriever); + } + } + + protected abstract void validateProposedAspect( + @Nonnull ChangeType changeType, + @Nonnull Urn entityUrn, + @Nonnull AspectSpec aspectSpec, + @Nonnull RecordTemplate aspectPayload, + @Nonnull AspectRetriever aspectRetriever) + throws AspectValidationException; + + protected abstract void validatePreCommitAspect( + @Nonnull ChangeType changeType, + @Nonnull Urn entityUrn, + @Nonnull AspectSpec aspectSpec, + @Nullable RecordTemplate previousAspect, + @Nonnull RecordTemplate proposedAspect, + AspectRetriever aspectRetriever) + throws AspectValidationException; +} diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/validation/AspectRetriever.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/validation/AspectRetriever.java new file mode 100644 index 0000000000000..78aa4689472f5 --- /dev/null +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/validation/AspectRetriever.java @@ -0,0 +1,13 @@ +package com.linkedin.metadata.aspect.plugins.validation; + +import com.linkedin.common.urn.Urn; +import com.linkedin.entity.Aspect; +import com.linkedin.r2.RemoteInvocationException; +import java.net.URISyntaxException; +import javax.annotation.Nonnull; + +public interface AspectRetriever { + + Aspect getLatestAspectObject(@Nonnull final Urn urn, @Nonnull final String aspectName) + throws RemoteInvocationException, URISyntaxException; +} diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/validation/AspectValidationException.java
b/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/validation/AspectValidationException.java new file mode 100644 index 0000000000000..f858bdcf141ae --- /dev/null +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/validation/AspectValidationException.java @@ -0,0 +1,12 @@ +package com.linkedin.metadata.aspect.plugins.validation; + +public class AspectValidationException extends Exception { + + public AspectValidationException(String msg) { + super(msg); + } + + public AspectValidationException(String msg, Exception e) { + super(msg, e); + } +} diff --git a/entity-registry/src/main/java/com/linkedin/metadata/models/DataSchemaFactory.java b/entity-registry/src/main/java/com/linkedin/metadata/models/DataSchemaFactory.java index b9766d0ca8640..e41e8159f64f2 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/models/DataSchemaFactory.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/models/DataSchemaFactory.java @@ -20,6 +20,7 @@ import java.util.Optional; import java.util.Set; import java.util.stream.Collectors; +import javax.annotation.Nullable; import lombok.extern.slf4j.Slf4j; import org.reflections.Reflections; @@ -64,37 +65,48 @@ public static DataSchemaFactory withCustomClasspath(Path pluginLocation) throws // no custom classpath, just return the default factory return INSTANCE; } - // first we load up classes from the classpath - File pluginDir = pluginLocation.toFile(); - if (!pluginDir.exists()) { - throw new RuntimeException( - "Failed to find plugin directory " - + pluginDir.getAbsolutePath() - + ". Current directory is " - + new File(".").getAbsolutePath()); - } - List urls = new ArrayList(); - if (pluginDir.isDirectory()) { - List jarFiles = - Files.walk(pluginLocation) - .filter(Files::isRegularFile) - .filter(p -> p.toString().endsWith(".jar")) - .collect(Collectors.toList()); - for (Path f : jarFiles) { - URL url = f.toUri().toURL(); - if (url != null) { - urls.add(url); + + return new DataSchemaFactory( + DEFAULT_TOP_LEVEL_NAMESPACES, getClassLoader(pluginLocation).get()); + } + + public static Optional getClassLoader(@Nullable Path pluginLocation) + throws IOException { + if (pluginLocation == null) { + return Optional.empty(); + } else { + // first we load up classes from the classpath + File pluginDir = pluginLocation.toFile(); + if (!pluginDir.exists()) { + throw new RuntimeException( + "Failed to find plugin directory " + + pluginDir.getAbsolutePath() + + ". 
Current directory is " + + new File(".").getAbsolutePath()); + } + List urls = new ArrayList(); + if (pluginDir.isDirectory()) { + List jarFiles = + Files.walk(pluginLocation) + .filter(Files::isRegularFile) + .filter(p -> p.toString().endsWith(".jar")) + .collect(Collectors.toList()); + for (Path f : jarFiles) { + URL url = f.toUri().toURL(); + if (url != null) { + urls.add(url); + } } + } else { + URL url = (pluginLocation.toUri().toURL()); + urls.add(url); } - } else { - URL url = (pluginLocation.toUri().toURL()); - urls.add(url); + URL[] urlsArray = new URL[urls.size()]; + urls.toArray(urlsArray); + URLClassLoader classLoader = + new URLClassLoader(urlsArray, Thread.currentThread().getContextClassLoader()); + return Optional.of(classLoader); } - URL[] urlsArray = new URL[urls.size()]; - urls.toArray(urlsArray); - URLClassLoader classLoader = - new URLClassLoader(urlsArray, Thread.currentThread().getContextClassLoader()); - return new DataSchemaFactory(DEFAULT_TOP_LEVEL_NAMESPACES, classLoader); } /** diff --git a/entity-registry/src/main/java/com/linkedin/metadata/models/registry/ConfigEntityRegistry.java b/entity-registry/src/main/java/com/linkedin/metadata/models/registry/ConfigEntityRegistry.java index fba916abd2430..ce8718c536fbe 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/models/registry/ConfigEntityRegistry.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/models/registry/ConfigEntityRegistry.java @@ -7,6 +7,7 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.dataformat.yaml.YAMLFactory; import com.linkedin.data.schema.DataSchema; +import com.linkedin.metadata.aspect.plugins.PluginFactory; import com.linkedin.metadata.models.AspectSpec; import com.linkedin.metadata.models.DataSchemaFactory; import com.linkedin.metadata.models.DefaultEntitySpec; @@ -33,6 +34,7 @@ import java.util.Optional; import java.util.stream.Collectors; import javax.annotation.Nonnull; +import lombok.Getter; import lombok.extern.slf4j.Slf4j; /** @@ -43,6 +45,7 @@ public class ConfigEntityRegistry implements EntityRegistry { private final DataSchemaFactory dataSchemaFactory; + @Getter private final PluginFactory pluginFactory; private final Map entityNameToSpec; private final Map eventNameToSpec; private final List entitySpecs; @@ -64,6 +67,7 @@ public class ConfigEntityRegistry implements EntityRegistry { public ConfigEntityRegistry(Pair configFileClassPathPair) throws IOException { this( DataSchemaFactory.withCustomClasspath(configFileClassPathPair.getSecond()), + DataSchemaFactory.getClassLoader(configFileClassPathPair.getSecond()).stream().toList(), configFileClassPathPair.getFirst()); } @@ -108,24 +112,29 @@ private static Pair getFileAndClassPath(String entityRegistryRoot) } public ConfigEntityRegistry(InputStream configFileInputStream) { - this(DataSchemaFactory.getInstance(), configFileInputStream); + this(DataSchemaFactory.getInstance(), List.of(), configFileInputStream); } - public ConfigEntityRegistry(DataSchemaFactory dataSchemaFactory, Path configFilePath) + public ConfigEntityRegistry( + DataSchemaFactory dataSchemaFactory, List classLoaders, Path configFilePath) throws FileNotFoundException { - this(dataSchemaFactory, new FileInputStream(configFilePath.toString())); + this(dataSchemaFactory, classLoaders, new FileInputStream(configFilePath.toString())); } - public ConfigEntityRegistry(DataSchemaFactory dataSchemaFactory, InputStream configFileStream) { + public ConfigEntityRegistry( + DataSchemaFactory dataSchemaFactory, + List 
classLoaders, + InputStream configFileStream) { this.dataSchemaFactory = dataSchemaFactory; Entities entities; try { entities = OBJECT_MAPPER.readValue(configFileStream, Entities.class); + this.pluginFactory = PluginFactory.withCustomClasspath(entities.getPlugins(), classLoaders); } catch (IOException e) { - e.printStackTrace(); throw new IllegalArgumentException( String.format( - "Error while reading config file in path %s: %s", configFileStream, e.getMessage())); + "Error while reading config file in path %s: %s", configFileStream, e.getMessage()), + e); } if (entities.getId() != null) { identifier = entities.getId(); diff --git a/entity-registry/src/main/java/com/linkedin/metadata/models/registry/EntityRegistry.java b/entity-registry/src/main/java/com/linkedin/metadata/models/registry/EntityRegistry.java index 8c415d56f0d5f..fbc3285579cc0 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/models/registry/EntityRegistry.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/models/registry/EntityRegistry.java @@ -1,11 +1,19 @@ package com.linkedin.metadata.models.registry; +import com.linkedin.events.metadata.ChangeType; +import com.linkedin.metadata.aspect.plugins.PluginFactory; +import com.linkedin.metadata.aspect.plugins.hooks.MCLSideEffect; +import com.linkedin.metadata.aspect.plugins.hooks.MCPSideEffect; +import com.linkedin.metadata.aspect.plugins.hooks.MutationHook; +import com.linkedin.metadata.aspect.plugins.validation.AspectPayloadValidator; import com.linkedin.metadata.models.AspectSpec; import com.linkedin.metadata.models.DefaultEntitySpec; import com.linkedin.metadata.models.EntitySpec; import com.linkedin.metadata.models.EventSpec; import com.linkedin.metadata.models.registry.template.AspectTemplateEngine; +import java.util.List; import java.util.Map; +import java.util.stream.Collectors; import javax.annotation.Nonnull; import javax.annotation.Nullable; @@ -68,4 +76,103 @@ default String getIdentifier() { */ @Nonnull AspectTemplateEngine getAspectTemplateEngine(); + + /** + * Returns applicable {@link AspectPayloadValidator} implementations given the change type and + * entity/aspect information. 
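+ *
+ * <p>Usage sketch (hypothetical caller; the entity and aspect names are examples only):
+ *
+ * <pre>{@code
+ * List statusValidators =
+ *     entityRegistry.getAspectPayloadValidators(ChangeType.UPSERT, "dataset", "status");
+ * }</pre>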
+ * + * @param changeType The type of change to be validated + * @param entityName The entity name + * @param aspectName The aspect name + * @return List of validator implementations + */ + @Nonnull + default List getAspectPayloadValidators( + @Nonnull ChangeType changeType, @Nonnull String entityName, @Nonnull String aspectName) { + return getAllAspectPayloadValidators().stream() + .filter( + aspectPayloadValidator -> + aspectPayloadValidator.shouldApply(changeType, entityName, aspectName)) + .collect(Collectors.toList()); + } + + @Nonnull + default List getAllAspectPayloadValidators() { + return getPluginFactory().getAspectPayloadValidators(); + } + + /** + * Return mutation hooks for {@link com.linkedin.data.template.RecordTemplate} + * + * @param changeType The type of change + * @param entityName The entity name + * @param aspectName The aspect name + * @return Mutation hooks + */ + @Nonnull + default List getMutationHooks( + @Nonnull ChangeType changeType, @Nonnull String entityName, @Nonnull String aspectName) { + return getAllMutationHooks().stream() + .filter(mutationHook -> mutationHook.shouldApply(changeType, entityName, aspectName)) + .collect(Collectors.toList()); + } + + @Nonnull + default List getAllMutationHooks() { + return getPluginFactory().getMutationHooks(); + } + + /** + * Returns the side effects to apply to {@link com.linkedin.mxe.MetadataChangeProposal}. Side + * effects can generate one or more additional MCPs during write operations. + * + * @param changeType The type of change + * @param entityName The entity name + * @param aspectName The aspect name + * @return MCP side effects + */ + @Nonnull + default List getMCPSideEffects( + @Nonnull ChangeType changeType, @Nonnull String entityName, @Nonnull String aspectName) { + return getAllMCPSideEffects().stream() + .filter(mcpSideEffect -> mcpSideEffect.shouldApply(changeType, entityName, aspectName)) + .collect(Collectors.toList()); + } + + @Nonnull + default List getAllMCPSideEffects() { + return getPluginFactory().getMcpSideEffects(); + } + + /** + * Returns the side effects to apply to {@link com.linkedin.mxe.MetadataChangeLog}. Side effects + * can generate one or more additional MCLs during write operations. 
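+ *
+ * <p>A side effect is constructed with its plugin configuration; a registration is roughly
+ * equivalent to the following (the class name is hypothetical, the builder mirrors
+ * {@code AspectPluginConfig}):
+ *
+ * <pre>{@code
+ * AspectPluginConfig.builder()
+ *     .className("com.example.MyMCLSideEffect")
+ *     .enabled(true)
+ *     .supportedOperations(List.of("UPSERT"))
+ *     .supportedEntityAspectNames(List.of(
+ *         AspectPluginConfig.EntityAspectName.builder()
+ *             .entityName("chart")
+ *             .aspectName("chartInfo")
+ *             .build()))
+ *     .build();
+ * }</pre>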
+ * + * @param changeType The type of change + * @param entityName The entity name + * @param aspectName The aspect name + * @return MCL side effects + */ + @Nonnull + default List getMCLSideEffects( + @Nonnull ChangeType changeType, @Nonnull String entityName, @Nonnull String aspectName) { + return getAllMCLSideEffects().stream() + .filter(mclSideEffect -> mclSideEffect.shouldApply(changeType, entityName, aspectName)) + .collect(Collectors.toList()); + } + + @Nonnull + default List getAllMCLSideEffects() { + return getPluginFactory().getMclSideEffects(); + } + + /** + * Returns underlying plugin factory + * + * @return the plugin factory + */ + @Nonnull + default PluginFactory getPluginFactory() { + return PluginFactory.empty(); + } } diff --git a/entity-registry/src/main/java/com/linkedin/metadata/models/registry/MergedEntityRegistry.java b/entity-registry/src/main/java/com/linkedin/metadata/models/registry/MergedEntityRegistry.java index 06aeefc2e5aa0..285b96b93d1d6 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/models/registry/MergedEntityRegistry.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/models/registry/MergedEntityRegistry.java @@ -3,6 +3,7 @@ import com.linkedin.data.schema.compatibility.CompatibilityChecker; import com.linkedin.data.schema.compatibility.CompatibilityOptions; import com.linkedin.data.schema.compatibility.CompatibilityResult; +import com.linkedin.metadata.aspect.plugins.PluginFactory; import com.linkedin.metadata.models.AspectSpec; import com.linkedin.metadata.models.ConfigEntitySpec; import com.linkedin.metadata.models.DefaultEntitySpec; @@ -27,6 +28,7 @@ public class MergedEntityRegistry implements EntityRegistry { private final Map eventNameToSpec; private final AspectTemplateEngine _aspectTemplateEngine; private final Map _aspectNameToSpec; + @Nonnull private PluginFactory pluginFactory; public MergedEntityRegistry(EntityRegistry baseEntityRegistry) { // baseEntityRegistry.get*Specs() can return immutable Collections.emptyMap() which fails @@ -42,6 +44,13 @@ public MergedEntityRegistry(EntityRegistry baseEntityRegistry) { baseEntityRegistry.getAspectTemplateEngine(); _aspectTemplateEngine = baseEntityRegistry.getAspectTemplateEngine(); _aspectNameToSpec = baseEntityRegistry.getAspectSpecs(); + if (baseEntityRegistry instanceof ConfigEntityRegistry) { + this.pluginFactory = ((ConfigEntityRegistry) baseEntityRegistry).getPluginFactory(); + } else if (baseEntityRegistry instanceof PatchEntityRegistry) { + this.pluginFactory = ((PatchEntityRegistry) baseEntityRegistry).getPluginFactory(); + } else { + this.pluginFactory = PluginFactory.empty(); + } } private void validateEntitySpec(EntitySpec entitySpec, final ValidationResult validationResult) { @@ -81,6 +90,11 @@ public MergedEntityRegistry apply(EntityRegistry patchEntityRegistry) eventNameToSpec.putAll(patchEntityRegistry.getEventSpecs()); } // TODO: Validate that the entity registries don't have conflicts among each other + + // Merge Plugins + this.pluginFactory = + PluginFactory.merge(this.pluginFactory, patchEntityRegistry.getPluginFactory()); + return this; } @@ -200,6 +214,12 @@ public AspectTemplateEngine getAspectTemplateEngine() { return _aspectTemplateEngine; } + @Nonnull + @Override + public PluginFactory getPluginFactory() { + return this.pluginFactory; + } + @Setter @Getter private class ValidationResult { diff --git a/entity-registry/src/main/java/com/linkedin/metadata/models/registry/PatchEntityRegistry.java 
b/entity-registry/src/main/java/com/linkedin/metadata/models/registry/PatchEntityRegistry.java index 9eafbe05a4fc6..c605cfa188fc8 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/models/registry/PatchEntityRegistry.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/models/registry/PatchEntityRegistry.java @@ -7,6 +7,7 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.dataformat.yaml.YAMLFactory; import com.linkedin.data.schema.DataSchema; +import com.linkedin.metadata.aspect.plugins.PluginFactory; import com.linkedin.metadata.models.AspectSpec; import com.linkedin.metadata.models.DataSchemaFactory; import com.linkedin.metadata.models.EntitySpec; @@ -32,6 +33,7 @@ import java.util.Optional; import java.util.stream.Collectors; import javax.annotation.Nonnull; +import lombok.Getter; import lombok.extern.slf4j.Slf4j; import org.apache.maven.artifact.versioning.ComparableVersion; @@ -44,6 +46,7 @@ public class PatchEntityRegistry implements EntityRegistry { private final DataSchemaFactory dataSchemaFactory; + @Getter private final PluginFactory pluginFactory; private final Map entityNameToSpec; private final Map eventNameToSpec; private final Map _aspectNameToSpec; @@ -90,6 +93,7 @@ public PatchEntityRegistry( throws IOException, EntityRegistryException { this( DataSchemaFactory.withCustomClasspath(configFileClassPathPair.getSecond()), + DataSchemaFactory.getClassLoader(configFileClassPathPair.getSecond()).stream().toList(), configFileClassPathPair.getFirst(), registryName, registryVersion); @@ -138,12 +142,14 @@ private static Pair getFileAndClassPath(String entityRegistryRoot) public PatchEntityRegistry( DataSchemaFactory dataSchemaFactory, + List classLoaders, Path configFilePath, String registryName, ComparableVersion registryVersion) throws FileNotFoundException, EntityRegistryException { this( dataSchemaFactory, + classLoaders, new FileInputStream(configFilePath.toString()), registryName, registryVersion); @@ -151,6 +157,7 @@ public PatchEntityRegistry( private PatchEntityRegistry( DataSchemaFactory dataSchemaFactory, + List classLoaders, InputStream configFileStream, String registryName, ComparableVersion registryVersion) @@ -162,6 +169,7 @@ private PatchEntityRegistry( Entities entities; try { entities = OBJECT_MAPPER.readValue(configFileStream, Entities.class); + this.pluginFactory = PluginFactory.withCustomClasspath(entities.getPlugins(), classLoaders); } catch (IOException e) { e.printStackTrace(); throw new IllegalArgumentException( diff --git a/entity-registry/src/main/java/com/linkedin/metadata/models/registry/PluginEntityRegistryLoader.java b/entity-registry/src/main/java/com/linkedin/metadata/models/registry/PluginEntityRegistryLoader.java index 05c752a5c1575..b90e5eb72400b 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/models/registry/PluginEntityRegistryLoader.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/models/registry/PluginEntityRegistryLoader.java @@ -181,6 +181,10 @@ private void loadOneRegistry( entityRegistry = new PatchEntityRegistry(patchDirectory, registryName, registryVersion); parentRegistry.apply(entityRegistry); loadResultBuilder.loadResult(LoadStatus.SUCCESS); + + // Load plugin information + loadResultBuilder.plugins(entityRegistry.getPluginFactory().getPluginLoadResult()); + log.info("Loaded registry {} successfully", entityRegistry); } catch (RuntimeException | EntityRegistryException | IOException e) { log.debug("{}: Failed to load registry {} with {}", this, 
registryName, e.getMessage()); @@ -189,6 +193,7 @@ private void loadOneRegistry( e.printStackTrace(pw); loadResultBuilder.loadResult(LoadStatus.FAILURE).failureReason(sw.toString()).failureCount(1); } + addLoadResult(registryName, registryVersion, loadResultBuilder.build(), entityRegistry); } diff --git a/entity-registry/src/main/java/com/linkedin/metadata/models/registry/config/Entities.java b/entity-registry/src/main/java/com/linkedin/metadata/models/registry/config/Entities.java index e55bfb69d6848..94f705d4f1193 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/models/registry/config/Entities.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/models/registry/config/Entities.java @@ -1,5 +1,6 @@ package com.linkedin.metadata.models.registry.config; +import com.linkedin.metadata.aspect.plugins.config.PluginConfiguration; import java.util.List; import lombok.AccessLevel; import lombok.AllArgsConstructor; @@ -13,4 +14,5 @@ public class Entities { String id; List entities; List events; + PluginConfiguration plugins; } diff --git a/entity-registry/src/main/java/com/linkedin/metadata/models/registry/config/EntityRegistryLoadResult.java b/entity-registry/src/main/java/com/linkedin/metadata/models/registry/config/EntityRegistryLoadResult.java index f08fa5ba0a477..076387909326b 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/models/registry/config/EntityRegistryLoadResult.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/models/registry/config/EntityRegistryLoadResult.java @@ -1,6 +1,8 @@ package com.linkedin.metadata.models.registry.config; +import java.util.Set; import lombok.Builder; +import lombok.Data; import lombok.Getter; import lombok.Setter; @@ -11,4 +13,19 @@ public class EntityRegistryLoadResult { private String registryLocation; private String failureReason; @Setter private int failureCount; + private PluginLoadResult plugins; + + @Builder + @Data + public static class PluginLoadResult { + private int validatorCount; + private int mutationHookCount; + private int mcpSideEffectCount; + private int mclSideEffectCount; + + @Builder.Default private Set validatorClasses = Set.of(); + @Builder.Default private Set mutationHookClasses = Set.of(); + @Builder.Default private Set mcpSideEffectClasses = Set.of(); + @Builder.Default private Set mclSideEffectClasses = Set.of(); + } } diff --git a/entity-registry/src/test/java/com/linkedin/metadata/aspect/plugins/PluginsTest.java b/entity-registry/src/test/java/com/linkedin/metadata/aspect/plugins/PluginsTest.java new file mode 100644 index 0000000000000..8c3f71fcc8019 --- /dev/null +++ b/entity-registry/src/test/java/com/linkedin/metadata/aspect/plugins/PluginsTest.java @@ -0,0 +1,211 @@ +package com.linkedin.metadata.aspect.plugins; + +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertNotNull; + +import com.datahub.test.TestEntityProfile; +import com.linkedin.data.schema.annotation.PathSpecBasedSchemaAnnotationVisitor; +import com.linkedin.events.metadata.ChangeType; +import com.linkedin.metadata.models.EntitySpec; +import com.linkedin.metadata.models.EventSpec; +import com.linkedin.metadata.models.registry.ConfigEntityRegistry; +import com.linkedin.metadata.models.registry.EntityRegistryException; +import com.linkedin.metadata.models.registry.MergedEntityRegistry; +import java.io.FileNotFoundException; +import java.util.Map; +import org.testng.annotations.BeforeTest; +import org.testng.annotations.Test; + +public class PluginsTest { + public static String 
REGISTRY_FILE_1 = "test-entity-registry-plugins-1.yml"; + public static String REGISTRY_FILE_2 = "test-entity-registry-plugins-2.yml"; + public static String REGISTRY_FILE_3 = "test-entity-registry-plugins-3.yml"; + + @BeforeTest + public void disableAssert() { + PathSpecBasedSchemaAnnotationVisitor.class + .getClassLoader() + .setClassAssertionStatus(PathSpecBasedSchemaAnnotationVisitor.class.getName(), false); + } + + @Test + public void testConfigEntityRegistry() throws FileNotFoundException { + ConfigEntityRegistry configEntityRegistry = + new ConfigEntityRegistry( + TestEntityProfile.class.getClassLoader().getResourceAsStream(REGISTRY_FILE_1)); + + Map entitySpecs = configEntityRegistry.getEntitySpecs(); + Map eventSpecs = configEntityRegistry.getEventSpecs(); + assertEquals(entitySpecs.values().size(), 2); + assertEquals(eventSpecs.values().size(), 1); + + EntitySpec entitySpec = configEntityRegistry.getEntitySpec("dataset"); + assertEquals(entitySpec.getName(), "dataset"); + assertEquals(entitySpec.getKeyAspectSpec().getName(), "datasetKey"); + assertEquals(entitySpec.getAspectSpecs().size(), 4); + assertNotNull(entitySpec.getAspectSpec("datasetKey")); + assertNotNull(entitySpec.getAspectSpec("datasetProperties")); + assertNotNull(entitySpec.getAspectSpec("schemaMetadata")); + assertNotNull(entitySpec.getAspectSpec("status")); + + entitySpec = configEntityRegistry.getEntitySpec("chart"); + assertEquals(entitySpec.getName(), "chart"); + assertEquals(entitySpec.getKeyAspectSpec().getName(), "chartKey"); + assertEquals(entitySpec.getAspectSpecs().size(), 3); + assertNotNull(entitySpec.getAspectSpec("chartKey")); + assertNotNull(entitySpec.getAspectSpec("chartInfo")); + assertNotNull(entitySpec.getAspectSpec("status")); + + EventSpec eventSpec = configEntityRegistry.getEventSpec("testEvent"); + assertEquals(eventSpec.getName(), "testEvent"); + assertNotNull(eventSpec.getPegasusSchema()); + + assertEquals( + configEntityRegistry.getAspectPayloadValidators(ChangeType.UPSERT, "*", "status").size(), + 2); + assertEquals( + configEntityRegistry.getAspectPayloadValidators(ChangeType.DELETE, "*", "status").size(), + 0); + + assertEquals( + configEntityRegistry.getMCLSideEffects(ChangeType.UPSERT, "chart", "chartInfo").size(), 1); + assertEquals( + configEntityRegistry.getMCLSideEffects(ChangeType.DELETE, "chart", "chartInfo").size(), 0); + + assertEquals( + configEntityRegistry.getMCPSideEffects(ChangeType.UPSERT, "dataset", "datasetKey").size(), + 1); + assertEquals( + configEntityRegistry.getMCPSideEffects(ChangeType.DELETE, "dataset", "datasetKey").size(), + 0); + + assertEquals( + configEntityRegistry.getMutationHooks(ChangeType.UPSERT, "*", "schemaMetadata").size(), 1); + assertEquals( + configEntityRegistry.getMutationHooks(ChangeType.DELETE, "*", "schemaMetadata").size(), 0); + } + + @Test + public void testMergedEntityRegistry() throws EntityRegistryException { + ConfigEntityRegistry configEntityRegistry1 = + new ConfigEntityRegistry( + TestEntityProfile.class.getClassLoader().getResourceAsStream(REGISTRY_FILE_1)); + ConfigEntityRegistry configEntityRegistry2 = + new ConfigEntityRegistry( + TestEntityProfile.class.getClassLoader().getResourceAsStream(REGISTRY_FILE_2)); + + MergedEntityRegistry mergedEntityRegistry = new MergedEntityRegistry(configEntityRegistry1); + mergedEntityRegistry.apply(configEntityRegistry2); + + Map entitySpecs = mergedEntityRegistry.getEntitySpecs(); + Map eventSpecs = mergedEntityRegistry.getEventSpecs(); + assertEquals(entitySpecs.values().size(), 2); + 
assertEquals(eventSpecs.values().size(), 1); + + EntitySpec entitySpec = mergedEntityRegistry.getEntitySpec("dataset"); + assertEquals(entitySpec.getName(), "dataset"); + assertEquals(entitySpec.getKeyAspectSpec().getName(), "datasetKey"); + assertEquals(entitySpec.getAspectSpecs().size(), 4); + assertNotNull(entitySpec.getAspectSpec("datasetKey")); + assertNotNull(entitySpec.getAspectSpec("datasetProperties")); + assertNotNull(entitySpec.getAspectSpec("schemaMetadata")); + assertNotNull(entitySpec.getAspectSpec("status")); + + entitySpec = mergedEntityRegistry.getEntitySpec("chart"); + assertEquals(entitySpec.getName(), "chart"); + assertEquals(entitySpec.getKeyAspectSpec().getName(), "chartKey"); + assertEquals(entitySpec.getAspectSpecs().size(), 3); + assertNotNull(entitySpec.getAspectSpec("chartKey")); + assertNotNull(entitySpec.getAspectSpec("chartInfo")); + assertNotNull(entitySpec.getAspectSpec("status")); + + EventSpec eventSpec = mergedEntityRegistry.getEventSpec("testEvent"); + assertEquals(eventSpec.getName(), "testEvent"); + assertNotNull(eventSpec.getPegasusSchema()); + + assertEquals( + mergedEntityRegistry.getAspectPayloadValidators(ChangeType.UPSERT, "*", "status").size(), + 3); + assertEquals( + mergedEntityRegistry.getAspectPayloadValidators(ChangeType.DELETE, "*", "status").size(), + 1); + + assertEquals( + mergedEntityRegistry.getMCLSideEffects(ChangeType.UPSERT, "chart", "chartInfo").size(), 2); + assertEquals( + mergedEntityRegistry.getMCLSideEffects(ChangeType.DELETE, "chart", "chartInfo").size(), 1); + + assertEquals( + mergedEntityRegistry.getMCPSideEffects(ChangeType.UPSERT, "dataset", "datasetKey").size(), + 2); + assertEquals( + mergedEntityRegistry.getMCPSideEffects(ChangeType.DELETE, "dataset", "datasetKey").size(), + 1); + + assertEquals( + mergedEntityRegistry.getMutationHooks(ChangeType.UPSERT, "*", "schemaMetadata").size(), 2); + assertEquals( + mergedEntityRegistry.getMutationHooks(ChangeType.DELETE, "*", "schemaMetadata").size(), 1); + } + + @Test + public void tripleMergeWithDisabled() throws EntityRegistryException { + ConfigEntityRegistry configEntityRegistry1 = + new ConfigEntityRegistry( + TestEntityProfile.class.getClassLoader().getResourceAsStream(REGISTRY_FILE_1)); + ConfigEntityRegistry configEntityRegistry2 = + new ConfigEntityRegistry( + TestEntityProfile.class.getClassLoader().getResourceAsStream(REGISTRY_FILE_2)); + ConfigEntityRegistry configEntityRegistry3 = + new ConfigEntityRegistry( + TestEntityProfile.class.getClassLoader().getResourceAsStream(REGISTRY_FILE_3)); + + MergedEntityRegistry mergedEntityRegistry = new MergedEntityRegistry(configEntityRegistry1); + mergedEntityRegistry.apply(configEntityRegistry2); + + assertEquals( + mergedEntityRegistry.getAllAspectPayloadValidators().stream() + .filter(p -> p.getConfig().getSupportedOperations().contains("DELETE")) + .count(), + 1); + assertEquals( + mergedEntityRegistry.getAllMutationHooks().stream() + .filter(p -> p.getConfig().getSupportedOperations().contains("DELETE")) + .count(), + 1); + assertEquals( + mergedEntityRegistry.getAllMCLSideEffects().stream() + .filter(p -> p.getConfig().getSupportedOperations().contains("DELETE")) + .count(), + 1); + assertEquals( + mergedEntityRegistry.getAllMCPSideEffects().stream() + .filter(p -> p.getConfig().getSupportedOperations().contains("DELETE")) + .count(), + 1); + + // This one disables earlier plugins that are delete + mergedEntityRegistry.apply(configEntityRegistry3); + + assertEquals( + 
mergedEntityRegistry.getAllAspectPayloadValidators().stream() + .filter(p -> p.getConfig().getSupportedOperations().contains("DELETE")) + .count(), + 0); + assertEquals( + mergedEntityRegistry.getAllMutationHooks().stream() + .filter(p -> p.getConfig().getSupportedOperations().contains("DELETE")) + .count(), + 0); + assertEquals( + mergedEntityRegistry.getAllMCLSideEffects().stream() + .filter(p -> p.getConfig().getSupportedOperations().contains("DELETE")) + .count(), + 0); + assertEquals( + mergedEntityRegistry.getAllMCPSideEffects().stream() + .filter(p -> p.getConfig().getSupportedOperations().contains("DELETE")) + .count(), + 0); + } +} diff --git a/entity-registry/src/test/java/com/linkedin/metadata/aspect/plugins/hooks/MCLSideEffectTest.java b/entity-registry/src/test/java/com/linkedin/metadata/aspect/plugins/hooks/MCLSideEffectTest.java new file mode 100644 index 0000000000000..ce904142fecfe --- /dev/null +++ b/entity-registry/src/test/java/com/linkedin/metadata/aspect/plugins/hooks/MCLSideEffectTest.java @@ -0,0 +1,69 @@ +package com.linkedin.metadata.aspect.plugins.hooks; + +import static org.testng.Assert.assertEquals; + +import com.datahub.test.TestEntityProfile; +import com.linkedin.data.schema.annotation.PathSpecBasedSchemaAnnotationVisitor; +import com.linkedin.events.metadata.ChangeType; +import com.linkedin.metadata.aspect.batch.MCLBatchItem; +import com.linkedin.metadata.aspect.plugins.config.AspectPluginConfig; +import com.linkedin.metadata.aspect.plugins.validation.AspectRetriever; +import com.linkedin.metadata.models.registry.ConfigEntityRegistry; +import com.linkedin.metadata.models.registry.EntityRegistry; +import java.util.List; +import java.util.stream.Stream; +import javax.annotation.Nonnull; +import org.testng.annotations.BeforeTest; +import org.testng.annotations.Test; + +public class MCLSideEffectTest { + public static String REGISTRY_FILE = "test-entity-registry-plugins-1.yml"; + + @BeforeTest + public void disableAssert() { + PathSpecBasedSchemaAnnotationVisitor.class + .getClassLoader() + .setClassAssertionStatus(PathSpecBasedSchemaAnnotationVisitor.class.getName(), false); + } + + @Test + public void testCustomMCLSideEffect() { + ConfigEntityRegistry configEntityRegistry = + new ConfigEntityRegistry( + TestEntityProfile.class.getClassLoader().getResourceAsStream(REGISTRY_FILE)); + + List mclSideEffects = + configEntityRegistry.getMCLSideEffects(ChangeType.UPSERT, "chart", "chartInfo"); + assertEquals( + mclSideEffects, + List.of( + new TestMCLSideEffect( + AspectPluginConfig.builder() + .className( + "com.linkedin.metadata.aspect.plugins.hooks.MCLSideEffectTest$TestMCLSideEffect") + .supportedOperations(List.of("UPSERT")) + .enabled(true) + .supportedEntityAspectNames( + List.of( + AspectPluginConfig.EntityAspectName.builder() + .entityName("chart") + .aspectName("chartInfo") + .build())) + .build()))); + } + + public static class TestMCLSideEffect extends MCLSideEffect { + + public TestMCLSideEffect(AspectPluginConfig aspectPluginConfig) { + super(aspectPluginConfig); + } + + @Override + protected Stream applyMCLSideEffect( + @Nonnull MCLBatchItem input, + @Nonnull EntityRegistry entityRegistry, + @Nonnull AspectRetriever aspectRetriever) { + return Stream.of(input); + } + } +} diff --git a/entity-registry/src/test/java/com/linkedin/metadata/aspect/plugins/hooks/MCPSideEffectTest.java b/entity-registry/src/test/java/com/linkedin/metadata/aspect/plugins/hooks/MCPSideEffectTest.java new file mode 100644 index 0000000000000..ee8f947e0e994 --- /dev/null +++ 
b/entity-registry/src/test/java/com/linkedin/metadata/aspect/plugins/hooks/MCPSideEffectTest.java @@ -0,0 +1,67 @@ +package com.linkedin.metadata.aspect.plugins.hooks; + +import static org.testng.Assert.assertEquals; + +import com.datahub.test.TestEntityProfile; +import com.linkedin.data.schema.annotation.PathSpecBasedSchemaAnnotationVisitor; +import com.linkedin.events.metadata.ChangeType; +import com.linkedin.metadata.aspect.batch.UpsertItem; +import com.linkedin.metadata.aspect.plugins.config.AspectPluginConfig; +import com.linkedin.metadata.aspect.plugins.validation.AspectRetriever; +import com.linkedin.metadata.models.registry.ConfigEntityRegistry; +import com.linkedin.metadata.models.registry.EntityRegistry; +import java.util.List; +import java.util.stream.Stream; +import javax.annotation.Nonnull; +import org.testng.annotations.BeforeTest; +import org.testng.annotations.Test; + +public class MCPSideEffectTest { + public static String REGISTRY_FILE = "test-entity-registry-plugins-1.yml"; + + @BeforeTest + public void disableAssert() { + PathSpecBasedSchemaAnnotationVisitor.class + .getClassLoader() + .setClassAssertionStatus(PathSpecBasedSchemaAnnotationVisitor.class.getName(), false); + } + + @Test + public void testCustomMCPSideEffect() { + ConfigEntityRegistry configEntityRegistry = + new ConfigEntityRegistry( + TestEntityProfile.class.getClassLoader().getResourceAsStream(REGISTRY_FILE)); + + List mcpSideEffects = + configEntityRegistry.getMCPSideEffects(ChangeType.UPSERT, "dataset", "datasetKey"); + assertEquals( + mcpSideEffects, + List.of( + new MCPSideEffectTest.TestMCPSideEffect( + AspectPluginConfig.builder() + .className( + "com.linkedin.metadata.aspect.plugins.hooks.MCPSideEffectTest$TestMCPSideEffect") + .supportedOperations(List.of("UPSERT")) + .enabled(true) + .supportedEntityAspectNames( + List.of( + AspectPluginConfig.EntityAspectName.builder() + .entityName("dataset") + .aspectName("datasetKey") + .build())) + .build()))); + } + + public static class TestMCPSideEffect extends MCPSideEffect { + + public TestMCPSideEffect(AspectPluginConfig aspectPluginConfig) { + super(aspectPluginConfig); + } + + @Override + protected Stream applyMCPSideEffect( + UpsertItem input, EntityRegistry entityRegistry, @Nonnull AspectRetriever aspectRetriever) { + return Stream.of(input); + } + } +} diff --git a/entity-registry/src/test/java/com/linkedin/metadata/aspect/plugins/hooks/MutationPluginTest.java b/entity-registry/src/test/java/com/linkedin/metadata/aspect/plugins/hooks/MutationPluginTest.java new file mode 100644 index 0000000000000..5094fd7fdd443 --- /dev/null +++ b/entity-registry/src/test/java/com/linkedin/metadata/aspect/plugins/hooks/MutationPluginTest.java @@ -0,0 +1,76 @@ +package com.linkedin.metadata.aspect.plugins.hooks; + +import static org.testng.Assert.assertEquals; + +import com.datahub.test.TestEntityProfile; +import com.linkedin.common.AuditStamp; +import com.linkedin.data.schema.annotation.PathSpecBasedSchemaAnnotationVisitor; +import com.linkedin.data.template.RecordTemplate; +import com.linkedin.events.metadata.ChangeType; +import com.linkedin.metadata.aspect.plugins.config.AspectPluginConfig; +import com.linkedin.metadata.aspect.plugins.validation.AspectRetriever; +import com.linkedin.metadata.models.AspectSpec; +import com.linkedin.metadata.models.EntitySpec; +import com.linkedin.metadata.models.registry.ConfigEntityRegistry; +import com.linkedin.mxe.SystemMetadata; +import java.util.List; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; 
+import org.testng.annotations.BeforeTest; +import org.testng.annotations.Test; + +public class MutationPluginTest { + public static String REGISTRY_FILE = "test-entity-registry-plugins-1.yml"; + + @BeforeTest + public void disableAssert() { + PathSpecBasedSchemaAnnotationVisitor.class + .getClassLoader() + .setClassAssertionStatus(PathSpecBasedSchemaAnnotationVisitor.class.getName(), false); + } + + @Test + public void testCustomMutator() { + ConfigEntityRegistry configEntityRegistry = + new ConfigEntityRegistry( + TestEntityProfile.class.getClassLoader().getResourceAsStream(REGISTRY_FILE)); + + List mutators = + configEntityRegistry.getMutationHooks(ChangeType.UPSERT, "*", "schemaMetadata"); + assertEquals( + mutators, + List.of( + new TestMutator( + AspectPluginConfig.builder() + .className( + "com.linkedin.metadata.aspect.plugins.hooks.MutationPluginTest$TestMutator") + .supportedOperations(List.of("UPSERT")) + .enabled(true) + .supportedEntityAspectNames( + List.of( + AspectPluginConfig.EntityAspectName.builder() + .entityName("*") + .aspectName("schemaMetadata") + .build())) + .build()))); + } + + public static class TestMutator extends MutationHook { + + public TestMutator(AspectPluginConfig aspectPluginConfig) { + super(aspectPluginConfig); + } + + @Override + protected void mutate( + @Nonnull ChangeType changeType, + @Nonnull EntitySpec entitySpec, + @Nonnull AspectSpec aspectSpec, + @Nullable RecordTemplate oldAspectValue, + @Nullable RecordTemplate newAspectValue, + @Nullable SystemMetadata oldSystemMetadata, + @Nullable SystemMetadata newSystemMetadata, + @Nonnull AuditStamp auditStamp, + @Nonnull AspectRetriever aspectRetriever) {} + } +} diff --git a/entity-registry/src/test/java/com/linkedin/metadata/aspect/plugins/validation/ValidatorPluginTest.java b/entity-registry/src/test/java/com/linkedin/metadata/aspect/plugins/validation/ValidatorPluginTest.java new file mode 100644 index 0000000000000..07c99ee8546be --- /dev/null +++ b/entity-registry/src/test/java/com/linkedin/metadata/aspect/plugins/validation/ValidatorPluginTest.java @@ -0,0 +1,97 @@ +package com.linkedin.metadata.aspect.plugins.validation; + +import static org.testng.Assert.assertEquals; + +import com.datahub.test.TestEntityProfile; +import com.linkedin.common.urn.Urn; +import com.linkedin.data.schema.annotation.PathSpecBasedSchemaAnnotationVisitor; +import com.linkedin.data.template.RecordTemplate; +import com.linkedin.events.metadata.ChangeType; +import com.linkedin.metadata.aspect.plugins.config.AspectPluginConfig; +import com.linkedin.metadata.models.AspectSpec; +import com.linkedin.metadata.models.registry.ConfigEntityRegistry; +import java.util.List; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; +import org.testng.annotations.BeforeTest; +import org.testng.annotations.Test; + +public class ValidatorPluginTest { + public static String REGISTRY_FILE = "test-entity-registry-plugins-1.yml"; + + @BeforeTest + public void disableAssert() { + PathSpecBasedSchemaAnnotationVisitor.class + .getClassLoader() + .setClassAssertionStatus(PathSpecBasedSchemaAnnotationVisitor.class.getName(), false); + } + + @Test + public void testCustomValidator() { + ConfigEntityRegistry configEntityRegistry = + new ConfigEntityRegistry( + TestEntityProfile.class.getClassLoader().getResourceAsStream(REGISTRY_FILE)); + + List validators = + configEntityRegistry.getAspectPayloadValidators(ChangeType.UPSERT, "*", "status"); + assertEquals( + validators, + List.of( + new TestValidator( + AspectPluginConfig.builder() 
+ .className( + "com.linkedin.metadata.aspect.plugins.validation.ValidatorPluginTest$TestValidator") + .supportedOperations(List.of("UPSERT")) + .enabled(true) + .supportedEntityAspectNames( + List.of( + AspectPluginConfig.EntityAspectName.builder() + .entityName("*") + .aspectName("status") + .build())) + .build()), + new TestValidator( + AspectPluginConfig.builder() + .className( + "com.linkedin.metadata.aspect.plugins.validation.ValidatorPluginTest$TestValidator") + .supportedOperations(List.of("UPSERT")) + .enabled(true) + .supportedEntityAspectNames( + List.of( + AspectPluginConfig.EntityAspectName.builder() + .entityName("chart") + .aspectName("status") + .build())) + .build()))); + } + + public static class TestValidator extends AspectPayloadValidator { + + public TestValidator(AspectPluginConfig config) { + super(config); + } + + @Override + protected void validateProposedAspect( + @Nonnull ChangeType changeType, + @Nonnull Urn entityUrn, + @Nonnull AspectSpec aspectSpec, + @Nonnull RecordTemplate aspectPayload, + AspectRetriever aspectRetriever) + throws AspectValidationException { + if (entityUrn.toString().contains("dataset")) { + throw new AspectValidationException("test error"); + } + } + + @Override + protected void validatePreCommitAspect( + @Nonnull ChangeType changeType, + @Nonnull Urn entityUrn, + @Nonnull AspectSpec aspectSpec, + @Nullable RecordTemplate previousAspect, + @Nonnull RecordTemplate proposedAspect, + AspectRetriever aspectRetriever) + throws AspectValidationException {} + } +} diff --git a/entity-registry/src/test/java/com/linkedin/metadata/models/registry/PatchEntityRegistryTest.java b/entity-registry/src/test/java/com/linkedin/metadata/models/registry/PatchEntityRegistryTest.java index 1652a51290597..27227f133ab55 100644 --- a/entity-registry/src/test/java/com/linkedin/metadata/models/registry/PatchEntityRegistryTest.java +++ b/entity-registry/src/test/java/com/linkedin/metadata/models/registry/PatchEntityRegistryTest.java @@ -5,6 +5,7 @@ import com.linkedin.metadata.models.DataSchemaFactory; import com.linkedin.metadata.models.EntitySpec; import com.linkedin.metadata.models.EventSpec; +import java.nio.file.Path; import java.nio.file.Paths; import java.util.Map; import org.testng.annotations.Test; @@ -47,18 +48,20 @@ public void testEntityRegistryLoad() throws Exception, EntityRegistryException { */ @Test public void testEntityRegistryWithKeyLoad() throws Exception, EntityRegistryException { - DataSchemaFactory dataSchemaFactory = - DataSchemaFactory.withCustomClasspath( - Paths.get( - TestConstants.BASE_DIRECTORY - + "/" - + TestConstants.TEST_REGISTRY - + "/" - + TestConstants.TEST_VERSION.toString())); + Path pluginLocation = + Paths.get( + TestConstants.BASE_DIRECTORY + + "/" + + TestConstants.TEST_REGISTRY + + "/" + + TestConstants.TEST_VERSION.toString()); + + DataSchemaFactory dataSchemaFactory = DataSchemaFactory.withCustomClasspath(pluginLocation); PatchEntityRegistry patchEntityRegistry = new PatchEntityRegistry( dataSchemaFactory, + DataSchemaFactory.getClassLoader(pluginLocation).stream().toList(), Paths.get("src/test_plugins/mycompany-full-model/0.0.1/entity-registry.yaml"), TestConstants.TEST_REGISTRY, TestConstants.TEST_VERSION); diff --git a/entity-registry/src/test/resources/test-entity-registry-plugins-1.yml b/entity-registry/src/test/resources/test-entity-registry-plugins-1.yml new file mode 100644 index 0000000000000..7ef21bce3144c --- /dev/null +++ b/entity-registry/src/test/resources/test-entity-registry-plugins-1.yml @@ -0,0 +1,67 @@ 
+id: test-registry-1 +entities: + - name: dataset + keyAspect: datasetKey + category: core + aspects: + - datasetProperties + - schemaMetadata + - status + - name: chart + keyAspect: chartKey + aspects: + - chartInfo + - status +events: + - name: testEvent + +plugins: + aspectPayloadValidators: + # All status aspects on any entity + - className: 'com.linkedin.metadata.aspect.plugins.validation.ValidatorPluginTest$TestValidator' + enabled: true + supportedOperations: + - UPSERT + supportedEntityAspectNames: + - entityName: '*' + aspectName: status + # Chart status only + - className: 'com.linkedin.metadata.aspect.plugins.validation.ValidatorPluginTest$TestValidator' + enabled: true + supportedOperations: + - UPSERT + supportedEntityAspectNames: + - entityName: chart + aspectName: status + # Disabled + - className: 'com.linkedin.metadata.aspect.plugins.validation.ValidatorPluginTest$TestValidator' + enabled: false + supportedOperations: + - DELETE + supportedEntityAspectNames: + - entityName: '*' + aspectName: status + mutationHooks: + - className: 'com.linkedin.metadata.aspect.plugins.hooks.MutationPluginTest$TestMutator' + enabled: true + supportedOperations: + - UPSERT + supportedEntityAspectNames: + - entityName: '*' + aspectName: schemaMetadata + mclSideEffects: + - className: 'com.linkedin.metadata.aspect.plugins.hooks.MCLSideEffectTest$TestMCLSideEffect' + enabled: true + supportedOperations: + - UPSERT + supportedEntityAspectNames: + - entityName: chart + aspectName: chartInfo + mcpSideEffects: + - className: 'com.linkedin.metadata.aspect.plugins.hooks.MCPSideEffectTest$TestMCPSideEffect' + enabled: true + supportedOperations: + - UPSERT + supportedEntityAspectNames: + - entityName: dataset + aspectName: datasetKey diff --git a/entity-registry/src/test/resources/test-entity-registry-plugins-2.yml b/entity-registry/src/test/resources/test-entity-registry-plugins-2.yml new file mode 100644 index 0000000000000..b35b17d3bd7db --- /dev/null +++ b/entity-registry/src/test/resources/test-entity-registry-plugins-2.yml @@ -0,0 +1,45 @@ +id: test-registry-2 +entities: [] +plugins: + aspectPayloadValidators: + - className: 'com.linkedin.metadata.aspect.plugins.validation.ValidatorPluginTest$TestValidator' + enabled: true + supportedOperations: + - UPSERT + supportedEntityAspectNames: + - entityName: dataset + aspectName: status + - className: 'com.linkedin.metadata.aspect.plugins.validation.ValidatorPluginTest$TestValidator' + enabled: true + supportedOperations: + - DELETE + supportedEntityAspectNames: + - entityName: '*' + aspectName: status + mutationHooks: + - className: 'com.linkedin.metadata.aspect.plugins.hooks.MutationPluginTest$TestMutator' + enabled: true + supportedOperations: + - UPSERT + - DELETE + supportedEntityAspectNames: + - entityName: '*' + aspectName: schemaMetadata + mclSideEffects: + - className: 'com.linkedin.metadata.aspect.plugins.hooks.MCLSideEffectTest$TestMCLSideEffect' + enabled: true + supportedOperations: + - UPSERT + - DELETE + supportedEntityAspectNames: + - entityName: chart + aspectName: chartInfo + mcpSideEffects: + - className: 'com.linkedin.metadata.aspect.plugins.hooks.MCPSideEffectTest$TestMCPSideEffect' + enabled: true + supportedOperations: + - UPSERT + - DELETE + supportedEntityAspectNames: + - entityName: dataset + aspectName: datasetKey diff --git a/entity-registry/src/test/resources/test-entity-registry-plugins-3.yml b/entity-registry/src/test/resources/test-entity-registry-plugins-3.yml new file mode 100644 index 0000000000000..8ce21e27d0a1c --- 
/dev/null +++ b/entity-registry/src/test/resources/test-entity-registry-plugins-3.yml @@ -0,0 +1,38 @@ +id: test-registry-3 +entities: [] +plugins: + aspectPayloadValidators: + - className: 'com.linkedin.metadata.aspect.plugins.validation.ValidatorPluginTest$TestValidator' + enabled: false + supportedOperations: + - DELETE + supportedEntityAspectNames: + - entityName: '*' + aspectName: status + mutationHooks: + - className: 'com.linkedin.metadata.aspect.plugins.hooks.MutationPluginTest$TestMutator' + enabled: false + supportedOperations: + - UPSERT + - DELETE + supportedEntityAspectNames: + - entityName: '*' + aspectName: schemaMetadata + mclSideEffects: + - className: 'com.linkedin.metadata.aspect.plugins.hooks.MCLSideEffectTest$TestMCLSideEffect' + enabled: false + supportedOperations: + - UPSERT + - DELETE + supportedEntityAspectNames: + - entityName: chart + aspectName: chartInfo + mcpSideEffects: + - className: 'com.linkedin.metadata.aspect.plugins.hooks.MCPSideEffectTest$TestMCPSideEffect' + enabled: false + supportedOperations: + - UPSERT + - DELETE + supportedEntityAspectNames: + - entityName: dataset + aspectName: datasetKey diff --git a/metadata-io/src/main/java/com/linkedin/metadata/client/JavaEntityClient.java b/metadata-io/src/main/java/com/linkedin/metadata/client/JavaEntityClient.java index e7ec4d313b5f5..34921e4182b10 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/client/JavaEntityClient.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/client/JavaEntityClient.java @@ -23,14 +23,15 @@ import com.linkedin.metadata.aspect.EnvelopedAspect; import com.linkedin.metadata.aspect.EnvelopedAspectArray; import com.linkedin.metadata.aspect.VersionedAspect; +import com.linkedin.metadata.aspect.batch.AspectsBatch; import com.linkedin.metadata.browse.BrowseResult; import com.linkedin.metadata.browse.BrowseResultV2; import com.linkedin.metadata.entity.AspectUtils; import com.linkedin.metadata.entity.DeleteEntityService; import com.linkedin.metadata.entity.EntityService; import com.linkedin.metadata.entity.IngestResult; -import com.linkedin.metadata.entity.ebean.transactions.AspectsBatchImpl; -import com.linkedin.metadata.entity.transactions.AspectsBatch; +import com.linkedin.metadata.entity.ebean.batch.AspectsBatchImpl; +import com.linkedin.metadata.entity.ebean.batch.MCPUpsertBatchItem; import com.linkedin.metadata.event.EventProducer; import com.linkedin.metadata.graph.LineageDirection; import com.linkedin.metadata.query.AutoCompleteResult; @@ -84,7 +85,7 @@ public class JavaEntityClient implements EntityClient { private final Clock _clock = Clock.systemUTC(); - private final EntityService _entityService; + private final EntityService _entityService; private final DeleteEntityService _deleteEntityService; private final EntitySearchService _entitySearchService; private final CachingEntitySearchService _cachingEntitySearchService; @@ -712,11 +713,14 @@ public String ingestProposal( Stream.concat(Stream.of(metadataChangeProposal), additionalChanges.stream()); AspectsBatch batch = AspectsBatchImpl.builder() - .mcps(proposalStream.collect(Collectors.toList()), _entityService.getEntityRegistry()) + .mcps( + proposalStream.collect(Collectors.toList()), + auditStamp, + _entityService.getEntityRegistry(), + this) .build(); - IngestResult one = - _entityService.ingestProposal(batch, auditStamp, async).stream().findFirst().get(); + IngestResult one = _entityService.ingestProposal(batch, async).stream().findFirst().get(); Urn urn = one.getUrn(); tryIndexRunId(urn, 
metadataChangeProposal.getSystemMetadata()); diff --git a/metadata-io/src/main/java/com/linkedin/metadata/client/SystemJavaEntityClient.java b/metadata-io/src/main/java/com/linkedin/metadata/client/SystemJavaEntityClient.java index 0ac18b4aacc04..31c2846a9c9f3 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/client/SystemJavaEntityClient.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/client/SystemJavaEntityClient.java @@ -7,6 +7,7 @@ import com.linkedin.metadata.config.cache.client.EntityClientCacheConfig; import com.linkedin.metadata.entity.DeleteEntityService; import com.linkedin.metadata.entity.EntityService; +import com.linkedin.metadata.entity.ebean.batch.MCPUpsertBatchItem; import com.linkedin.metadata.event.EventProducer; import com.linkedin.metadata.search.EntitySearchService; import com.linkedin.metadata.search.LineageSearchService; @@ -23,7 +24,7 @@ public class SystemJavaEntityClient extends JavaEntityClient implements SystemEn private final Authentication systemAuthentication; public SystemJavaEntityClient( - EntityService entityService, + EntityService entityService, DeleteEntityService deleteEntityService, EntitySearchService entitySearchService, CachingEntitySearchService cachingEntitySearchService, diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/AspectDao.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/AspectDao.java index ae27f9f7e6f1a..e00a696a095a1 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/AspectDao.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/AspectDao.java @@ -1,9 +1,9 @@ package com.linkedin.metadata.entity; import com.linkedin.common.urn.Urn; +import com.linkedin.metadata.aspect.batch.AspectsBatch; import com.linkedin.metadata.entity.ebean.EbeanAspectV2; import com.linkedin.metadata.entity.restoreindices.RestoreIndicesArgs; -import com.linkedin.metadata.entity.transactions.AspectsBatch; import com.linkedin.metadata.utils.metrics.MetricUtils; import io.ebean.PagedList; import io.ebean.Transaction; diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityAspect.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityAspect.java index eaf9b1a2cc415..d72586e289ea7 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityAspect.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityAspect.java @@ -1,7 +1,14 @@ package com.linkedin.metadata.entity; +import com.linkedin.common.urn.Urn; +import com.linkedin.data.template.RecordTemplate; +import com.linkedin.metadata.aspect.batch.SystemAspect; +import com.linkedin.metadata.models.registry.EntityRegistry; +import com.linkedin.mxe.SystemMetadata; +import java.net.URISyntaxException; import java.sql.Timestamp; import javax.annotation.Nonnull; +import javax.annotation.Nullable; import lombok.AllArgsConstructor; import lombok.EqualsAndHashCode; import lombok.Getter; @@ -40,4 +47,74 @@ public class EntityAspect { public EntityAspectIdentifier toAspectIdentifier() { return new EntityAspectIdentifier(getUrn(), getAspect(), getVersion()); } + + @Nonnull + public SystemAspect asSystemAspect() { + return EntitySystemAspect.from(this); + } + + /** + * Provide a typed EntityAspect without breaking the existing public contract with generic types. 
+ */ + @Getter + @AllArgsConstructor + @EqualsAndHashCode + public static class EntitySystemAspect implements SystemAspect { + + @Nullable + public static EntitySystemAspect from(EntityAspect entityAspect) { + return entityAspect != null ? new EntitySystemAspect(entityAspect) : null; + } + + @Nonnull private final EntityAspect entityAspect; + + @Nonnull + public Urn getUrn() { + try { + return Urn.createFromString(entityAspect.getUrn()); + } catch (URISyntaxException e) { + throw new RuntimeException(e); + } + } + + @Nonnull + public String getUrnRaw() { + return entityAspect.getUrn(); + } + + @Override + public SystemMetadata getSystemMetadata() { + return EntityUtils.parseSystemMetadata(entityAspect.getSystemMetadata()); + } + + @Nullable + public String getSystemMetadataRaw() { + return entityAspect.getSystemMetadata(); + } + + @Override + public Timestamp getCreatedOn() { + return entityAspect.getCreatedOn(); + } + + @Override + public String getAspectName() { + return entityAspect.aspect; + } + + @Override + public long getVersion() { + return entityAspect.getVersion(); + } + + @Override + public RecordTemplate getRecordTemplate(EntityRegistry entityRegistry) { + return EntityUtils.toAspectRecord( + getUrn().getEntityType(), getAspectName(), entityAspect.getMetadata(), entityRegistry); + } + + public EntityAspect asRaw() { + return entityAspect; + } + } } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java index 7bd8e763cdc27..2e19916ee3c8f 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java @@ -1,8 +1,20 @@ package com.linkedin.metadata.entity; -import static com.linkedin.metadata.Constants.*; -import static com.linkedin.metadata.search.utils.BrowsePathUtils.*; -import static com.linkedin.metadata.utils.PegasusUtils.*; +import static com.linkedin.metadata.Constants.APP_SOURCE; +import static com.linkedin.metadata.Constants.ASPECT_LATEST_VERSION; +import static com.linkedin.metadata.Constants.BROWSE_PATHS_ASPECT_NAME; +import static com.linkedin.metadata.Constants.BROWSE_PATHS_V2_ASPECT_NAME; +import static com.linkedin.metadata.Constants.DATA_PLATFORM_INSTANCE_ASPECT_NAME; +import static com.linkedin.metadata.Constants.DEFAULT_RUN_ID; +import static com.linkedin.metadata.Constants.FORCE_INDEXING_KEY; +import static com.linkedin.metadata.Constants.STATUS_ASPECT_NAME; +import static com.linkedin.metadata.Constants.SYSTEM_ACTOR; +import static com.linkedin.metadata.Constants.UI_SOURCE; +import static com.linkedin.metadata.search.utils.BrowsePathUtils.buildDataPlatformUrn; +import static com.linkedin.metadata.search.utils.BrowsePathUtils.getDefaultBrowsePath; +import static com.linkedin.metadata.utils.PegasusUtils.constructMCL; +import static com.linkedin.metadata.utils.PegasusUtils.getDataTemplateClassFromSchema; +import static com.linkedin.metadata.utils.PegasusUtils.urnToEntityName; import com.codahale.metrics.Timer; import com.datahub.util.RecordUtils; @@ -39,17 +51,20 @@ import com.linkedin.metadata.Constants; import com.linkedin.metadata.aspect.Aspect; import com.linkedin.metadata.aspect.VersionedAspect; +import com.linkedin.metadata.aspect.batch.AspectsBatch; +import com.linkedin.metadata.aspect.batch.BatchItem; +import com.linkedin.metadata.aspect.batch.MCPBatchItem; +import com.linkedin.metadata.aspect.batch.SystemAspect; +import 
com.linkedin.metadata.aspect.batch.UpsertItem; +import com.linkedin.metadata.aspect.plugins.validation.AspectValidationException; import com.linkedin.metadata.config.PreProcessHooks; import com.linkedin.metadata.entity.ebean.EbeanAspectV2; -import com.linkedin.metadata.entity.ebean.transactions.AspectsBatchImpl; -import com.linkedin.metadata.entity.ebean.transactions.PatchBatchItem; -import com.linkedin.metadata.entity.ebean.transactions.UpsertBatchItem; +import com.linkedin.metadata.entity.ebean.batch.AspectsBatchImpl; +import com.linkedin.metadata.entity.ebean.batch.MCPUpsertBatchItem; import com.linkedin.metadata.entity.restoreindices.RestoreIndicesArgs; import com.linkedin.metadata.entity.restoreindices.RestoreIndicesResult; import com.linkedin.metadata.entity.retention.BulkApplyRetentionArgs; import com.linkedin.metadata.entity.retention.BulkApplyRetentionResult; -import com.linkedin.metadata.entity.transactions.AbstractBatchItem; -import com.linkedin.metadata.entity.transactions.AspectsBatch; import com.linkedin.metadata.event.EventProducer; import com.linkedin.metadata.models.AspectSpec; import com.linkedin.metadata.models.EntitySpec; @@ -72,6 +87,7 @@ import com.linkedin.util.Pair; import io.ebean.PagedList; import io.ebean.Transaction; +import io.opentelemetry.extension.annotations.WithSpan; import java.net.URISyntaxException; import java.nio.charset.StandardCharsets; import java.sql.Timestamp; @@ -129,7 +145,7 @@ * class. */ @Slf4j -public class EntityServiceImpl implements EntityService { +public class EntityServiceImpl implements EntityService { /** * As described above, the latest version of an aspect should always take the value 0, with @@ -141,7 +157,7 @@ public class EntityServiceImpl implements EntityService { private final EventProducer _producer; private final EntityRegistry _entityRegistry; private final Map> _entityToValidAspects; - private RetentionService _retentionService; + private RetentionService _retentionService; private final Boolean _alwaysEmitChangeLog; @Getter private final UpdateIndicesService _updateIndicesService; private final PreProcessHooks _preProcessHooks; @@ -149,6 +165,8 @@ public class EntityServiceImpl implements EntityService { private final Integer ebeanMaxTransactionRetry; + private SystemEntityClient systemEntityClient; + public EntityServiceImpl( @Nonnull final AspectDao aspectDao, @Nonnull final EventProducer producer, @@ -187,9 +205,21 @@ public EntityServiceImpl( @Override public void setSystemEntityClient(SystemEntityClient systemEntityClient) { + this.systemEntityClient = systemEntityClient; this._updateIndicesService.setSystemEntityClient(systemEntityClient); } + @Override + public SystemEntityClient getSystemEntityClient() { + return this.systemEntityClient; + } + + @Override + public RecordTemplate getLatestAspect(@Nonnull Urn urn, @Nonnull String aspectName) { + log.debug("Invoked getLatestAspect with urn {}, aspect {}", urn, aspectName); + return getAspect(urn, aspectName, ASPECT_LATEST_VERSION); + } + /** * Retrieves the latest aspects corresponding to a batch of {@link Urn}s based on a provided set * of aspect names. 
@@ -231,8 +261,7 @@ public Map> getLatestAspects( } final RecordTemplate aspectRecord = - EntityUtils.toAspectRecord( - urn, aspectName, aspectEntry.getMetadata(), getEntityRegistry()); + aspectEntry.asSystemAspect().getRecordTemplate(getEntityRegistry()); urnToAspects.putIfAbsent(urn, new ArrayList<>()); urnToAspects.get(urn).add(aspectRecord); }); @@ -252,8 +281,7 @@ public Map getLatestAspectsForUrn( (key, aspectEntry) -> { final String aspectName = key.getAspect(); final RecordTemplate aspectRecord = - EntityUtils.toAspectRecord( - urn, aspectName, aspectEntry.getMetadata(), getEntityRegistry()); + aspectEntry.asSystemAspect().getRecordTemplate(getEntityRegistry()); result.put(aspectName, aspectRecord); }); return result; @@ -320,13 +348,14 @@ public EntityResponse getEntityV2( * @param aspectNames set of aspects to fetch * @return a map of {@link Urn} to {@link Entity} object */ + @WithSpan @Override public Map getEntitiesV2( @Nonnull final String entityName, @Nonnull final Set urns, @Nonnull final Set aspectNames) throws URISyntaxException { - return getLatestEnvelopedAspects(entityName, urns, aspectNames).entrySet().stream() + return getLatestEnvelopedAspects(urns, aspectNames).entrySet().stream() .collect( Collectors.toMap( Map.Entry::getKey, entry -> toEntityResponse(entry.getKey(), entry.getValue()))); @@ -354,16 +383,13 @@ public Map getEntitiesVersionedV2( /** * Retrieves the latest aspects for the given set of urns as a list of enveloped aspects * - * @param entityName name of the entity to fetch * @param urns set of urns to fetch * @param aspectNames set of aspects to fetch - * @return a map of {@link Urn} to {@link EnvelopedAspect} object + * @return a map of {@link Urn} to {@link EntityAspect.EntitySystemAspect} object */ @Override public Map> getLatestEnvelopedAspects( - // TODO: entityName is unused, can we remove this as a param? - @Nonnull String entityName, @Nonnull Set urns, @Nonnull Set aspectNames) - throws URISyntaxException { + @Nonnull Set urns, @Nonnull Set aspectNames) throws URISyntaxException { final Set dbKeys = urns.stream() @@ -483,7 +509,7 @@ private Map> getCorrespondingAspects( public EnvelopedAspect getLatestEnvelopedAspect( @Nonnull final String entityName, @Nonnull final Urn urn, @Nonnull final String aspectName) throws Exception { - return getLatestEnvelopedAspects(entityName, ImmutableSet.of(urn), ImmutableSet.of(aspectName)) + return getLatestEnvelopedAspects(ImmutableSet.of(urn), ImmutableSet.of(aspectName)) .getOrDefault(urn, Collections.emptyList()) .stream() .filter(envelopedAspect -> envelopedAspect.getName().equals(aspectName)) @@ -597,18 +623,19 @@ public List ingestAspects( List> pairList, @Nonnull final AuditStamp auditStamp, SystemMetadata systemMetadata) { - List items = + List items = pairList.stream() .map( pair -> - UpsertBatchItem.builder() + MCPUpsertBatchItem.builder() .urn(entityUrn) .aspectName(pair.getKey()) .aspect(pair.getValue()) .systemMetadata(systemMetadata) - .build(_entityRegistry)) + .auditStamp(auditStamp) + .build(_entityRegistry, systemEntityClient)) .collect(Collectors.toList()); - return ingestAspects(AspectsBatchImpl.builder().items(items).build(), auditStamp, true, true); + return ingestAspects(AspectsBatchImpl.builder().items(items).build(), true, true); } /** @@ -616,22 +643,17 @@ public List ingestAspects( * com.linkedin.mxe.MetadataChangeLog}. 
* * @param aspectsBatch aspects to write - * @param auditStamp an {@link AuditStamp} containing metadata about the writer & current time * @param emitMCL whether a {@link com.linkedin.mxe.MetadataChangeLog} should be emitted in * correspondence upon successful update * @return the {@link RecordTemplate} representation of the written aspect object */ @Override public List ingestAspects( - @Nonnull final AspectsBatch aspectsBatch, - @Nonnull final AuditStamp auditStamp, - boolean emitMCL, - boolean overwrite) { + @Nonnull final AspectsBatch aspectsBatch, boolean emitMCL, boolean overwrite) { Timer.Context ingestToLocalDBTimer = MetricUtils.timer(this.getClass(), "ingestAspectsToLocalDB").time(); - List ingestResults = - ingestAspectsToLocalDB(aspectsBatch, auditStamp, overwrite); + List ingestResults = ingestAspectsToLocalDB(aspectsBatch, overwrite); List mclResults = emitMCL(ingestResults, emitMCL); ingestToLocalDBTimer.stop(); @@ -646,14 +668,11 @@ public List ingestAspects( * @param aspectsBatch Collection of the following: an urn associated with the new aspect, name of * the aspect being inserted, and a function to apply to the latest version of the aspect to * get the updated version - * @param auditStamp an {@link AuditStamp} containing metadata about the writer & current time * @return Details about the new and old version of the aspect */ @Nonnull private List ingestAspectsToLocalDB( - @Nonnull final AspectsBatch aspectsBatch, - @Nonnull final AuditStamp auditStamp, - boolean overwrite) { + @Nonnull final AspectsBatch aspectsBatch, boolean overwrite) { if (aspectsBatch.containsDuplicateAspects()) { log.warn(String.format("Batch contains duplicates: %s", aspectsBatch)); @@ -662,50 +681,68 @@ private List ingestAspectsToLocalDB( return _aspectDao.runInTransactionWithRetry( (tx) -> { // Read before write is unfortunate, however batch it - Map> urnAspects = aspectsBatch.getUrnAspectsMap(); + final Map> urnAspects = aspectsBatch.getUrnAspectsMap(); // read #1 - Map> latestAspects = - _aspectDao.getLatestAspects(urnAspects); + final Map> latestAspects = + toSystemEntityAspects(_aspectDao.getLatestAspects(urnAspects)); // read #2 - Map> nextVersions = _aspectDao.getNextVersions(urnAspects); + final Map> nextVersions = + _aspectDao.getNextVersions(urnAspects); + + // 1. Convert patches to full upserts + // 2. 
Run any entity/aspect level hooks + Pair>, List> updatedItems = + aspectsBatch.toUpsertBatchItems(latestAspects, _entityRegistry, systemEntityClient); + + // Fetch additional information if needed + final Map> updatedLatestAspects; + final Map> updatedNextVersions; + if (!updatedItems.getFirst().isEmpty()) { + Map> newLatestAspects = + toSystemEntityAspects(_aspectDao.getLatestAspects(updatedItems.getFirst())); + Map> newNextVersions = + _aspectDao.getNextVersions(updatedItems.getFirst()); + // merge + updatedLatestAspects = aspectsBatch.merge(latestAspects, newLatestAspects); + updatedNextVersions = aspectsBatch.merge(nextVersions, newNextVersions); + } else { + updatedLatestAspects = latestAspects; + updatedNextVersions = nextVersions; + } - List items = - aspectsBatch.getItems().stream() - .map( - item -> { - if (item instanceof UpsertBatchItem) { - return (UpsertBatchItem) item; - } else { - // patch to upsert - PatchBatchItem patchBatchItem = (PatchBatchItem) item; - final String urnStr = patchBatchItem.getUrn().toString(); - final EntityAspect latest = - latestAspects - .getOrDefault(urnStr, Map.of()) - .get(patchBatchItem.getAspectName()); - final RecordTemplate currentValue = - latest != null - ? EntityUtils.toAspectRecord( - patchBatchItem.getUrn(), - patchBatchItem.getAspectName(), - latest.getMetadata(), - _entityRegistry) - : null; - return patchBatchItem.applyPatch(_entityRegistry, currentValue); - } - }) - .collect(Collectors.toList()); + // do final pre-commit checks with previous aspect value + updatedItems + .getSecond() + .forEach( + item -> { + SystemAspect previousAspect = + updatedLatestAspects + .getOrDefault(item.getUrn().toString(), Map.of()) + .get(item.getAspectSpec().getName()); + try { + item.validatePreCommit( + previousAspect == null + ? null + : previousAspect.getRecordTemplate(_entityRegistry), + _entityRegistry, + systemEntityClient); + } catch (AspectValidationException e) { + throw new RuntimeException(e); + } + }); // Database Upsert results List upsertResults = - items.stream() + updatedItems.getSecond().stream() .map( item -> { final String urnStr = item.getUrn().toString(); - final EntityAspect latest = - latestAspects.getOrDefault(urnStr, Map.of()).get(item.getAspectName()); + final SystemAspect latest = + updatedLatestAspects + .getOrDefault(urnStr, Map.of()) + .get(item.getAspectName()); final long nextVersion = - nextVersions + updatedNextVersions .getOrDefault(urnStr, Map.of()) .getOrDefault(item.getAspectName(), 0L); @@ -717,9 +754,11 @@ private List ingestAspectsToLocalDB( item.getUrn(), item.getAspectName(), item.getAspect(), - auditStamp, + item.getAuditStamp(), item.getSystemMetadata(), - latest, + latest == null + ? 
null + : ((EntityAspect.EntitySystemAspect) latest).asRaw(), nextVersion) .toBuilder() .request(item) @@ -728,21 +767,15 @@ private List ingestAspectsToLocalDB( // support inner-batch upserts latestAspects .computeIfAbsent(urnStr, key -> new HashMap<>()) - .put(item.getAspectName(), item.toLatestEntityAspect(auditStamp)); + .put(item.getAspectName(), item.toLatestEntityAspect()); nextVersions .computeIfAbsent(urnStr, key -> new HashMap<>()) .put(item.getAspectName(), nextVersion + 1); } else { - RecordTemplate oldValue = - EntityUtils.toAspectRecord( - item.getUrn().getEntityType(), - item.getAspectName(), - latest.getMetadata(), - getEntityRegistry()); - SystemMetadata oldMetadata = - EntityUtils.parseSystemMetadata(latest.getSystemMetadata()); + RecordTemplate oldValue = latest.getRecordTemplate(_entityRegistry); + SystemMetadata oldMetadata = latest.getSystemMetadata(); result = - UpdateAspectResult.builder() + UpdateAspectResult.builder() .urn(item.getUrn()) .request(item) .oldValue(oldValue) @@ -750,7 +783,7 @@ private List ingestAspectsToLocalDB( .oldSystemMetadata(oldMetadata) .newSystemMetadata(oldMetadata) .operation(MetadataAuditOperation.UPDATE) - .auditStamp(auditStamp) + .auditStamp(item.getAuditStamp()) .maxVersion(latest.getVersion()) .build(); } @@ -804,6 +837,25 @@ private List ingestAspectsToLocalDB( DEFAULT_MAX_TRANSACTION_RETRY); } + /** + * Convert EntityAspect to EntitySystemAspect + * + * @param latestAspects latest aspect map + * @return map with converted values + */ + private static Map> toSystemEntityAspects( + Map> latestAspects) { + return latestAspects.entrySet().stream() + .map( + e -> + Map.entry( + e.getKey(), + e.getValue().entrySet().stream() + .map(e2 -> Map.entry(e2.getKey(), e2.getValue().asSystemAspect())) + .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)))) + .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); + } + @Nonnull private List emitMCL(List sqlResults, boolean emitMCL) { List withEmitMCL = @@ -875,14 +927,15 @@ public RecordTemplate ingestAspectIfNotPresent( AspectsBatchImpl aspectsBatch = AspectsBatchImpl.builder() .one( - UpsertBatchItem.builder() + MCPUpsertBatchItem.builder() .urn(urn) .aspectName(aspectName) .aspect(newValue) .systemMetadata(systemMetadata) - .build(_entityRegistry)) + .auditStamp(auditStamp) + .build(_entityRegistry, systemEntityClient)) .build(); - List ingested = ingestAspects(aspectsBatch, auditStamp, true, false); + List ingested = ingestAspects(aspectsBatch, true, false); return ingested.stream().findFirst().get().getNewValue(); } @@ -900,8 +953,9 @@ public RecordTemplate ingestAspectIfNotPresent( public IngestResult ingestProposal( MetadataChangeProposal proposal, AuditStamp auditStamp, final boolean async) { return ingestProposal( - AspectsBatchImpl.builder().mcps(List.of(proposal), getEntityRegistry()).build(), - auditStamp, + AspectsBatchImpl.builder() + .mcps(List.of(proposal), auditStamp, getEntityRegistry(), systemEntityClient) + .build(), async) .stream() .findFirst() @@ -917,19 +971,16 @@ public IngestResult ingestProposal( * Key aspect in the DB. Instead, use an Entity Client. 
* * @param aspectsBatch the proposals to ingest - * @param auditStamp an audit stamp representing the time and actor proposing the change * @param async a flag to control whether we commit to primary store or just write to proposal log * before returning * @return an {@link IngestResult} containing the results */ @Override - public Set ingestProposal( - AspectsBatch aspectsBatch, AuditStamp auditStamp, final boolean async) { + public Set ingestProposal(AspectsBatch aspectsBatch, final boolean async) { - Stream timeseriesIngestResults = - ingestTimeseriesProposal(aspectsBatch, auditStamp); + Stream timeseriesIngestResults = ingestTimeseriesProposal(aspectsBatch); Stream nonTimeseriesIngestResults = - async ? ingestProposalAsync(aspectsBatch) : ingestProposalSync(aspectsBatch, auditStamp); + async ? ingestProposalAsync(aspectsBatch) : ingestProposalSync(aspectsBatch); return Stream.concat(timeseriesIngestResults, nonTimeseriesIngestResults) .collect(Collectors.toSet()); @@ -939,12 +990,10 @@ public Set ingestProposal( * Timeseries is pass through to MCL, no MCP * * @param aspectsBatch timeseries upserts batch - * @param auditStamp provided audit information * @return returns ingest proposal result, however was never in the MCP topic */ - private Stream ingestTimeseriesProposal( - AspectsBatch aspectsBatch, AuditStamp auditStamp) { - List unsupported = + private Stream ingestTimeseriesProposal(AspectsBatch aspectsBatch) { + List unsupported = aspectsBatch.getItems().stream() .filter( item -> @@ -954,15 +1003,13 @@ private Stream ingestTimeseriesProposal( if (!unsupported.isEmpty()) { throw new UnsupportedOperationException( "ChangeType not supported: " - + unsupported.stream() - .map(AbstractBatchItem::getChangeType) - .collect(Collectors.toSet())); + + unsupported.stream().map(BatchItem::getChangeType).collect(Collectors.toSet())); } - List, Boolean>>>> timeseriesResults = + List, Boolean>>>> timeseriesResults = aspectsBatch.getItems().stream() .filter(item -> item.getAspectSpec().isTimeseries()) - .map(item -> (UpsertBatchItem) item) + .map(item -> (MCPUpsertBatchItem) item) .map( item -> Pair.of( @@ -974,7 +1021,7 @@ private Stream ingestTimeseriesProposal( item.getSystemMetadata(), item.getMetadataChangeProposal(), item.getUrn(), - auditStamp, + item.getAuditStamp(), item.getAspectSpec()))) .collect(Collectors.toList()); @@ -992,7 +1039,7 @@ private Stream ingestTimeseriesProposal( } }); - UpsertBatchItem request = result.getFirst(); + MCPUpsertBatchItem request = result.getFirst(); return IngestResult.builder() .urn(request.getUrn()) .request(request) @@ -1010,8 +1057,8 @@ private Stream ingestTimeseriesProposal( * @return produced items to the MCP topic */ private Stream ingestProposalAsync(AspectsBatch aspectsBatch) { - List nonTimeseries = - aspectsBatch.getItems().stream() + List nonTimeseries = + aspectsBatch.getMCPItems().stream() .filter(item -> !item.getAspectSpec().isTimeseries()) .collect(Collectors.toList()); @@ -1029,7 +1076,7 @@ private Stream ingestProposalAsync(AspectsBatch aspectsBatch) { return nonTimeseries.stream() .map( item -> - IngestResult.builder() + IngestResult.builder() .urn(item.getUrn()) .request(item) .publishedMCP(true) @@ -1046,8 +1093,7 @@ private Stream ingestProposalAsync(AspectsBatch aspectsBatch) { } } - private Stream ingestProposalSync( - AspectsBatch aspectsBatch, AuditStamp auditStamp) { + private Stream ingestProposalSync(AspectsBatch aspectsBatch) { AspectsBatchImpl nonTimeseries = AspectsBatchImpl.builder() .items( @@ -1056,8 +1102,8 @@ private 
Stream ingestProposalSync( .collect(Collectors.toList())) .build(); - List unsupported = - nonTimeseries.getItems().stream() + List unsupported = + nonTimeseries.getMCPItems().stream() .filter( item -> item.getMetadataChangeProposal().getChangeType() != ChangeType.PATCH @@ -1071,12 +1117,12 @@ private Stream ingestProposalSync( .collect(Collectors.toSet())); } - List upsertResults = ingestAspects(nonTimeseries, auditStamp, true, true); + List upsertResults = ingestAspects(nonTimeseries, true, true); return upsertResults.stream() .map( result -> { - AbstractBatchItem item = result.getRequest(); + UpsertItem item = result.getRequest(); return IngestResult.builder() .urn(item.getUrn()) @@ -1421,7 +1467,7 @@ public Optional, Boolean>> conditionallyProduceMCLAsync( } private UpdateAspectResult conditionallyProduceMCLAsync(UpdateAspectResult result) { - AbstractBatchItem request = result.getRequest(); + UpsertItem request = result.getRequest(); Optional, Boolean>> emissionStatus = conditionallyProduceMCLAsync( result.getOldValue(), @@ -1443,12 +1489,6 @@ private UpdateAspectResult conditionallyProduceMCLAsync(UpdateAspectResult resul .orElse(result); } - @Override - public RecordTemplate getLatestAspect(@Nonnull final Urn urn, @Nonnull final String aspectName) { - log.debug("Invoked getLatestAspect with urn {}, aspect {}", urn, aspectName); - return getAspect(urn, aspectName, ASPECT_LATEST_VERSION); - } - @Override public void ingestEntities( @Nonnull final List entities, @@ -1647,16 +1687,17 @@ private void ingestSnapshotUnion( aspectRecordsToIngest.stream() .map( pair -> - UpsertBatchItem.builder() + MCPUpsertBatchItem.builder() .urn(urn) .aspectName(pair.getKey()) .aspect(pair.getValue()) + .auditStamp(auditStamp) .systemMetadata(systemMetadata) - .build(_entityRegistry)) + .build(_entityRegistry, systemEntityClient)) .collect(Collectors.toList())) .build(); - ingestAspects(aspectsBatch, auditStamp, true, true); + ingestAspects(aspectsBatch, true, true); } @Override @@ -1758,7 +1799,7 @@ public EntityRegistry getEntityRegistry() { } @Override - public void setRetentionService(RetentionService retentionService) { + public void setRetentionService(RetentionService retentionService) { _retentionService = retentionService; } @@ -1863,8 +1904,7 @@ public RollbackRunResult deleteUrn(Urn urn) { return new RollbackRunResult(removedAspects, rowsDeletedFromEntityDeletion); } - SystemMetadata latestKeySystemMetadata = - EntityUtils.parseSystemMetadata(latestKey.getSystemMetadata()); + SystemMetadata latestKeySystemMetadata = latestKey.asSystemAspect().getSystemMetadata(); RollbackResult result = deleteAspect( urn.toString(), @@ -1980,20 +2020,14 @@ public RollbackResult deleteAspect( } // 2. Compare the match conditions, if they don't match, ignore. - SystemMetadata latestSystemMetadata = - EntityUtils.parseSystemMetadata(latest.getSystemMetadata()); + SystemMetadata latestSystemMetadata = latest.asSystemAspect().getSystemMetadata(); if (!filterMatch(latestSystemMetadata, conditions)) { return null; } String latestMetadata = latest.getMetadata(); // 3. Check if this is a key aspect - Boolean isKeyAspect = false; - try { - isKeyAspect = getKeyAspectName(Urn.createFromString(urn)).equals(aspectName); - } catch (URISyntaxException e) { - log.error("Error occurred while parsing urn: {}", urn, e); - } + Boolean isKeyAspect = getKeyAspectName(entityUrn).equals(aspectName); // 4. 
Fetch all preceding aspects, that match List aspectsToDelete = new ArrayList<>(); @@ -2004,8 +2038,11 @@ public RollbackResult deleteAspect( while (maxVersion > 0 && filterMatch) { EntityAspect candidateAspect = _aspectDao.getAspect(urn, aspectName, maxVersion); SystemMetadata previousSysMetadata = - EntityUtils.parseSystemMetadata(candidateAspect.getSystemMetadata()); - filterMatch = filterMatch(previousSysMetadata, conditions); + candidateAspect != null + ? candidateAspect.asSystemAspect().getSystemMetadata() + : null; + filterMatch = + previousSysMetadata != null && filterMatch(previousSysMetadata, conditions); if (filterMatch) { aspectsToDelete.add(candidateAspect); maxVersion = maxVersion - 1; @@ -2069,7 +2106,7 @@ public RollbackResult deleteAspect( latest == null ? null : EntityUtils.toAspectRecord( - Urn.createFromString(latest.getUrn()), + entitySpec.getName(), latest.getAspect(), latestMetadata, getEntityRegistry()); @@ -2078,7 +2115,7 @@ public RollbackResult deleteAspect( survivingAspect == null ? null : EntityUtils.toAspectRecord( - Urn.createFromString(survivingAspect.getUrn()), + entitySpec.getName(), survivingAspect.getAspect(), previousMetadata, getEntityRegistry()); @@ -2098,7 +2135,7 @@ public RollbackResult deleteAspect( latestSystemMetadata, previousValue == null ? null - : EntityUtils.parseSystemMetadata(survivingAspect.getSystemMetadata()), + : survivingAspect.asSystemAspect().getSystemMetadata(), survivingAspect == null ? ChangeType.DELETE : ChangeType.UPSERT, isKeyAspect, additionalRowsDeleted); @@ -2117,7 +2154,8 @@ public RollbackResult deleteAspect( return result; } - protected boolean filterMatch(SystemMetadata systemMetadata, Map conditions) { + protected boolean filterMatch( + @Nonnull SystemMetadata systemMetadata, Map conditions) { String runIdCondition = conditions.getOrDefault("runId", null); if (runIdCondition != null) { if (!runIdCondition.equals(systemMetadata.getRunId())) { @@ -2202,40 +2240,42 @@ private Map getEnvelopedAspects( continue; } - // Aspect found. Now turn it into an EnvelopedAspect - final com.linkedin.entity.Aspect aspect = - RecordUtils.toRecordTemplate( - com.linkedin.entity.Aspect.class, currAspectEntry.getMetadata()); - final EnvelopedAspect envelopedAspect = new EnvelopedAspect(); - envelopedAspect.setName(currAspectEntry.getAspect()); - envelopedAspect.setVersion(currAspectEntry.getVersion()); - // TODO: I think we can assume this here, adding as it's a required field so object mapping - // barfs when trying to access it, - // since nowhere else is using it should be safe for now at least - envelopedAspect.setType(AspectType.VERSIONED); - envelopedAspect.setValue(aspect); + result.put(currKey, toEnvelopedAspect(currAspectEntry)); + } + return result; + } - try { - if (currAspectEntry.getSystemMetadata() != null) { - final SystemMetadata systemMetadata = - RecordUtils.toRecordTemplate( - SystemMetadata.class, currAspectEntry.getSystemMetadata()); - envelopedAspect.setSystemMetadata(systemMetadata); - } - } catch (Exception e) { - log.warn( - "Exception encountered when setting system metadata on enveloped aspect {}. Error: {}", - envelopedAspect.getName(), - e); - } + private static EnvelopedAspect toEnvelopedAspect(EntityAspect entityAspect) { + // Aspect found. 
Now turn it into an EnvelopedAspect + final com.linkedin.entity.Aspect aspect = + RecordUtils.toRecordTemplate(com.linkedin.entity.Aspect.class, entityAspect.getMetadata()); + final EnvelopedAspect envelopedAspect = new EnvelopedAspect(); + envelopedAspect.setName(entityAspect.getAspect()); + envelopedAspect.setVersion(entityAspect.getVersion()); + // TODO: I think we can assume this here, adding as it's a required field so object mapping + // barfs when trying to access it, + // since nowhere else is using it should be safe for now at least + envelopedAspect.setType(AspectType.VERSIONED); + envelopedAspect.setValue(aspect); - envelopedAspect.setCreated( - new AuditStamp() - .setActor(UrnUtils.getUrn(currAspectEntry.getCreatedBy())) - .setTime(currAspectEntry.getCreatedOn().getTime())); - result.put(currKey, envelopedAspect); + try { + if (entityAspect.getSystemMetadata() != null) { + final SystemMetadata systemMetadata = entityAspect.asSystemAspect().getSystemMetadata(); + envelopedAspect.setSystemMetadata(systemMetadata); + } + } catch (Exception e) { + log.warn( + "Exception encountered when setting system metadata on enveloped aspect {}. Error: {}", + envelopedAspect.getName(), + e.toString()); } - return result; + + envelopedAspect.setCreated( + new AuditStamp() + .setActor(UrnUtils.getUrn(entityAspect.getCreatedBy())) + .setTime(entityAspect.getCreatedOn().getTime())); + + return envelopedAspect; } private EnvelopedAspect getKeyEnvelopedAspect(final Urn urn) { @@ -2287,8 +2327,7 @@ private UpdateAspectResult ingestAspectToLocalDB( // 3. If there is no difference between existing and new, we just update // the lastObserved in system metadata. RunId should stay as the original runId if (oldValue != null && DataTemplateUtil.areEqual(oldValue, newValue)) { - SystemMetadata latestSystemMetadata = - EntityUtils.parseSystemMetadata(latest.getSystemMetadata()); + SystemMetadata latestSystemMetadata = latest.asSystemAspect().getSystemMetadata(); latestSystemMetadata.setLastObserved(providedSystemMetadata.getLastObserved()); latestSystemMetadata.setLastRunId( providedSystemMetadata.getLastRunId(GetMode.NULL), SetMode.IGNORE_NULL); @@ -2306,7 +2345,7 @@ private UpdateAspectResult ingestAspectToLocalDB( .urn(urn) .oldValue(oldValue) .newValue(oldValue) - .oldSystemMetadata(EntityUtils.parseSystemMetadata(latest.getSystemMetadata())) + .oldSystemMetadata(latest.asSystemAspect().getSystemMetadata()) .newSystemMetadata(latestSystemMetadata) .operation(MetadataAuditOperation.UPDATE) .auditStamp(auditStamp) @@ -2342,8 +2381,7 @@ private UpdateAspectResult ingestAspectToLocalDB( .urn(urn) .oldValue(oldValue) .newValue(newValue) - .oldSystemMetadata( - latest == null ? null : EntityUtils.parseSystemMetadata(latest.getSystemMetadata())) + .oldSystemMetadata(latest == null ? 
null : latest.asSystemAspect().getSystemMetadata()) .newSystemMetadata(providedSystemMetadata) .operation(MetadataAuditOperation.UPDATE) .auditStamp(auditStamp) diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityUtils.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityUtils.java index c2a0a211f9e76..459b2d183d7ac 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityUtils.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityUtils.java @@ -11,7 +11,7 @@ import com.linkedin.data.schema.RecordDataSchema; import com.linkedin.data.template.RecordTemplate; import com.linkedin.entity.EnvelopedAspect; -import com.linkedin.metadata.entity.ebean.transactions.AspectsBatchImpl; +import com.linkedin.metadata.entity.ebean.batch.AspectsBatchImpl; import com.linkedin.metadata.entity.validation.EntityRegistryUrnValidator; import com.linkedin.metadata.entity.validation.RecordTemplateValidator; import com.linkedin.metadata.models.AspectSpec; @@ -64,8 +64,13 @@ public static void ingestChangeProposals( @Nonnull Urn actor, @Nonnull Boolean async) { entityService.ingestProposal( - AspectsBatchImpl.builder().mcps(changes, entityService.getEntityRegistry()).build(), - getAuditStamp(actor), + AspectsBatchImpl.builder() + .mcps( + changes, + getAuditStamp(actor), + entityService.getEntityRegistry(), + entityService.getSystemEntityClient()) + .build(), async); } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/cassandra/CassandraAspectDao.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/cassandra/CassandraAspectDao.java index 3293bc6178e43..f37f63913abe4 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/cassandra/CassandraAspectDao.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/cassandra/CassandraAspectDao.java @@ -500,8 +500,30 @@ public PagedList getPagedAspects(final RestoreIndicesArgs args) { @Nonnull @Override public Stream streamAspects(String entityName, String aspectName) { - // Not implemented - return null; + SimpleStatement ss = + selectFrom(CassandraAspect.TABLE_NAME) + .all() + // assumes alpha characters after the entityType prefix + .whereColumn(CassandraAspect.URN_COLUMN) + .isGreaterThan(literal(String.join(":", List.of("urn", "li", entityName, "")))) + .whereColumn(CassandraAspect.URN_COLUMN) + .isLessThan( + literal( + String.join( + ":", + List.of( + "urn", + "li", + entityName, + "|")))) // this is used for slicing prefixes with alpha characters + .whereColumn(CassandraAspect.ASPECT_COLUMN) + .isEqualTo(literal(aspectName)) + .allowFiltering() // performance impact, however # of properties expected to be + // relatively small + .build(); + + ResultSet rs = _cqlSession.execute(ss); + return rs.all().stream().map(CassandraAspect::rowToEntityAspect); } @Override diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/cassandra/CassandraRetentionService.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/cassandra/CassandraRetentionService.java index 6a1ba72c37676..f1b7d761087b4 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/cassandra/CassandraRetentionService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/cassandra/CassandraRetentionService.java @@ -13,16 +13,18 @@ import com.datastax.oss.driver.api.querybuilder.select.Select; import com.datastax.oss.driver.api.querybuilder.select.Selector; import com.google.common.collect.ImmutableList; +import com.linkedin.common.AuditStamp; import 
com.linkedin.common.urn.Urn; import com.linkedin.metadata.Constants; +import com.linkedin.metadata.aspect.batch.AspectsBatch; import com.linkedin.metadata.entity.EntityAspect; import com.linkedin.metadata.entity.EntityAspectIdentifier; import com.linkedin.metadata.entity.EntityService; import com.linkedin.metadata.entity.RetentionService; -import com.linkedin.metadata.entity.ebean.transactions.AspectsBatchImpl; +import com.linkedin.metadata.entity.ebean.batch.AspectsBatchImpl; +import com.linkedin.metadata.entity.ebean.batch.MCPUpsertBatchItem; import com.linkedin.metadata.entity.retention.BulkApplyRetentionArgs; import com.linkedin.metadata.entity.retention.BulkApplyRetentionResult; -import com.linkedin.metadata.entity.transactions.AspectsBatch; import com.linkedin.mxe.MetadataChangeProposal; import com.linkedin.retention.DataHubRetentionConfig; import com.linkedin.retention.Retention; @@ -43,21 +45,28 @@ @Slf4j @RequiredArgsConstructor -public class CassandraRetentionService extends RetentionService { - private final EntityService _entityService; +public class CassandraRetentionService extends RetentionService { + private final EntityService _entityService; private final CqlSession _cqlSession; private final int _batchSize; private final Clock _clock = Clock.systemUTC(); @Override - public EntityService getEntityService() { + public EntityService getEntityService() { return _entityService; } @Override - protected AspectsBatch buildAspectsBatch(List mcps) { - return AspectsBatchImpl.builder().mcps(mcps, _entityService.getEntityRegistry()).build(); + protected AspectsBatch buildAspectsBatch( + List mcps, @Nonnull AuditStamp auditStamp) { + return AspectsBatchImpl.builder() + .mcps( + mcps, + auditStamp, + _entityService.getEntityRegistry(), + _entityService.getSystemEntityClient()) + .build(); } @Override diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanAspectDao.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanAspectDao.java index 26946890daa3b..176a99d8d3a49 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanAspectDao.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanAspectDao.java @@ -7,13 +7,13 @@ import com.datahub.util.exception.RetryLimitReached; import com.linkedin.common.AuditStamp; import com.linkedin.common.urn.Urn; +import com.linkedin.metadata.aspect.batch.AspectsBatch; import com.linkedin.metadata.entity.AspectDao; import com.linkedin.metadata.entity.AspectMigrationsDao; import com.linkedin.metadata.entity.EntityAspect; import com.linkedin.metadata.entity.EntityAspectIdentifier; import com.linkedin.metadata.entity.ListResult; import com.linkedin.metadata.entity.restoreindices.RestoreIndicesArgs; -import com.linkedin.metadata.entity.transactions.AspectsBatch; import com.linkedin.metadata.models.AspectSpec; import com.linkedin.metadata.models.EntitySpec; import com.linkedin.metadata.query.ExtraInfo; diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanRetentionService.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanRetentionService.java index e12f0f8f1b5d9..d1f54f8a7e6e5 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanRetentionService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanRetentionService.java @@ -1,14 +1,16 @@ package com.linkedin.metadata.entity.ebean; import com.datahub.util.RecordUtils; +import com.linkedin.common.AuditStamp; import 
com.linkedin.common.urn.Urn; import com.linkedin.metadata.Constants; +import com.linkedin.metadata.aspect.batch.AspectsBatch; import com.linkedin.metadata.entity.EntityService; import com.linkedin.metadata.entity.RetentionService; -import com.linkedin.metadata.entity.ebean.transactions.AspectsBatchImpl; +import com.linkedin.metadata.entity.ebean.batch.AspectsBatchImpl; +import com.linkedin.metadata.entity.ebean.batch.MCPUpsertBatchItem; import com.linkedin.metadata.entity.retention.BulkApplyRetentionArgs; import com.linkedin.metadata.entity.retention.BulkApplyRetentionResult; -import com.linkedin.metadata.entity.transactions.AspectsBatch; import com.linkedin.mxe.MetadataChangeProposal; import com.linkedin.retention.DataHubRetentionConfig; import com.linkedin.retention.Retention; @@ -38,21 +40,28 @@ @Slf4j @RequiredArgsConstructor -public class EbeanRetentionService extends RetentionService { - private final EntityService _entityService; +public class EbeanRetentionService extends RetentionService { + private final EntityService _entityService; private final Database _server; private final int _batchSize; private final Clock _clock = Clock.systemUTC(); @Override - public EntityService getEntityService() { + public EntityService getEntityService() { return _entityService; } @Override - protected AspectsBatch buildAspectsBatch(List mcps) { - return AspectsBatchImpl.builder().mcps(mcps, _entityService.getEntityRegistry()).build(); + protected AspectsBatch buildAspectsBatch( + List mcps, @Nonnull AuditStamp auditStamp) { + return AspectsBatchImpl.builder() + .mcps( + mcps, + auditStamp, + _entityService.getEntityRegistry(), + _entityService.getSystemEntityClient()) + .build(); } @Override diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/batch/AspectsBatchImpl.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/batch/AspectsBatchImpl.java new file mode 100644 index 0000000000000..4b75fe73a12e5 --- /dev/null +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/batch/AspectsBatchImpl.java @@ -0,0 +1,143 @@ +package com.linkedin.metadata.entity.ebean.batch; + +import com.linkedin.common.AuditStamp; +import com.linkedin.data.template.RecordTemplate; +import com.linkedin.events.metadata.ChangeType; +import com.linkedin.metadata.aspect.batch.AspectsBatch; +import com.linkedin.metadata.aspect.batch.BatchItem; +import com.linkedin.metadata.aspect.batch.SystemAspect; +import com.linkedin.metadata.aspect.batch.UpsertItem; +import com.linkedin.metadata.aspect.plugins.validation.AspectRetriever; +import com.linkedin.metadata.models.registry.EntityRegistry; +import com.linkedin.mxe.MetadataChangeProposal; +import com.linkedin.mxe.SystemMetadata; +import com.linkedin.util.Pair; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Set; +import java.util.stream.Collectors; +import lombok.Builder; +import lombok.Getter; +import lombok.extern.slf4j.Slf4j; + +@Slf4j +@Getter +@Builder(toBuilder = true) +public class AspectsBatchImpl implements AspectsBatch { + + private final List items; + + /** + * Convert patches to upserts, apply hooks at the aspect and batch level. 
+ * + * @param latestAspects latest version in the database + * @param entityRegistry entity registry + * @return The new urn/aspectnames and the uniform upserts, possibly expanded/mutated by the + * various hooks + */ + @Override + public Pair>, List> toUpsertBatchItems( + final Map> latestAspects, + EntityRegistry entityRegistry, + AspectRetriever aspectRetriever) { + + LinkedList upsertBatchItems = + items.stream() + .map( + item -> { + final String urnStr = item.getUrn().toString(); + // latest is also the old aspect + final SystemAspect latest = + latestAspects.getOrDefault(urnStr, Map.of()).get(item.getAspectName()); + + final MCPUpsertBatchItem upsertItem; + if (item instanceof MCPUpsertBatchItem) { + upsertItem = (MCPUpsertBatchItem) item; + } else { + // patch to upsert + MCPPatchBatchItem patchBatchItem = (MCPPatchBatchItem) item; + final RecordTemplate currentValue = + latest != null ? latest.getRecordTemplate(entityRegistry) : null; + upsertItem = + patchBatchItem.applyPatch(entityRegistry, currentValue, aspectRetriever); + } + + // Apply hooks + final SystemMetadata oldSystemMetadata = + latest != null ? latest.getSystemMetadata() : null; + final RecordTemplate oldAspectValue = + latest != null ? latest.getRecordTemplate(entityRegistry) : null; + upsertItem.applyMutationHooks( + oldAspectValue, oldSystemMetadata, entityRegistry, aspectRetriever); + + return upsertItem; + }) + .collect(Collectors.toCollection(LinkedList::new)); + + LinkedList newItems = + applyMCPSideEffects(upsertBatchItems, entityRegistry, aspectRetriever) + .collect(Collectors.toCollection(LinkedList::new)); + Map> newUrnAspectNames = getNewUrnAspectsMap(getUrnAspectsMap(), newItems); + upsertBatchItems.addAll(newItems); + + return Pair.of(newUrnAspectNames, upsertBatchItems); + } + + public static class AspectsBatchImplBuilder { + /** + * Just one aspect record template + * + * @param data aspect data + * @return builder + */ + public AspectsBatchImplBuilder one(BatchItem data) { + this.items = List.of(data); + return this; + } + + public AspectsBatchImplBuilder mcps( + List mcps, + AuditStamp auditStamp, + EntityRegistry entityRegistry, + AspectRetriever aspectRetriever) { + this.items = + mcps.stream() + .map( + mcp -> { + if (mcp.getChangeType().equals(ChangeType.PATCH)) { + return MCPPatchBatchItem.MCPPatchBatchItemBuilder.build( + mcp, auditStamp, entityRegistry); + } else { + return MCPUpsertBatchItem.MCPUpsertBatchItemBuilder.build( + mcp, auditStamp, entityRegistry, aspectRetriever); + } + }) + .collect(Collectors.toList()); + return this; + } + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + AspectsBatchImpl that = (AspectsBatchImpl) o; + return Objects.equals(items, that.items); + } + + @Override + public int hashCode() { + return Objects.hash(items); + } + + @Override + public String toString() { + return "AspectsBatchImpl{" + "items=" + items + '}'; + } +} diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/batch/MCLBatchItemImpl.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/batch/MCLBatchItemImpl.java new file mode 100644 index 0000000000000..f61280bac4b22 --- /dev/null +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/batch/MCLBatchItemImpl.java @@ -0,0 +1,157 @@ +package com.linkedin.metadata.entity.ebean.batch; + +import static com.linkedin.metadata.entity.AspectUtils.validateAspect; + +import 
com.datahub.util.exception.ModelConversionException; +import com.linkedin.common.urn.Urn; +import com.linkedin.data.template.RecordTemplate; +import com.linkedin.events.metadata.ChangeType; +import com.linkedin.metadata.aspect.batch.MCLBatchItem; +import com.linkedin.metadata.aspect.plugins.validation.AspectRetriever; +import com.linkedin.metadata.entity.EntityUtils; +import com.linkedin.metadata.entity.validation.ValidationUtils; +import com.linkedin.metadata.models.AspectSpec; +import com.linkedin.metadata.models.EntitySpec; +import com.linkedin.metadata.models.registry.EntityRegistry; +import com.linkedin.metadata.utils.EntityKeyUtils; +import com.linkedin.metadata.utils.GenericRecordUtils; +import com.linkedin.mxe.MetadataChangeLog; +import com.linkedin.util.Pair; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; +import lombok.Builder; +import lombok.Getter; +import lombok.extern.slf4j.Slf4j; + +@Slf4j +@Getter +@Builder(toBuilder = true) +public class MCLBatchItemImpl implements MCLBatchItem { + + @Nonnull private final MetadataChangeLog metadataChangeLog; + + @Nullable private final RecordTemplate aspect; + + @Nullable private final RecordTemplate previousAspect; + + // derived + private final EntitySpec entitySpec; + private final AspectSpec aspectSpec; + + public static class MCLBatchItemImplBuilder { + + public MCLBatchItemImpl build( + MetadataChangeLog metadataChangeLog, + EntityRegistry entityRegistry, + AspectRetriever aspectRetriever) { + return MCLBatchItemImpl.builder() + .metadataChangeLog(metadataChangeLog) + .build(entityRegistry, aspectRetriever); + } + + public MCLBatchItemImpl build(EntityRegistry entityRegistry, AspectRetriever aspectRetriever) { + log.debug("entity type = {}", this.metadataChangeLog.getEntityType()); + entitySpec(entityRegistry.getEntitySpec(this.metadataChangeLog.getEntityType())); + aspectSpec(validateAspect(this.metadataChangeLog, this.entitySpec)); + + Urn urn = this.metadataChangeLog.getEntityUrn(); + if (urn == null) { + urn = + EntityKeyUtils.getUrnFromLog( + this.metadataChangeLog, this.entitySpec.getKeyAspectSpec()); + } + EntityUtils.validateUrn(entityRegistry, urn); + log.debug("entity type = {}", urn.getEntityType()); + + entitySpec(entityRegistry.getEntitySpec(urn.getEntityType())); + log.debug("entity spec = {}", this.entitySpec); + + aspectSpec(ValidationUtils.validate(this.entitySpec, this.metadataChangeLog.getAspectName())); + log.debug("aspect spec = {}", this.aspectSpec); + + Pair aspects = + convertToRecordTemplate(this.metadataChangeLog, aspectSpec); + + // validate new + ValidationUtils.validateRecordTemplate( + this.metadataChangeLog.getChangeType(), + entityRegistry, + this.entitySpec, + this.aspectSpec, + urn, + aspects.getFirst(), + aspectRetriever); + + return new MCLBatchItemImpl( + this.metadataChangeLog, + aspects.getFirst(), + aspects.getSecond(), + this.entitySpec, + this.aspectSpec); + } + + private MCLBatchItemImplBuilder entitySpec(EntitySpec entitySpec) { + this.entitySpec = entitySpec; + return this; + } + + private MCLBatchItemImplBuilder aspectSpec(AspectSpec aspectSpec) { + this.aspectSpec = aspectSpec; + return this; + } + + private static Pair convertToRecordTemplate( + MetadataChangeLog mcl, AspectSpec aspectSpec) { + final RecordTemplate aspect; + final RecordTemplate prevAspect; + try { + + if (!ChangeType.DELETE.equals(mcl.getChangeType())) { + aspect = + GenericRecordUtils.deserializeAspect( + mcl.getAspect().getValue(), mcl.getAspect().getContentType(), aspectSpec); + 
ValidationUtils.validateOrThrow(aspect); + } else { + aspect = null; + } + + if (mcl.getPreviousAspectValue() != null) { + prevAspect = + GenericRecordUtils.deserializeAspect( + mcl.getPreviousAspectValue().getValue(), + mcl.getPreviousAspectValue().getContentType(), + aspectSpec); + ValidationUtils.validateOrThrow(prevAspect); + } else { + prevAspect = null; + } + } catch (ModelConversionException e) { + throw new RuntimeException( + String.format( + "Could not deserialize %s for aspect %s", + mcl.getAspect().getValue(), mcl.getAspectName())); + } + + return Pair.of(aspect, prevAspect); + } + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + + MCLBatchItemImpl that = (MCLBatchItemImpl) o; + + return metadataChangeLog.equals(that.metadataChangeLog); + } + + @Override + public int hashCode() { + return metadataChangeLog.hashCode(); + } +} diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/transactions/PatchBatchItem.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/batch/MCPPatchBatchItem.java similarity index 71% rename from metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/transactions/PatchBatchItem.java rename to metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/batch/MCPPatchBatchItem.java index f9b1e340d5541..3adf384f3b0ed 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/transactions/PatchBatchItem.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/batch/MCPPatchBatchItem.java @@ -1,6 +1,8 @@ -package com.linkedin.metadata.entity.ebean.transactions; +package com.linkedin.metadata.entity.ebean.batch; -import static com.linkedin.metadata.Constants.*; +import static com.linkedin.metadata.Constants.INGESTION_MAX_SERIALIZED_STRING_LENGTH; +import static com.linkedin.metadata.Constants.MAX_JACKSON_STRING_SIZE; +import static com.linkedin.metadata.entity.AspectUtils.validateAspect; import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.core.StreamReadConstraints; @@ -9,22 +11,26 @@ import com.github.fge.jsonpatch.JsonPatch; import com.github.fge.jsonpatch.JsonPatchException; import com.github.fge.jsonpatch.Patch; +import com.linkedin.common.AuditStamp; import com.linkedin.common.urn.Urn; import com.linkedin.data.template.RecordTemplate; import com.linkedin.events.metadata.ChangeType; +import com.linkedin.metadata.aspect.batch.PatchItem; +import com.linkedin.metadata.aspect.plugins.validation.AspectRetriever; import com.linkedin.metadata.entity.EntityUtils; -import com.linkedin.metadata.entity.transactions.AbstractBatchItem; import com.linkedin.metadata.entity.validation.ValidationUtils; import com.linkedin.metadata.models.AspectSpec; import com.linkedin.metadata.models.EntitySpec; import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.metadata.models.registry.template.AspectTemplateEngine; import com.linkedin.metadata.utils.EntityKeyUtils; +import com.linkedin.metadata.utils.SystemMetadataUtils; import com.linkedin.mxe.MetadataChangeProposal; import com.linkedin.mxe.SystemMetadata; import java.io.IOException; import java.nio.charset.StandardCharsets; import java.util.Objects; +import javax.annotation.Nonnull; import lombok.Builder; import lombok.Getter; import lombok.extern.slf4j.Slf4j; @@ -32,7 +38,7 @@ @Slf4j @Getter @Builder(toBuilder = true) -public class PatchBatchItem extends AbstractBatchItem { +public class 
MCPPatchBatchItem extends PatchItem { private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); static { @@ -50,6 +56,7 @@ public class PatchBatchItem extends AbstractBatchItem { // aspectName name of the aspect being inserted private final String aspectName; private final SystemMetadata systemMetadata; + private final AuditStamp auditStamp; private final Patch patch; @@ -59,22 +66,22 @@ public class PatchBatchItem extends AbstractBatchItem { private final EntitySpec entitySpec; private final AspectSpec aspectSpec; + @Nonnull @Override public ChangeType getChangeType() { return ChangeType.PATCH; } - @Override - public void validateUrn(EntityRegistry entityRegistry, Urn urn) { - EntityUtils.validateUrn(entityRegistry, urn); - } - - public UpsertBatchItem applyPatch(EntityRegistry entityRegistry, RecordTemplate recordTemplate) { - UpsertBatchItem.UpsertBatchItemBuilder builder = - UpsertBatchItem.builder() + public MCPUpsertBatchItem applyPatch( + EntityRegistry entityRegistry, + RecordTemplate recordTemplate, + AspectRetriever aspectRetriever) { + MCPUpsertBatchItem.MCPUpsertBatchItemBuilder builder = + MCPUpsertBatchItem.builder() .urn(getUrn()) .aspectName(getAspectName()) .metadataChangeProposal(getMetadataChangeProposal()) + .auditStamp(auditStamp) .systemMetadata(getSystemMetadata()); AspectTemplateEngine aspectTemplateEngine = entityRegistry.getAspectTemplateEngine(); @@ -99,12 +106,18 @@ public UpsertBatchItem applyPatch(EntityRegistry entityRegistry, RecordTemplate throw new RuntimeException(e); } - return builder.build(entityRegistry); + return builder.build(entityRegistry, aspectRetriever); } - public static class PatchBatchItemBuilder { + public static class MCPPatchBatchItemBuilder { - public PatchBatchItem build(EntityRegistry entityRegistry) { + public MCPPatchBatchItem.MCPPatchBatchItemBuilder systemMetadata( + SystemMetadata systemMetadata) { + this.systemMetadata = SystemMetadataUtils.generateSystemMetadataIfEmpty(systemMetadata); + return this; + } + + public MCPPatchBatchItem build(EntityRegistry entityRegistry) { EntityUtils.validateUrn(entityRegistry, this.urn); log.debug("entity type = {}", this.urn.getEntityType()); @@ -119,22 +132,24 @@ public PatchBatchItem build(EntityRegistry entityRegistry) { String.format("Missing patch to apply. 
Aspect: %s", this.aspectSpec.getName())); } - return new PatchBatchItem( + return new MCPPatchBatchItem( this.urn, this.aspectName, - generateSystemMetadataIfEmpty(this.systemMetadata), + SystemMetadataUtils.generateSystemMetadataIfEmpty(this.systemMetadata), + this.auditStamp, this.patch, this.metadataChangeProposal, this.entitySpec, this.aspectSpec); } - public static PatchBatchItem build(MetadataChangeProposal mcp, EntityRegistry entityRegistry) { + public static MCPPatchBatchItem build( + MetadataChangeProposal mcp, AuditStamp auditStamp, EntityRegistry entityRegistry) { log.debug("entity type = {}", mcp.getEntityType()); EntitySpec entitySpec = entityRegistry.getEntitySpec(mcp.getEntityType()); AspectSpec aspectSpec = validateAspect(mcp, entitySpec); - if (!isValidChangeType(ChangeType.PATCH, aspectSpec)) { + if (!PatchItem.isValidChangeType(ChangeType.PATCH, aspectSpec)) { throw new UnsupportedOperationException( "ChangeType not supported: " + mcp.getChangeType() @@ -147,23 +162,23 @@ public static PatchBatchItem build(MetadataChangeProposal mcp, EntityRegistry en urn = EntityKeyUtils.getUrnFromProposal(mcp, entitySpec.getKeyAspectSpec()); } - PatchBatchItemBuilder builder = - PatchBatchItem.builder() - .urn(urn) - .aspectName(mcp.getAspectName()) - .systemMetadata(mcp.getSystemMetadata()) - .metadataChangeProposal(mcp) - .patch(convertToJsonPatch(mcp)); - - return builder.build(entityRegistry); + return MCPPatchBatchItem.builder() + .urn(urn) + .aspectName(mcp.getAspectName()) + .systemMetadata( + SystemMetadataUtils.generateSystemMetadataIfEmpty(mcp.getSystemMetadata())) + .metadataChangeProposal(mcp) + .auditStamp(auditStamp) + .patch(convertToJsonPatch(mcp)) + .build(entityRegistry); } - private PatchBatchItemBuilder entitySpec(EntitySpec entitySpec) { + private MCPPatchBatchItemBuilder entitySpec(EntitySpec entitySpec) { this.entitySpec = entitySpec; return this; } - private PatchBatchItemBuilder aspectSpec(AspectSpec aspectSpec) { + private MCPPatchBatchItemBuilder aspectSpec(AspectSpec aspectSpec) { this.aspectSpec = aspectSpec; return this; } @@ -187,7 +202,7 @@ public boolean equals(Object o) { if (o == null || getClass() != o.getClass()) { return false; } - PatchBatchItem that = (PatchBatchItem) o; + MCPPatchBatchItem that = (MCPPatchBatchItem) o; return urn.equals(that.urn) && aspectName.equals(that.aspectName) && Objects.equals(systemMetadata, that.systemMetadata) diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/transactions/UpsertBatchItem.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/batch/MCPUpsertBatchItem.java similarity index 52% rename from metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/transactions/UpsertBatchItem.java rename to metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/batch/MCPUpsertBatchItem.java index c232e4846f7d1..9d41b141dcd60 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/transactions/UpsertBatchItem.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/batch/MCPUpsertBatchItem.java @@ -1,59 +1,92 @@ -package com.linkedin.metadata.entity.ebean.transactions; +package com.linkedin.metadata.entity.ebean.batch; import static com.linkedin.metadata.Constants.ASPECT_LATEST_VERSION; +import static com.linkedin.metadata.entity.AspectUtils.validateAspect; import com.datahub.util.exception.ModelConversionException; import com.linkedin.common.AuditStamp; import com.linkedin.common.urn.Urn; import com.linkedin.data.template.RecordTemplate; import 
com.linkedin.events.metadata.ChangeType; +import com.linkedin.metadata.aspect.batch.SystemAspect; +import com.linkedin.metadata.aspect.batch.UpsertItem; +import com.linkedin.metadata.aspect.plugins.hooks.MutationHook; +import com.linkedin.metadata.aspect.plugins.validation.AspectPayloadValidator; +import com.linkedin.metadata.aspect.plugins.validation.AspectRetriever; +import com.linkedin.metadata.aspect.plugins.validation.AspectValidationException; import com.linkedin.metadata.entity.EntityAspect; import com.linkedin.metadata.entity.EntityUtils; -import com.linkedin.metadata.entity.transactions.AbstractBatchItem; import com.linkedin.metadata.entity.validation.ValidationUtils; import com.linkedin.metadata.models.AspectSpec; import com.linkedin.metadata.models.EntitySpec; import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.metadata.utils.EntityKeyUtils; import com.linkedin.metadata.utils.GenericRecordUtils; +import com.linkedin.metadata.utils.SystemMetadataUtils; import com.linkedin.mxe.MetadataChangeProposal; import com.linkedin.mxe.SystemMetadata; import java.sql.Timestamp; import java.util.Objects; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; import lombok.Builder; import lombok.Getter; +import lombok.SneakyThrows; import lombok.extern.slf4j.Slf4j; @Slf4j @Getter @Builder(toBuilder = true) -public class UpsertBatchItem extends AbstractBatchItem { +public class MCPUpsertBatchItem extends UpsertItem { // urn an urn associated with the new aspect - private final Urn urn; + @Nonnull private final Urn urn; + // aspectName name of the aspect being inserted - private final String aspectName; - private final SystemMetadata systemMetadata; + @Nonnull private final String aspectName; + + @Nonnull private final RecordTemplate aspect; + + @Nonnull private final SystemMetadata systemMetadata; - private final RecordTemplate aspect; + @Nonnull private final AuditStamp auditStamp; - private final MetadataChangeProposal metadataChangeProposal; + @Nullable private final MetadataChangeProposal metadataChangeProposal; // derived - private final EntitySpec entitySpec; - private final AspectSpec aspectSpec; + @Nonnull private final EntitySpec entitySpec; + @Nonnull private final AspectSpec aspectSpec; + @Nonnull @Override public ChangeType getChangeType() { return ChangeType.UPSERT; } - @Override - public void validateUrn(EntityRegistry entityRegistry, Urn urn) { - EntityUtils.validateUrn(entityRegistry, urn); + public void applyMutationHooks( + @Nullable RecordTemplate oldAspectValue, + @Nullable SystemMetadata oldSystemMetadata, + @Nonnull EntityRegistry entityRegistry, + @Nonnull AspectRetriever aspectRetriever) { + // add audit stamp/system meta if needed + for (MutationHook mutationHook : + entityRegistry.getMutationHooks( + getChangeType(), entitySpec.getName(), aspectSpec.getName())) { + mutationHook.applyMutation( + getChangeType(), + entitySpec, + aspectSpec, + oldAspectValue, + aspect, + oldSystemMetadata, + systemMetadata, + auditStamp, + aspectRetriever); + } } - public EntityAspect toLatestEntityAspect(AuditStamp auditStamp) { + @Override + public SystemAspect toLatestEntityAspect() { EntityAspect latest = new EntityAspect(); latest.setAspect(getAspectName()); latest.setMetadata(EntityUtils.toJsonAspect(getAspect())); @@ -61,12 +94,39 @@ public EntityAspect toLatestEntityAspect(AuditStamp auditStamp) { latest.setVersion(ASPECT_LATEST_VERSION); latest.setCreatedOn(new Timestamp(auditStamp.getTime())); 
latest.setCreatedBy(auditStamp.getActor().toString()); - return latest; + return latest.asSystemAspect(); + } + + @Override + public void validatePreCommit( + @Nullable RecordTemplate previous, + @Nonnull EntityRegistry entityRegistry, + @Nonnull AspectRetriever aspectRetriever) + throws AspectValidationException { + + for (AspectPayloadValidator validator : + entityRegistry.getAspectPayloadValidators( + getChangeType(), entitySpec.getName(), aspectSpec.getName())) { + validator.validatePreCommit( + getChangeType(), urn, getAspectSpec(), previous, this.aspect, aspectRetriever); + } } - public static class UpsertBatchItemBuilder { + public static class MCPUpsertBatchItemBuilder { + + // Ensure use of other builders + private MCPUpsertBatchItem build() { + return null; + } + + public MCPUpsertBatchItemBuilder systemMetadata(SystemMetadata systemMetadata) { + this.systemMetadata = SystemMetadataUtils.generateSystemMetadataIfEmpty(systemMetadata); + return this; + } - public UpsertBatchItem build(EntityRegistry entityRegistry) { + @SneakyThrows + public MCPUpsertBatchItem build( + EntityRegistry entityRegistry, AspectRetriever aspectRetriever) { EntityUtils.validateUrn(entityRegistry, this.urn); log.debug("entity type = {}", this.urn.getEntityType()); @@ -77,19 +137,30 @@ public UpsertBatchItem build(EntityRegistry entityRegistry) { log.debug("aspect spec = {}", this.aspectSpec); ValidationUtils.validateRecordTemplate( - entityRegistry, this.entitySpec, this.urn, this.aspect); + ChangeType.UPSERT, + entityRegistry, + this.entitySpec, + this.aspectSpec, + this.urn, + this.aspect, + aspectRetriever); - return new UpsertBatchItem( + return new MCPUpsertBatchItem( this.urn, this.aspectName, - AbstractBatchItem.generateSystemMetadataIfEmpty(this.systemMetadata), this.aspect, + SystemMetadataUtils.generateSystemMetadataIfEmpty(this.systemMetadata), + this.auditStamp, this.metadataChangeProposal, this.entitySpec, this.aspectSpec); } - public static UpsertBatchItem build(MetadataChangeProposal mcp, EntityRegistry entityRegistry) { + public static MCPUpsertBatchItem build( + MetadataChangeProposal mcp, + AuditStamp auditStamp, + EntityRegistry entityRegistry, + AspectRetriever aspectRetriever) { if (!mcp.getChangeType().equals(ChangeType.UPSERT)) { throw new IllegalArgumentException( "Invalid MCP, this class only supports change type of UPSERT."); @@ -112,23 +183,23 @@ public static UpsertBatchItem build(MetadataChangeProposal mcp, EntityRegistry e urn = EntityKeyUtils.getUrnFromProposal(mcp, entitySpec.getKeyAspectSpec()); } - UpsertBatchItemBuilder builder = - UpsertBatchItem.builder() - .urn(urn) - .aspectName(mcp.getAspectName()) - .systemMetadata(mcp.getSystemMetadata()) - .metadataChangeProposal(mcp) - .aspect(convertToRecordTemplate(mcp, aspectSpec)); - - return builder.build(entityRegistry); + return MCPUpsertBatchItem.builder() + .urn(urn) + .aspectName(mcp.getAspectName()) + .systemMetadata( + SystemMetadataUtils.generateSystemMetadataIfEmpty(mcp.getSystemMetadata())) + .metadataChangeProposal(mcp) + .auditStamp(auditStamp) + .aspect(convertToRecordTemplate(mcp, aspectSpec)) + .build(entityRegistry, aspectRetriever); } - private UpsertBatchItemBuilder entitySpec(EntitySpec entitySpec) { + private MCPUpsertBatchItemBuilder entitySpec(EntitySpec entitySpec) { this.entitySpec = entitySpec; return this; } - private UpsertBatchItemBuilder aspectSpec(AspectSpec aspectSpec) { + private MCPUpsertBatchItemBuilder aspectSpec(AspectSpec aspectSpec) { this.aspectSpec = aspectSpec; return this; } @@ 
-160,7 +231,7 @@ public boolean equals(Object o) { if (o == null || getClass() != o.getClass()) { return false; } - UpsertBatchItem that = (UpsertBatchItem) o; + MCPUpsertBatchItem that = (MCPUpsertBatchItem) o; return urn.equals(that.urn) && aspectName.equals(that.aspectName) && Objects.equals(systemMetadata, that.systemMetadata) diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/transactions/AspectsBatchImpl.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/transactions/AspectsBatchImpl.java deleted file mode 100644 index 11261afdaa0b2..0000000000000 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/transactions/AspectsBatchImpl.java +++ /dev/null @@ -1,71 +0,0 @@ -package com.linkedin.metadata.entity.ebean.transactions; - -import com.linkedin.events.metadata.ChangeType; -import com.linkedin.metadata.entity.transactions.AbstractBatchItem; -import com.linkedin.metadata.entity.transactions.AspectsBatch; -import com.linkedin.metadata.models.registry.EntityRegistry; -import com.linkedin.mxe.MetadataChangeProposal; -import java.util.List; -import java.util.Objects; -import java.util.stream.Collectors; -import lombok.Builder; -import lombok.Getter; -import lombok.extern.slf4j.Slf4j; - -@Slf4j -@Getter -@Builder(toBuilder = true) -public class AspectsBatchImpl implements AspectsBatch { - private final List items; - - public static class AspectsBatchImplBuilder { - /** - * Just one aspect record template - * - * @param data aspect data - * @return builder - */ - public AspectsBatchImplBuilder one(AbstractBatchItem data) { - this.items = List.of(data); - return this; - } - - public AspectsBatchImplBuilder mcps( - List mcps, EntityRegistry entityRegistry) { - this.items = - mcps.stream() - .map( - mcp -> { - if (mcp.getChangeType().equals(ChangeType.PATCH)) { - return PatchBatchItem.PatchBatchItemBuilder.build(mcp, entityRegistry); - } else { - return UpsertBatchItem.UpsertBatchItemBuilder.build(mcp, entityRegistry); - } - }) - .collect(Collectors.toList()); - return this; - } - } - - @Override - public boolean equals(Object o) { - if (this == o) { - return true; - } - if (o == null || getClass() != o.getClass()) { - return false; - } - AspectsBatchImpl that = (AspectsBatchImpl) o; - return Objects.equals(items, that.items); - } - - @Override - public int hashCode() { - return Objects.hash(items); - } - - @Override - public String toString() { - return "AspectsBatchImpl{" + "items=" + items + '}'; - } -} diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/validation/EntityRegistryUrnValidator.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/validation/EntityRegistryUrnValidator.java index ad8fbfdf2eddd..3d7abee556290 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/validation/EntityRegistryUrnValidator.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/validation/EntityRegistryUrnValidator.java @@ -12,7 +12,6 @@ import com.linkedin.data.schema.PathSpec; import com.linkedin.data.schema.validator.Validator; import com.linkedin.data.schema.validator.ValidatorContext; -import com.linkedin.data.template.RecordTemplate; import com.linkedin.metadata.models.EntitySpec; import com.linkedin.metadata.models.RelationshipFieldSpec; import com.linkedin.metadata.models.registry.EntityRegistry; @@ -48,8 +47,15 @@ protected void validateUrnField(ValidatorContext context) { String urnStr = (String) context.dataElement().getValue(); Urn urn = Urn.createFromString(urnStr); EntitySpec entitySpec = 
_entityRegistry.getEntitySpec(urn.getEntityType()); - RecordTemplate entityKey = - EntityKeyUtils.convertUrnToEntityKey(urn, entitySpec.getKeyAspectSpec()); + // This is not always false + if (entitySpec == null) { + throw new IllegalArgumentException( + String.format("Entity type %s is missing from entity registry", urn.getEntityType())); + } + + // Ensure urn conversion is successful + EntityKeyUtils.convertUrnToEntityKey(urn, entitySpec.getKeyAspectSpec()); + NamedDataSchema namedDataSchema = ((NamedDataSchema) context.dataElement().getSchema()); Class urnClass; try { diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/validation/ValidationUtils.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/validation/ValidationUtils.java index 7f23bacdc4758..97f7aa06340d2 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/validation/ValidationUtils.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/validation/ValidationUtils.java @@ -3,11 +3,17 @@ import com.linkedin.common.urn.Urn; import com.linkedin.data.schema.validation.ValidationResult; import com.linkedin.data.template.RecordTemplate; +import com.linkedin.events.metadata.ChangeType; +import com.linkedin.metadata.aspect.plugins.validation.AspectPayloadValidator; +import com.linkedin.metadata.aspect.plugins.validation.AspectRetriever; +import com.linkedin.metadata.aspect.plugins.validation.AspectValidationException; import com.linkedin.metadata.entity.EntityUtils; import com.linkedin.metadata.models.AspectSpec; import com.linkedin.metadata.models.EntitySpec; import com.linkedin.metadata.models.registry.EntityRegistry; import java.util.function.Consumer; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; import lombok.extern.slf4j.Slf4j; @Slf4j @@ -60,7 +66,13 @@ public static AspectSpec validate(EntitySpec entitySpec, String aspectName) { } public static void validateRecordTemplate( - EntityRegistry entityRegistry, EntitySpec entitySpec, Urn urn, RecordTemplate aspect) { + ChangeType changeType, + EntityRegistry entityRegistry, + EntitySpec entitySpec, + AspectSpec aspectSpec, + Urn urn, + @Nullable RecordTemplate aspect, + @Nonnull AspectRetriever aspectRetriever) { EntityRegistryUrnValidator validator = new EntityRegistryUrnValidator(entityRegistry); validator.setCurrentEntitySpec(entitySpec); Consumer resultFunction = @@ -73,13 +85,21 @@ public static void validateRecordTemplate( }; RecordTemplateValidator.validate( EntityUtils.buildKeyAspect(entityRegistry, urn), resultFunction, validator); - RecordTemplateValidator.validate(aspect, resultFunction, validator); - } - public static void validateRecordTemplate( - EntityRegistry entityRegistry, Urn urn, RecordTemplate aspect) { - EntitySpec entitySpec = entityRegistry.getEntitySpec(urn.getEntityType()); - validateRecordTemplate(entityRegistry, entitySpec, urn, aspect); + if (aspect != null) { + RecordTemplateValidator.validate(aspect, resultFunction, validator); + + for (AspectPayloadValidator aspectValidator : + entityRegistry.getAspectPayloadValidators( + changeType, entitySpec.getName(), aspectSpec.getName())) { + try { + aspectValidator.validateProposed(changeType, urn, aspectSpec, aspect, aspectRetriever); + } catch (AspectValidationException e) { + throw new IllegalArgumentException( + "Failed to validate aspect due to: " + e.getMessage(), e); + } + } + } } private ValidationUtils() {} diff --git a/metadata-io/src/main/java/com/linkedin/metadata/service/UpdateIndicesService.java 
b/metadata-io/src/main/java/com/linkedin/metadata/service/UpdateIndicesService.java index b2c615c1f47f5..247d542604da7 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/service/UpdateIndicesService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/service/UpdateIndicesService.java @@ -18,6 +18,8 @@ import com.linkedin.entity.client.SystemEntityClient; import com.linkedin.events.metadata.ChangeType; import com.linkedin.metadata.Constants; +import com.linkedin.metadata.aspect.batch.MCLBatchItem; +import com.linkedin.metadata.entity.ebean.batch.MCLBatchItemImpl; import com.linkedin.metadata.graph.Edge; import com.linkedin.metadata.graph.GraphIndexUtils; import com.linkedin.metadata.graph.GraphService; @@ -39,8 +41,6 @@ import com.linkedin.metadata.timeseries.TimeseriesAspectService; import com.linkedin.metadata.timeseries.transformer.TimeseriesAspectTransformer; import com.linkedin.metadata.utils.EntityKeyUtils; -import com.linkedin.metadata.utils.GenericRecordUtils; -import com.linkedin.mxe.GenericAspect; import com.linkedin.mxe.MetadataChangeLog; import com.linkedin.mxe.SystemMetadata; import com.linkedin.util.Pair; @@ -56,6 +56,7 @@ import java.util.Optional; import java.util.Set; import java.util.stream.Collectors; +import java.util.stream.Stream; import javax.annotation.Nonnull; import javax.annotation.Nullable; import lombok.extern.slf4j.Slf4j; @@ -73,6 +74,8 @@ public class UpdateIndicesService { private final SearchDocumentTransformer _searchDocumentTransformer; private final EntityIndexBuilders _entityIndexBuilders; + private SystemEntityClient systemEntityClient; + @Value("${featureFlags.graphServiceDiffModeEnabled:true}") private boolean _graphDiffMode; @@ -111,10 +114,25 @@ public UpdateIndicesService( public void handleChangeEvent(@Nonnull final MetadataChangeLog event) { try { - if (UPDATE_CHANGE_TYPES.contains(event.getChangeType())) { - handleUpdateChangeEvent(event); - } else if (event.getChangeType() == ChangeType.DELETE) { - handleDeleteChangeEvent(event); + MCLBatchItemImpl batch = + MCLBatchItemImpl.builder().build(event, _entityRegistry, systemEntityClient); + + Stream sideEffects = + _entityRegistry + .getMCLSideEffects( + event.getChangeType(), event.getEntityType(), event.getAspectName()) + .stream() + .flatMap( + mclSideEffect -> + mclSideEffect.apply(List.of(batch), _entityRegistry, systemEntityClient)); + + for (MCLBatchItem mclBatchItem : Stream.concat(Stream.of(batch), sideEffects).toList()) { + MetadataChangeLog hookEvent = mclBatchItem.getMetadataChangeLog(); + if (UPDATE_CHANGE_TYPES.contains(hookEvent.getChangeType())) { + handleUpdateChangeEvent(mclBatchItem); + } else if (hookEvent.getChangeType() == ChangeType.DELETE) { + handleDeleteChangeEvent(mclBatchItem); + } } } catch (IOException e) { throw new RuntimeException(e); @@ -130,38 +148,19 @@ public void handleChangeEvent(@Nonnull final MetadataChangeLog event) { * * @param event the change event to be processed. 
*/ - public void handleUpdateChangeEvent(@Nonnull final MetadataChangeLog event) throws IOException { + private void handleUpdateChangeEvent(@Nonnull final MCLBatchItem event) throws IOException { - final EntitySpec entitySpec = getEventEntitySpec(event); - final Urn urn = EntityKeyUtils.getUrnFromLog(event, entitySpec.getKeyAspectSpec()); + final EntitySpec entitySpec = event.getEntitySpec(); + final AspectSpec aspectSpec = event.getAspectSpec(); + final Urn urn = event.getUrn(); - if (!event.hasAspectName() || !event.hasAspect()) { - log.error("Aspect or aspect name is missing. Skipping aspect processing..."); - return; - } - - AspectSpec aspectSpec = entitySpec.getAspectSpec(event.getAspectName()); - if (aspectSpec == null) { - throw new RuntimeException( - String.format( - "Failed to retrieve Aspect Spec for entity with name %s, aspect with name %s. Cannot update indices for MCL.", - event.getEntityType(), event.getAspectName())); - } - - RecordTemplate aspect = - GenericRecordUtils.deserializeAspect( - event.getAspect().getValue(), event.getAspect().getContentType(), aspectSpec); - GenericAspect previousAspectValue = event.getPreviousAspectValue(); - RecordTemplate previousAspect = - previousAspectValue != null - ? GenericRecordUtils.deserializeAspect( - previousAspectValue.getValue(), previousAspectValue.getContentType(), aspectSpec) - : null; + RecordTemplate aspect = event.getAspect(); + RecordTemplate previousAspect = event.getPreviousAspect(); // Step 0. If the aspect is timeseries, add to its timeseries index. if (aspectSpec.isTimeseries()) { updateTimeseriesFields( - event.getEntityType(), + urn.getEntityType(), event.getAspectName(), urn, aspect, @@ -185,9 +184,9 @@ public void handleUpdateChangeEvent(@Nonnull final MetadataChangeLog event) thro && (systemMetadata == null || systemMetadata.getProperties() == null || !Boolean.parseBoolean(systemMetadata.getProperties().get(FORCE_INDEXING_KEY)))) { - updateGraphServiceDiff(urn, aspectSpec, previousAspect, aspect, event); + updateGraphServiceDiff(urn, aspectSpec, previousAspect, aspect, event.getMetadataChangeLog()); } else { - updateGraphService(urn, aspectSpec, aspect, event); + updateGraphService(urn, aspectSpec, aspect, event.getMetadataChangeLog()); } } @@ -203,34 +202,25 @@ public void handleUpdateChangeEvent(@Nonnull final MetadataChangeLog event) thro * * @param event the change event to be processed. */ - public void handleDeleteChangeEvent(@Nonnull final MetadataChangeLog event) { + private void handleDeleteChangeEvent(@Nonnull final MCLBatchItem event) { - final EntitySpec entitySpec = getEventEntitySpec(event); - final Urn urn = EntityKeyUtils.getUrnFromLog(event, entitySpec.getKeyAspectSpec()); - - if (!event.hasAspectName() || !event.hasPreviousAspectValue()) { - log.error("Previous aspect or aspect name is missing. Skipping aspect processing..."); - return; - } + final EntitySpec entitySpec = event.getEntitySpec(); + final Urn urn = event.getUrn(); AspectSpec aspectSpec = entitySpec.getAspectSpec(event.getAspectName()); if (aspectSpec == null) { throw new RuntimeException( String.format( "Failed to retrieve Aspect Spec for entity with name %s, aspect with name %s. 
Cannot update indices for MCL.", - event.getEntityType(), event.getAspectName())); + urn.getEntityType(), event.getAspectName())); } - RecordTemplate aspect = - GenericRecordUtils.deserializeAspect( - event.getPreviousAspectValue().getValue(), - event.getPreviousAspectValue().getContentType(), - aspectSpec); + RecordTemplate aspect = event.getAspect(); Boolean isDeletingKey = event.getAspectName().equals(entitySpec.getKeyAspectName()); if (!aspectSpec.isTimeseries()) { deleteSystemMetadata(urn, aspectSpec, isDeletingKey); - deleteGraphData(urn, aspectSpec, aspect, isDeletingKey, event); + deleteGraphData(urn, aspectSpec, aspect, isDeletingKey, event.getMetadataChangeLog()); deleteSearchData( _entitySearchService, urn, entitySpec.getName(), aspectSpec, aspect, isDeletingKey); } @@ -633,6 +623,7 @@ private EntitySpec getEventEntitySpec(@Nonnull final MetadataChangeLog event) { * @param systemEntityClient system entity client */ public void setSystemEntityClient(SystemEntityClient systemEntityClient) { + this.systemEntityClient = systemEntityClient; _searchDocumentTransformer.setEntityClient(systemEntityClient); } } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/AspectIngestionUtils.java b/metadata-io/src/test/java/com/linkedin/metadata/AspectIngestionUtils.java index 2113e5a04f3a2..252ac2d633b98 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/AspectIngestionUtils.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/AspectIngestionUtils.java @@ -5,8 +5,8 @@ import com.linkedin.common.urn.UrnUtils; import com.linkedin.identity.CorpUserInfo; import com.linkedin.metadata.entity.EntityService; -import com.linkedin.metadata.entity.ebean.transactions.AspectsBatchImpl; -import com.linkedin.metadata.entity.ebean.transactions.UpsertBatchItem; +import com.linkedin.metadata.entity.ebean.batch.AspectsBatchImpl; +import com.linkedin.metadata.entity.ebean.batch.MCPUpsertBatchItem; import com.linkedin.metadata.key.CorpUserKey; import java.util.HashMap; import java.util.LinkedList; @@ -26,27 +26,24 @@ public static Map ingestCorpUserKeyAspects( @Nonnull public static Map ingestCorpUserKeyAspects( - EntityService entityService, int aspectCount, int startIndex) { + EntityService entityService, int aspectCount, int startIndex) { String aspectName = AspectGenerationUtils.getAspectName(new CorpUserKey()); Map aspects = new HashMap<>(); - List items = new LinkedList<>(); + List items = new LinkedList<>(); for (int i = startIndex; i < startIndex + aspectCount; i++) { Urn urn = UrnUtils.getUrn(String.format("urn:li:corpuser:tester%d", i)); CorpUserKey aspect = AspectGenerationUtils.createCorpUserKey(urn); aspects.put(urn, aspect); items.add( - UpsertBatchItem.builder() + MCPUpsertBatchItem.builder() .urn(urn) .aspectName(aspectName) .aspect(aspect) + .auditStamp(AspectGenerationUtils.createAuditStamp()) .systemMetadata(AspectGenerationUtils.createSystemMetadata()) - .build(entityService.getEntityRegistry())); + .build(entityService.getEntityRegistry(), entityService.getSystemEntityClient())); } - entityService.ingestAspects( - AspectsBatchImpl.builder().items(items).build(), - AspectGenerationUtils.createAuditStamp(), - true, - true); + entityService.ingestAspects(AspectsBatchImpl.builder().items(items).build(), true, true); return aspects; } @@ -61,25 +58,22 @@ public static Map ingestCorpUserInfoAspects( @Nonnull final EntityService entityService, int aspectCount, int startIndex) { String aspectName = AspectGenerationUtils.getAspectName(new CorpUserInfo()); Map aspects = new 
HashMap<>(); - List items = new LinkedList<>(); + List items = new LinkedList<>(); for (int i = startIndex; i < startIndex + aspectCount; i++) { Urn urn = UrnUtils.getUrn(String.format("urn:li:corpuser:tester%d", i)); String email = String.format("email%d@test.com", i); CorpUserInfo aspect = AspectGenerationUtils.createCorpUserInfo(email); aspects.put(urn, aspect); items.add( - UpsertBatchItem.builder() + MCPUpsertBatchItem.builder() .urn(urn) .aspectName(aspectName) .aspect(aspect) + .auditStamp(AspectGenerationUtils.createAuditStamp()) .systemMetadata(AspectGenerationUtils.createSystemMetadata()) - .build(entityService.getEntityRegistry())); + .build(entityService.getEntityRegistry(), entityService.getSystemEntityClient())); } - entityService.ingestAspects( - AspectsBatchImpl.builder().items(items).build(), - AspectGenerationUtils.createAuditStamp(), - true, - true); + entityService.ingestAspects(AspectsBatchImpl.builder().items(items).build(), true, true); return aspects; } @@ -94,7 +88,7 @@ public static Map ingestChartInfoAspects( @Nonnull final EntityService entityService, int aspectCount, int startIndex) { String aspectName = AspectGenerationUtils.getAspectName(new ChartInfo()); Map aspects = new HashMap<>(); - List items = new LinkedList<>(); + List items = new LinkedList<>(); for (int i = startIndex; i < startIndex + aspectCount; i++) { Urn urn = UrnUtils.getUrn(String.format("urn:li:chart:(looker,test%d)", i)); String title = String.format("Test Title %d", i); @@ -102,18 +96,15 @@ public static Map ingestChartInfoAspects( ChartInfo aspect = AspectGenerationUtils.createChartInfo(title, description); aspects.put(urn, aspect); items.add( - UpsertBatchItem.builder() + MCPUpsertBatchItem.builder() .urn(urn) .aspectName(aspectName) .aspect(aspect) + .auditStamp(AspectGenerationUtils.createAuditStamp()) .systemMetadata(AspectGenerationUtils.createSystemMetadata()) - .build(entityService.getEntityRegistry())); + .build(entityService.getEntityRegistry(), entityService.getSystemEntityClient())); } - entityService.ingestAspects( - AspectsBatchImpl.builder().items(items).build(), - AspectGenerationUtils.createAuditStamp(), - true, - true); + entityService.ingestAspects(AspectsBatchImpl.builder().items(items).build(), true, true); return aspects; } } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/entity/CassandraEntityServiceTest.java b/metadata-io/src/test/java/com/linkedin/metadata/entity/CassandraEntityServiceTest.java index 74c81ff2e8602..bad47f9acf507 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/entity/CassandraEntityServiceTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/entity/CassandraEntityServiceTest.java @@ -75,7 +75,7 @@ private void configureComponents() { _aspectDao, _mockProducer, _testEntityRegistry, - true, + false, _mockUpdateIndicesService, preProcessHooks); _retentionService = new CassandraRetentionService(_entityServiceImpl, session, 1000); diff --git a/metadata-io/src/test/java/com/linkedin/metadata/entity/EbeanEntityServiceTest.java b/metadata-io/src/test/java/com/linkedin/metadata/entity/EbeanEntityServiceTest.java index eeb014f7afdc2..45e992576676d 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/entity/EbeanEntityServiceTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/entity/EbeanEntityServiceTest.java @@ -16,8 +16,8 @@ import com.linkedin.metadata.config.PreProcessHooks; import com.linkedin.metadata.entity.ebean.EbeanAspectDao; import com.linkedin.metadata.entity.ebean.EbeanRetentionService; 
-import com.linkedin.metadata.entity.ebean.transactions.AspectsBatchImpl; -import com.linkedin.metadata.entity.ebean.transactions.UpsertBatchItem; +import com.linkedin.metadata.entity.ebean.batch.AspectsBatchImpl; +import com.linkedin.metadata.entity.ebean.batch.MCPUpsertBatchItem; import com.linkedin.metadata.event.EventProducer; import com.linkedin.metadata.key.CorpUserKey; import com.linkedin.metadata.models.registry.EntityRegistryException; @@ -73,7 +73,7 @@ public void setupTest() { _aspectDao, _mockProducer, _testEntityRegistry, - true, + false, _mockUpdateIndicesService, preProcessHooks); _retentionService = new EbeanRetentionService(_entityServiceImpl, server, 1000); @@ -116,28 +116,30 @@ public void testIngestListLatestAspects() throws AssertionError { // Ingest CorpUserInfo Aspect #3 CorpUserInfo writeAspect3 = AspectGenerationUtils.createCorpUserInfo("email3@test.com"); - List items = + List items = List.of( - UpsertBatchItem.builder() + MCPUpsertBatchItem.builder() .urn(entityUrn1) .aspectName(aspectName) .aspect(writeAspect1) .systemMetadata(metadata1) - .build(_testEntityRegistry), - UpsertBatchItem.builder() + .auditStamp(TEST_AUDIT_STAMP) + .build(_testEntityRegistry, _entityServiceImpl.getSystemEntityClient()), + MCPUpsertBatchItem.builder() .urn(entityUrn2) .aspectName(aspectName) .aspect(writeAspect2) .systemMetadata(metadata1) - .build(_testEntityRegistry), - UpsertBatchItem.builder() + .auditStamp(TEST_AUDIT_STAMP) + .build(_testEntityRegistry, _entityServiceImpl.getSystemEntityClient()), + MCPUpsertBatchItem.builder() .urn(entityUrn3) .aspectName(aspectName) .aspect(writeAspect3) .systemMetadata(metadata1) - .build(_testEntityRegistry)); - _entityServiceImpl.ingestAspects( - AspectsBatchImpl.builder().items(items).build(), TEST_AUDIT_STAMP, true, true); + .auditStamp(TEST_AUDIT_STAMP) + .build(_testEntityRegistry, _entityServiceImpl.getSystemEntityClient())); + _entityServiceImpl.ingestAspects(AspectsBatchImpl.builder().items(items).build(), true, true); // List aspects ListResult batch1 = @@ -183,28 +185,30 @@ public void testIngestListUrns() throws AssertionError { // Ingest CorpUserInfo Aspect #3 RecordTemplate writeAspect3 = AspectGenerationUtils.createCorpUserKey(entityUrn3); - List items = + List items = List.of( - UpsertBatchItem.builder() + MCPUpsertBatchItem.builder() .urn(entityUrn1) .aspectName(aspectName) .aspect(writeAspect1) .systemMetadata(metadata1) - .build(_testEntityRegistry), - UpsertBatchItem.builder() + .auditStamp(TEST_AUDIT_STAMP) + .build(_testEntityRegistry, _entityServiceImpl.getSystemEntityClient()), + MCPUpsertBatchItem.builder() .urn(entityUrn2) .aspectName(aspectName) .aspect(writeAspect2) .systemMetadata(metadata1) - .build(_testEntityRegistry), - UpsertBatchItem.builder() + .auditStamp(TEST_AUDIT_STAMP) + .build(_testEntityRegistry, _entityServiceImpl.getSystemEntityClient()), + MCPUpsertBatchItem.builder() .urn(entityUrn3) .aspectName(aspectName) .aspect(writeAspect3) .systemMetadata(metadata1) - .build(_testEntityRegistry)); - _entityServiceImpl.ingestAspects( - AspectsBatchImpl.builder().items(items).build(), TEST_AUDIT_STAMP, true, true); + .auditStamp(TEST_AUDIT_STAMP) + .build(_testEntityRegistry, _entityServiceImpl.getSystemEntityClient())); + _entityServiceImpl.ingestAspects(AspectsBatchImpl.builder().items(items).build(), true, true); // List aspects urns ListUrnsResult batch1 = _entityServiceImpl.listUrns(entityUrn1.getEntityType(), 0, 2); @@ -447,8 +451,14 @@ public void run() { 
auditStamp.setActor(Urn.createFromString(Constants.DATAHUB_ACTOR)); auditStamp.setTime(System.currentTimeMillis()); AspectsBatchImpl batch = - AspectsBatchImpl.builder().mcps(mcps, entityService.getEntityRegistry()).build(); - entityService.ingestProposal(batch, auditStamp, false); + AspectsBatchImpl.builder() + .mcps( + mcps, + auditStamp, + entityService.getEntityRegistry(), + entityService.getSystemEntityClient()) + .build(); + entityService.ingestProposal(batch, false); } } catch (InterruptedException | URISyntaxException ie) { throw new RuntimeException(ie); diff --git a/metadata-io/src/test/java/com/linkedin/metadata/entity/EntityServiceTest.java b/metadata-io/src/test/java/com/linkedin/metadata/entity/EntityServiceTest.java index f03811da35ea8..e9e67f4b2114e 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/entity/EntityServiceTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/entity/EntityServiceTest.java @@ -36,8 +36,8 @@ import com.linkedin.metadata.aspect.CorpUserAspect; import com.linkedin.metadata.aspect.CorpUserAspectArray; import com.linkedin.metadata.aspect.VersionedAspect; -import com.linkedin.metadata.entity.ebean.transactions.AspectsBatchImpl; -import com.linkedin.metadata.entity.ebean.transactions.UpsertBatchItem; +import com.linkedin.metadata.entity.ebean.batch.AspectsBatchImpl; +import com.linkedin.metadata.entity.ebean.batch.MCPUpsertBatchItem; import com.linkedin.metadata.entity.restoreindices.RestoreIndicesArgs; import com.linkedin.metadata.event.EventProducer; import com.linkedin.metadata.key.CorpUserKey; @@ -525,9 +525,8 @@ public void testReingestAspectsGetLatestAspects() throws Exception { _entityServiceImpl.ingestAspects(entityUrn, pairToIngest, TEST_AUDIT_STAMP, metadata1); - verify(_mockProducer, times(1)) - .produceMetadataChangeLog( - Mockito.eq(entityUrn), Mockito.any(), Mockito.eq(restateChangeLog)); + verify(_mockProducer, times(0)) + .produceMetadataChangeLog(Mockito.any(), Mockito.any(), Mockito.any()); verifyNoMoreInteractions(_mockProducer); } @@ -840,34 +839,37 @@ public void testRollbackAspect() throws AssertionError { CorpUserInfo writeAspect1Overwrite = AspectGenerationUtils.createCorpUserInfo("email1.overwrite@test.com"); - List items = + List items = List.of( - UpsertBatchItem.builder() + MCPUpsertBatchItem.builder() .urn(entityUrn1) .aspectName(aspectName) .aspect(writeAspect1) .systemMetadata(metadata1) - .build(_testEntityRegistry), - UpsertBatchItem.builder() + .auditStamp(TEST_AUDIT_STAMP) + .build(_testEntityRegistry, _entityServiceImpl.getSystemEntityClient()), + MCPUpsertBatchItem.builder() .urn(entityUrn2) .aspectName(aspectName) .aspect(writeAspect2) + .auditStamp(TEST_AUDIT_STAMP) .systemMetadata(metadata1) - .build(_testEntityRegistry), - UpsertBatchItem.builder() + .build(_testEntityRegistry, _entityServiceImpl.getSystemEntityClient()), + MCPUpsertBatchItem.builder() .urn(entityUrn3) .aspectName(aspectName) .aspect(writeAspect3) + .auditStamp(TEST_AUDIT_STAMP) .systemMetadata(metadata1) - .build(_testEntityRegistry), - UpsertBatchItem.builder() + .build(_testEntityRegistry, _entityServiceImpl.getSystemEntityClient()), + MCPUpsertBatchItem.builder() .urn(entityUrn1) .aspectName(aspectName) .aspect(writeAspect1Overwrite) .systemMetadata(metadata2) - .build(_testEntityRegistry)); - _entityServiceImpl.ingestAspects( - AspectsBatchImpl.builder().items(items).build(), TEST_AUDIT_STAMP, true, true); + .auditStamp(TEST_AUDIT_STAMP) + .build(_testEntityRegistry, _entityServiceImpl.getSystemEntityClient())); + 
_entityServiceImpl.ingestAspects(AspectsBatchImpl.builder().items(items).build(), true, true); // this should no-op since this run has been overwritten AspectRowSummary rollbackOverwrittenAspect = new AspectRowSummary(); @@ -916,28 +918,30 @@ public void testRollbackKey() throws AssertionError { CorpUserInfo writeAspect1Overwrite = AspectGenerationUtils.createCorpUserInfo("email1.overwrite@test.com"); - List items = + List items = List.of( - UpsertBatchItem.builder() + MCPUpsertBatchItem.builder() .urn(entityUrn1) .aspectName(aspectName) .aspect(writeAspect1) .systemMetadata(metadata1) - .build(_testEntityRegistry), - UpsertBatchItem.builder() + .auditStamp(TEST_AUDIT_STAMP) + .build(_testEntityRegistry, _entityServiceImpl.getSystemEntityClient()), + MCPUpsertBatchItem.builder() .urn(entityUrn1) .aspectName(keyAspectName) .aspect(writeKey1) .systemMetadata(metadata1) - .build(_testEntityRegistry), - UpsertBatchItem.builder() + .auditStamp(TEST_AUDIT_STAMP) + .build(_testEntityRegistry, _entityServiceImpl.getSystemEntityClient()), + MCPUpsertBatchItem.builder() .urn(entityUrn1) .aspectName(aspectName) .aspect(writeAspect1Overwrite) .systemMetadata(metadata2) - .build(_testEntityRegistry)); - _entityServiceImpl.ingestAspects( - AspectsBatchImpl.builder().items(items).build(), TEST_AUDIT_STAMP, true, true); + .auditStamp(TEST_AUDIT_STAMP) + .build(_testEntityRegistry, _entityServiceImpl.getSystemEntityClient())); + _entityServiceImpl.ingestAspects(AspectsBatchImpl.builder().items(items).build(), true, true); // this should no-op since the key should have been written in the first run AspectRowSummary rollbackKeyWithWrongRunId = new AspectRowSummary(); @@ -994,40 +998,44 @@ public void testRollbackUrn() throws AssertionError { CorpUserInfo writeAspect1Overwrite = AspectGenerationUtils.createCorpUserInfo("email1.overwrite@test.com"); - List items = + List items = List.of( - UpsertBatchItem.builder() + MCPUpsertBatchItem.builder() .urn(entityUrn1) .aspectName(aspectName) .aspect(writeAspect1) .systemMetadata(metadata1) - .build(_testEntityRegistry), - UpsertBatchItem.builder() + .auditStamp(TEST_AUDIT_STAMP) + .build(_testEntityRegistry, _entityServiceImpl.getSystemEntityClient()), + MCPUpsertBatchItem.builder() .urn(entityUrn1) .aspectName(keyAspectName) .aspect(writeKey1) + .auditStamp(TEST_AUDIT_STAMP) .systemMetadata(metadata1) - .build(_testEntityRegistry), - UpsertBatchItem.builder() + .build(_testEntityRegistry, _entityServiceImpl.getSystemEntityClient()), + MCPUpsertBatchItem.builder() .urn(entityUrn2) .aspectName(aspectName) .aspect(writeAspect2) + .auditStamp(TEST_AUDIT_STAMP) .systemMetadata(metadata1) - .build(_testEntityRegistry), - UpsertBatchItem.builder() + .build(_testEntityRegistry, _entityServiceImpl.getSystemEntityClient()), + MCPUpsertBatchItem.builder() .urn(entityUrn3) .aspectName(aspectName) .aspect(writeAspect3) .systemMetadata(metadata1) - .build(_testEntityRegistry), - UpsertBatchItem.builder() + .auditStamp(TEST_AUDIT_STAMP) + .build(_testEntityRegistry, _entityServiceImpl.getSystemEntityClient()), + MCPUpsertBatchItem.builder() .urn(entityUrn1) .aspectName(aspectName) .aspect(writeAspect1Overwrite) .systemMetadata(metadata2) - .build(_testEntityRegistry)); - _entityServiceImpl.ingestAspects( - AspectsBatchImpl.builder().items(items).build(), TEST_AUDIT_STAMP, true, true); + .auditStamp(TEST_AUDIT_STAMP) + .build(_testEntityRegistry, _entityServiceImpl.getSystemEntityClient())); + _entityServiceImpl.ingestAspects(AspectsBatchImpl.builder().items(items).build(), true, 
true); // this should no-op since the key should have been written in the furst run AspectRowSummary rollbackKeyWithWrongRunId = new AspectRowSummary(); @@ -1057,16 +1065,16 @@ public void testIngestGetLatestAspect() throws AssertionError { SystemMetadata metadata1 = AspectGenerationUtils.createSystemMetadata(1625792689, "run-123"); SystemMetadata metadata2 = AspectGenerationUtils.createSystemMetadata(1635792689, "run-456"); - List items = + List items = List.of( - UpsertBatchItem.builder() + MCPUpsertBatchItem.builder() .urn(entityUrn) .aspectName(aspectName) .aspect(writeAspect1) + .auditStamp(TEST_AUDIT_STAMP) .systemMetadata(metadata1) - .build(_testEntityRegistry)); - _entityServiceImpl.ingestAspects( - AspectsBatchImpl.builder().items(items).build(), TEST_AUDIT_STAMP, true, true); + .build(_testEntityRegistry, _entityServiceImpl.getSystemEntityClient())); + _entityServiceImpl.ingestAspects(AspectsBatchImpl.builder().items(items).build(), true, true); // Validate retrieval of CorpUserInfo Aspect #1 RecordTemplate readAspect1 = _entityServiceImpl.getLatestAspect(entityUrn, aspectName); @@ -1090,14 +1098,14 @@ public void testIngestGetLatestAspect() throws AssertionError { items = List.of( - UpsertBatchItem.builder() + MCPUpsertBatchItem.builder() .urn(entityUrn) .aspectName(aspectName) .aspect(writeAspect2) + .auditStamp(TEST_AUDIT_STAMP) .systemMetadata(metadata2) - .build(_testEntityRegistry)); - _entityServiceImpl.ingestAspects( - AspectsBatchImpl.builder().items(items).build(), TEST_AUDIT_STAMP, true, true); + .build(_testEntityRegistry, _entityServiceImpl.getSystemEntityClient())); + _entityServiceImpl.ingestAspects(AspectsBatchImpl.builder().items(items).build(), true, true); // Validate retrieval of CorpUserInfo Aspect #2 RecordTemplate readAspect2 = _entityServiceImpl.getLatestAspect(entityUrn, aspectName); @@ -1134,16 +1142,16 @@ public void testIngestGetLatestEnvelopedAspect() throws Exception { SystemMetadata metadata1 = AspectGenerationUtils.createSystemMetadata(1625792689, "run-123"); SystemMetadata metadata2 = AspectGenerationUtils.createSystemMetadata(1635792689, "run-456"); - List items = + List items = List.of( - UpsertBatchItem.builder() + MCPUpsertBatchItem.builder() .urn(entityUrn) .aspectName(aspectName) .aspect(writeAspect1) + .auditStamp(TEST_AUDIT_STAMP) .systemMetadata(metadata1) - .build(_testEntityRegistry)); - _entityServiceImpl.ingestAspects( - AspectsBatchImpl.builder().items(items).build(), TEST_AUDIT_STAMP, true, true); + .build(_testEntityRegistry, _entityServiceImpl.getSystemEntityClient())); + _entityServiceImpl.ingestAspects(AspectsBatchImpl.builder().items(items).build(), true, true); // Validate retrieval of CorpUserInfo Aspect #1 EnvelopedAspect readAspect1 = @@ -1156,14 +1164,14 @@ public void testIngestGetLatestEnvelopedAspect() throws Exception { items = List.of( - UpsertBatchItem.builder() + MCPUpsertBatchItem.builder() .urn(entityUrn) .aspectName(aspectName) .aspect(writeAspect2) .systemMetadata(metadata2) - .build(_testEntityRegistry)); - _entityServiceImpl.ingestAspects( - AspectsBatchImpl.builder().items(items).build(), TEST_AUDIT_STAMP, true, true); + .auditStamp(TEST_AUDIT_STAMP) + .build(_testEntityRegistry, _entityServiceImpl.getSystemEntityClient())); + _entityServiceImpl.ingestAspects(AspectsBatchImpl.builder().items(items).build(), true, true); // Validate retrieval of CorpUserInfo Aspect #2 EnvelopedAspect readAspect2 = @@ -1199,16 +1207,16 @@ public void testIngestSameAspect() throws AssertionError { SystemMetadata metadata3 = 
AspectGenerationUtils.createSystemMetadata(1635792689, "run-123", "run-456"); - List items = + List items = List.of( - UpsertBatchItem.builder() + MCPUpsertBatchItem.builder() .urn(entityUrn) .aspectName(aspectName) .aspect(writeAspect1) .systemMetadata(metadata1) - .build(_testEntityRegistry)); - _entityServiceImpl.ingestAspects( - AspectsBatchImpl.builder().items(items).build(), TEST_AUDIT_STAMP, true, true); + .auditStamp(TEST_AUDIT_STAMP) + .build(_testEntityRegistry, _entityServiceImpl.getSystemEntityClient())); + _entityServiceImpl.ingestAspects(AspectsBatchImpl.builder().items(items).build(), true, true); // Validate retrieval of CorpUserInfo Aspect #1 RecordTemplate readAspect1 = _entityServiceImpl.getLatestAspect(entityUrn, aspectName); @@ -1232,14 +1240,14 @@ public void testIngestSameAspect() throws AssertionError { items = List.of( - UpsertBatchItem.builder() + MCPUpsertBatchItem.builder() .urn(entityUrn) .aspectName(aspectName) .aspect(writeAspect2) .systemMetadata(metadata2) - .build(_testEntityRegistry)); - _entityServiceImpl.ingestAspects( - AspectsBatchImpl.builder().items(items).build(), TEST_AUDIT_STAMP, true, true); + .auditStamp(TEST_AUDIT_STAMP) + .build(_testEntityRegistry, _entityServiceImpl.getSystemEntityClient())); + _entityServiceImpl.ingestAspects(AspectsBatchImpl.builder().items(items).build(), true, true); // Validate retrieval of CorpUserInfo Aspect #2 RecordTemplate readAspect2 = _entityServiceImpl.getLatestAspect(entityUrn, aspectName); @@ -1258,8 +1266,8 @@ public void testIngestSameAspect() throws AssertionError { DataTemplateUtil.areEqual( EntityUtils.parseSystemMetadata(readAspectDao2.getSystemMetadata()), metadata3)); - verify(_mockProducer, times(1)) - .produceMetadataChangeLog(Mockito.eq(entityUrn), Mockito.any(), mclCaptor.capture()); + verify(_mockProducer, times(0)) + .produceMetadataChangeLog(Mockito.any(), Mockito.any(), Mockito.any()); verifyNoMoreInteractions(_mockProducer); } @@ -1283,46 +1291,51 @@ public void testRetention() throws AssertionError { Status writeAspect2a = new Status().setRemoved(false); Status writeAspect2b = new Status().setRemoved(true); - List items = + List items = List.of( - UpsertBatchItem.builder() + MCPUpsertBatchItem.builder() .urn(entityUrn) .aspectName(aspectName) .aspect(writeAspect1) .systemMetadata(metadata1) - .build(_testEntityRegistry), - UpsertBatchItem.builder() + .auditStamp(TEST_AUDIT_STAMP) + .build(_testEntityRegistry, _entityServiceImpl.getSystemEntityClient()), + MCPUpsertBatchItem.builder() .urn(entityUrn) .aspectName(aspectName) .aspect(writeAspect1a) .systemMetadata(metadata1) - .build(_testEntityRegistry), - UpsertBatchItem.builder() + .auditStamp(TEST_AUDIT_STAMP) + .build(_testEntityRegistry, _entityServiceImpl.getSystemEntityClient()), + MCPUpsertBatchItem.builder() .urn(entityUrn) .aspectName(aspectName) .aspect(writeAspect1b) .systemMetadata(metadata1) - .build(_testEntityRegistry), - UpsertBatchItem.builder() + .auditStamp(TEST_AUDIT_STAMP) + .build(_testEntityRegistry, _entityServiceImpl.getSystemEntityClient()), + MCPUpsertBatchItem.builder() .urn(entityUrn) .aspectName(aspectName2) .aspect(writeAspect2) .systemMetadata(metadata1) - .build(_testEntityRegistry), - UpsertBatchItem.builder() + .auditStamp(TEST_AUDIT_STAMP) + .build(_testEntityRegistry, _entityServiceImpl.getSystemEntityClient()), + MCPUpsertBatchItem.builder() .urn(entityUrn) .aspectName(aspectName2) .aspect(writeAspect2a) .systemMetadata(metadata1) - .build(_testEntityRegistry), - UpsertBatchItem.builder() + 
.auditStamp(TEST_AUDIT_STAMP) + .build(_testEntityRegistry, _entityServiceImpl.getSystemEntityClient()), + MCPUpsertBatchItem.builder() .urn(entityUrn) .aspectName(aspectName2) .aspect(writeAspect2b) .systemMetadata(metadata1) - .build(_testEntityRegistry)); - _entityServiceImpl.ingestAspects( - AspectsBatchImpl.builder().items(items).build(), TEST_AUDIT_STAMP, true, true); + .auditStamp(TEST_AUDIT_STAMP) + .build(_testEntityRegistry, _entityServiceImpl.getSystemEntityClient())); + _entityServiceImpl.ingestAspects(AspectsBatchImpl.builder().items(items).build(), true, true); assertEquals(_entityServiceImpl.getAspect(entityUrn, aspectName, 1), writeAspect1); assertEquals(_entityServiceImpl.getAspect(entityUrn, aspectName2, 1), writeAspect2); @@ -1347,20 +1360,21 @@ public void testRetention() throws AssertionError { items = List.of( - UpsertBatchItem.builder() + MCPUpsertBatchItem.builder() .urn(entityUrn) .aspectName(aspectName) .aspect(writeAspect1c) .systemMetadata(metadata1) - .build(_testEntityRegistry), - UpsertBatchItem.builder() + .auditStamp(TEST_AUDIT_STAMP) + .build(_testEntityRegistry, _entityServiceImpl.getSystemEntityClient()), + MCPUpsertBatchItem.builder() .urn(entityUrn) .aspectName(aspectName2) .aspect(writeAspect2c) .systemMetadata(metadata1) - .build(_testEntityRegistry)); - _entityServiceImpl.ingestAspects( - AspectsBatchImpl.builder().items(items).build(), TEST_AUDIT_STAMP, true, true); + .auditStamp(TEST_AUDIT_STAMP) + .build(_testEntityRegistry, _entityServiceImpl.getSystemEntityClient())); + _entityServiceImpl.ingestAspects(AspectsBatchImpl.builder().items(items).build(), true, true); assertNull(_entityServiceImpl.getAspect(entityUrn, aspectName, 1)); assertEquals(_entityServiceImpl.getAspect(entityUrn, aspectName2, 1), writeAspect2); diff --git a/metadata-io/src/test/java/com/linkedin/metadata/timeseries/search/TimeseriesAspectServiceTestBase.java b/metadata-io/src/test/java/com/linkedin/metadata/timeseries/search/TimeseriesAspectServiceTestBase.java index 13236e302c259..8d7701f6d174f 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/timeseries/search/TimeseriesAspectServiceTestBase.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/timeseries/search/TimeseriesAspectServiceTestBase.java @@ -116,6 +116,7 @@ public void setup() { _entityRegistry = new ConfigEntityRegistry( new DataSchemaFactory("com.datahub.test"), + List.of(), TestEntityProfile.class .getClassLoader() .getResourceAsStream("test-entity-registry.yml")); diff --git a/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/UpdateIndicesHookTest.java b/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/UpdateIndicesHookTest.java index 12c8ad7d0c69b..a227668e22e9b 100644 --- a/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/UpdateIndicesHookTest.java +++ b/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/UpdateIndicesHookTest.java @@ -19,6 +19,8 @@ import com.linkedin.dataset.DatasetLineageType; import com.linkedin.dataset.FineGrainedLineage; import com.linkedin.dataset.FineGrainedLineageArray; +import com.linkedin.dataset.FineGrainedLineageDownstreamType; +import com.linkedin.dataset.FineGrainedLineageUpstreamType; import com.linkedin.dataset.Upstream; import com.linkedin.dataset.UpstreamArray; import com.linkedin.dataset.UpstreamLineage; @@ -47,7 +49,9 @@ import com.linkedin.metadata.utils.GenericRecordUtils; import com.linkedin.mxe.MetadataChangeLog; import com.linkedin.mxe.SystemMetadata; 
+import com.linkedin.schema.NumberType; import com.linkedin.schema.SchemaField; +import com.linkedin.schema.SchemaFieldDataType; import java.net.URISyntaxException; import java.net.URLEncoder; import java.nio.charset.StandardCharsets; @@ -425,6 +429,9 @@ private EntityRegistry createMockEntityRegistry() { .thenReturn(entitySpec); Mockito.when(mockEntityRegistry.getEntitySpec(Constants.DATASET_ENTITY_NAME)) .thenReturn(entitySpec); + Mockito.when(mockEntityRegistry.getEntitySpec(SCHEMA_FIELD_ENTITY_NAME)).thenReturn(entitySpec); + Mockito.when(mockEntityRegistry.getEntitySpec(DATA_PLATFORM_ENTITY_NAME)) + .thenReturn(entitySpec); Mockito.when(entitySpec.getAspectSpec(Constants.INPUT_FIELDS_ASPECT_NAME)) .thenReturn(aspectSpec); Mockito.when(entitySpec.getAspectSpec(Constants.UPSTREAM_LINEAGE_ASPECT_NAME)) @@ -462,6 +469,8 @@ private MetadataChangeLog createUpstreamLineageMCL( UpstreamLineage upstreamLineage = new UpstreamLineage(); FineGrainedLineageArray fineGrainedLineages = new FineGrainedLineageArray(); FineGrainedLineage fineGrainedLineage = new FineGrainedLineage(); + fineGrainedLineage.setDownstreamType(FineGrainedLineageDownstreamType.FIELD); + fineGrainedLineage.setUpstreamType(FineGrainedLineageUpstreamType.DATASET); UrnArray upstreamUrns = new UrnArray(); upstreamUrns.add(upstreamUrn); fineGrainedLineage.setUpstreams(upstreamUrns); @@ -509,6 +518,9 @@ private MetadataChangeLog createInputFieldsMCL(Urn upstreamUrn, String downstrea inputField.setSchemaFieldUrn(upstreamUrn); SchemaField schemaField = new SchemaField(); schemaField.setFieldPath(downstreamFieldPath); + schemaField.setNativeDataType("int"); + schemaField.setType( + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new NumberType()))); inputField.setSchemaField(schemaField); inputFieldsArray.add(inputField); inputFields.setFields(inputFieldsArray); diff --git a/metadata-jobs/mae-consumer/src/test/resources/test-entity-registry.yml b/metadata-jobs/mae-consumer/src/test/resources/test-entity-registry.yml index 0a1afcb235c29..c0af9e705d712 100644 --- a/metadata-jobs/mae-consumer/src/test/resources/test-entity-registry.yml +++ b/metadata-jobs/mae-consumer/src/test/resources/test-entity-registry.yml @@ -1,4 +1,9 @@ entities: + - name: dataPlatform + category: core + keyAspect: dataPlatformKey + aspects: + - dataPlatformInfo - name: dataHubIngestionSource keyAspect: dataHubIngestionSourceKey aspects: @@ -21,5 +26,9 @@ entities: keyAspect: chartKey aspects: - domains + - name: schemaField + category: core + keyAspect: schemaFieldKey + aspects: [] events: - name: entityChangeEvent \ No newline at end of file diff --git a/metadata-models-custom/README.md b/metadata-models-custom/README.md index 7223451f31b58..94399a67806a6 100644 --- a/metadata-models-custom/README.md +++ b/metadata-models-custom/README.md @@ -165,6 +165,252 @@ e.g. `datahub delete by-registry --registry-id=mycompany-dq-model:0.0.1 --hard` As you evolve the metadata model, you can publish new versions of the repository and deploy it into DataHub as well using the same steps outlined above. DataHub will check whether your new models are backwards compatible with the previous versioned model and decline loading models that are backwards incompatible. +### Custom Plugins + +Adding custom aspects to DataHub's existing data model is a powerful way to extend DataHub without forking the entire repo. Often however extending +just the data model is not enough and additional custom code might be required. 
For a few of these use cases, a plugin framework was developed +to control how instances of custom aspects are validated and mutated, and how they generate side effects (additional aspects). + +It should be noted that validating, mutating, or generating side effects for the *core* DataHub aspects can lead to system corruption and should be attempted +by advanced users only. + +The `/config` endpoint documented above has been extended to return information on the instances of the various plugins as well as the classes +that were loaded, for debugging purposes. + +```json +{ + "mycompany-dq-model": { + "0.0.0-dev": { + "plugins": { + "validatorCount": 1, + "mutationHookCount": 1, + "mcpSideEffectCount": 1, + "mclSideEffectCount": 1, + "validatorClasses": [ + "com.linkedin.metadata.aspect.plugins.validation.CustomDataQualityRulesValidator" + ], + "mutationHookClasses": [ + "com.linkedin.metadata.aspect.plugins.hooks.CustomDataQualityRulesMutator" + ], + "mcpSideEffectClasses": [ + "com.linkedin.metadata.aspect.plugins.hooks.CustomDataQualityRulesMCPSideEffect" + ], + "mclSideEffectClasses": [ + "com.linkedin.metadata.aspect.plugins.hooks.CustomDataQualityRulesMCLSideEffect" + ] + } + } + } +} +``` + +#### Custom Validators + +Custom aspects might require that instances of those aspects adhere to specific conditions or rules. These conditions could vary widely depending on the use case; however, they could be as simple +as a null or range check for one or more fields within the custom aspect. Additionally, other aspects can be looked up via the `AspectRetriever` in order to validate the current aspect. + +There are two integration points for validation. The first integration point is `on request`, via the `validateProposedAspect` method, where the aspect is validated independently of its previous value. This validation is performed +outside of any kind of database transaction, so more intensive checks can be run without introducing added latency within a transaction. + +The second integration point for validation occurs within the database transaction, using the `validatePreCommitAspect` method, and has access to both the new aspect and the old aspect. See the included +example in [`CustomDataQualityRulesValidator.java`](src/main/java/com/linkedin/metadata/aspect/plugins/validation/CustomDataQualityRulesValidator.java). + +Shown below is the interface to be implemented for a custom validator. + +```java +public class CustomDataQualityRulesValidator extends AspectPayloadValidator { + @Override + protected void validateProposedAspect( + @Nonnull ChangeType changeType, + @Nonnull Urn entityUrn, + @Nonnull AspectSpec aspectSpec, + @Nonnull RecordTemplate aspectPayload, + @Nonnull AspectRetriever aspectRetriever) + throws AspectValidationException { + + } + + @Override + protected void validatePreCommitAspect( + @Nonnull ChangeType changeType, + @Nonnull Urn entityUrn, + @Nonnull AspectSpec aspectSpec, + @Nullable RecordTemplate previousAspect, + @Nonnull RecordTemplate proposedAspect, + @Nonnull AspectRetriever aspectRetriever) + throws AspectValidationException { + + } +} +``` + +To register this custom validator, add the following to your `entity-registry.yml` file. This activates +the validator on upsert operations for any entity with the custom aspect `customDataQualityRules`. Alternatively, separate +validators can be written for specific entities; in that case, simply specify the entity name instead of `*`.
+ +```yaml + +plugins: + aspectPayloadValidators: + - className: 'com.linkedin.metadata.aspect.plugins.validation.CustomDataQualityRulesValidator' + enabled: true + supportedOperations: + - UPSERT + supportedEntityAspectNames: + - entityName: '*' + aspectName: customDataQualityRules +``` + +#### Custom Mutator + +**Warning: This hook is for advanced users only. It is possible to corrupt data and render your system inoperable.** + +In this example, we want to make sure that the field type is always lowercase, regardless of the string provided +by ingestion. The full example can be found in [`CustomDataQualityRulesMutator.java`](src/main/java/com/linkedin/metadata/aspect/plugins/hooks/CustomDataQualityRulesMutator.java). + +```java +public class CustomDataQualityRulesMutator extends MutationHook { + @Override + protected void mutate( + @Nonnull ChangeType changeType, + @Nonnull EntitySpec entitySpec, + @Nonnull AspectSpec aspectSpec, + @Nullable RecordTemplate oldAspectValue, + @Nullable RecordTemplate newAspectValue, + @Nullable SystemMetadata oldSystemMetadata, + @Nullable SystemMetadata newSystemMetadata, + @Nonnull AuditStamp auditStamp, + @Nonnull AspectRetriever aspectRetriever) { + + if (newAspectValue != null) { + DataQualityRules newDataQualityRules = new DataQualityRules(newAspectValue.data()); + + for (DataQualityRule rule : newDataQualityRules.getRules()) { + // Ensure uniform lowercase + if (!rule.getType().toLowerCase().equals(rule.getType())) { + rule.setType(rule.getType().toLowerCase()); + } + } + } + } +} +``` + +```yaml +plugins: + mutationHooks: + - className: 'com.linkedin.metadata.aspect.plugins.hooks.CustomDataQualityRulesMutator' + enabled: true + supportedOperations: + - UPSERT + supportedEntityAspectNames: + - entityName: '*' + aspectName: customDataQualityRules +``` + +#### MetadataChangeProposal (MCP) Side Effects + +**Warning: This hook is for advanced users only. It is possible to corrupt data and render your system inoperable.** + +MCP Side Effects allow for the creation of new aspects based on an input aspect. + +Notes: +* MCPs will write aspects to the primary data store (SQL, for example) as well as the search indices. +* Side effects in general must include a dependency on the `metadata-io` module, since it deals with lower-level storage primitives. + +The full example can be found in [`CustomDataQualityRulesMCPSideEffect.java`](src/main/java/com/linkedin/metadata/aspect/plugins/hooks/CustomDataQualityRulesMCPSideEffect.java). + +```java +public class CustomDataQualityRulesMCPSideEffect extends MCPSideEffect { + @Override + protected Stream applyMCPSideEffect( + UpsertItem input, EntityRegistry entityRegistry, @Nonnull AspectRetriever aspectRetriever) { + // Mirror aspects to another URN in SQL & Search + Urn mirror = UrnUtils.getUrn(input.getUrn().toString().replace(",PROD)", ",DEV)")); + return Stream.of( + MCPUpsertBatchItem.builder() + .urn(mirror) + .aspectName(input.getAspectName()) + .aspect(input.getAspect()) + .auditStamp(input.getAuditStamp()) + .systemMetadata(input.getSystemMetadata()) + .build(entityRegistry, aspectRetriever)); + } +} +``` + +```yaml +plugins: + mcpSideEffects: + - className: 'com.linkedin.metadata.aspect.plugins.hooks.CustomDataQualityRulesMCPSideEffect' + enabled: true + supportedOperations: + - UPSERT + supportedEntityAspectNames: + - entityName: '*' + aspectName: customDataQualityRules +``` + +#### MetadataChangeLog (MCL) Side Effects + +**Warning: This hook is for advanced users only.
It is possible to corrupt data and render your system inoperable.** + +MCL Side Effects allow for the creation of new aspects based on an input aspect. In this example, we generate a timeseries aspect to represent an event: when a DataQualityRule is created +or modified, we record the actor, event type, and timestamp in a timeseries aspect index. + +Notes: +* MCLs are persisted only to the search indices, so this hook can only add to the search documents. +* A dependency on the `metadata-io` module is required, since it deals with lower-level storage primitives. + +The full example can be found in [`CustomDataQualityRulesMCLSideEffect.java`](src/main/java/com/linkedin/metadata/aspect/plugins/hooks/CustomDataQualityRulesMCLSideEffect.java). + +```java +public class CustomDataQualityRulesMCLSideEffect extends MCLSideEffect { + @Override + protected Stream applyMCLSideEffect( + @Nonnull MCLBatchItem input, + @Nonnull EntityRegistry entityRegistry, + @Nonnull AspectRetriever aspectRetriever) { + + // Generate Timeseries event aspect based on non-Timeseries aspect + MetadataChangeLog originMCP = input.getMetadataChangeLog(); + + Optional timeseriesOptional = + buildEvent(originMCP) + .map( + event -> { + try { + MetadataChangeLog eventMCP = originMCP.clone(); + eventMCP.setAspect(GenericRecordUtils.serializeAspect(event)); + eventMCP.setAspectName("customDataQualityRuleEvent"); + return eventMCP; + } catch (CloneNotSupportedException e) { + throw new RuntimeException(e); + } + }) + .map( + eventMCP -> + MCLBatchItemImpl.builder() + .metadataChangeLog(eventMCP) + .build(entityRegistry, aspectRetriever)); + + return timeseriesOptional.stream(); + } +} +``` + +```yaml +plugins: + mclSideEffects: + - className: 'com.linkedin.metadata.aspect.plugins.hooks.CustomDataQualityRulesMCLSideEffect' + enabled: true + supportedOperations: + - UPSERT + supportedEntityAspectNames: + - entityName: 'dataset' + aspectName: customDataQualityRules +``` + ## The Future Hopefully this repository shows you how easily you can extend and customize DataHub's metadata model!
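As an end-to-end illustration, the sketch below hand-builds a `customDataQualityRules` proposal so that the validator, mutator, and side effects registered above fire when it is ingested. This is a minimal sketch rather than part of the module: it assumes the Pegasus-generated `DataQualityRuleArray` wrapper and the `field`/`type` setters implied by the validator and mutator examples, and it uses a hypothetical dataset URN and field name; your `DataQualityRule` record may declare additional required fields.

```java
import com.linkedin.common.urn.Urn;
import com.linkedin.common.urn.UrnUtils;
import com.linkedin.events.metadata.ChangeType;
import com.linkedin.metadata.utils.GenericRecordUtils;
import com.linkedin.mxe.MetadataChangeProposal;
import com.mycompany.dq.DataQualityRule;
import com.mycompany.dq.DataQualityRuleArray;
import com.mycompany.dq.DataQualityRules;

public class DataQualityRulesProposalExample {

  public static MetadataChangeProposal buildProposal() {
    // One rule using the `field`/`type` properties referenced by the validator and mutator above.
    DataQualityRule rule = new DataQualityRule();
    rule.setField("payment_amount"); // hypothetical dataset column
    rule.setType("NOT_NULL"); // the mutation hook normalizes this to lowercase

    DataQualityRuleArray ruleArray = new DataQualityRuleArray();
    ruleArray.add(rule);
    DataQualityRules rules = new DataQualityRules();
    rules.setRules(ruleArray);

    // Wrap the aspect in an MCP targeting a (hypothetical) dataset; when this proposal is
    // ingested with an UPSERT change type, the registered plugins run against the aspect.
    Urn datasetUrn =
        UrnUtils.getUrn("urn:li:dataset:(urn:li:dataPlatform:hive,SampleHiveDataset,PROD)");
    MetadataChangeProposal proposal = new MetadataChangeProposal();
    proposal.setEntityUrn(datasetUrn);
    proposal.setEntityType("dataset");
    proposal.setAspectName("customDataQualityRules");
    proposal.setChangeType(ChangeType.UPSERT);
    proposal.setAspect(GenericRecordUtils.serializeAspect(rules));
    return proposal;
  }
}
```

Note that a proposal with an empty `rules` array would be rejected by the validator shown earlier, and the MCP side effect would mirror the accepted aspect onto the corresponding `,DEV)` URN.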
diff --git a/metadata-models-custom/build.gradle b/metadata-models-custom/build.gradle index 3ac08dca7c0db..8bf9d3b2f491e 100644 --- a/metadata-models-custom/build.gradle +++ b/metadata-models-custom/build.gradle @@ -14,7 +14,7 @@ buildscript { } plugins { - id 'base' + id 'java-library' id 'maven-publish' id 'pegasus' } @@ -25,15 +25,19 @@ if (project.hasProperty('projVersion')) { project.version = '0.0.0-dev' } - dependencies { implementation spec.product.pegasus.data // Uncomment these if you want to depend on models defined in core datahub - //implementation project(':li-utils') - //dataModel project(':li-utils') - //implementation project(':metadata-models') - //dataModel project(':metadata-models') - + // DataQualityRuleEvent in this example uses Urn and TimeseriesAspectBase + implementation project(':li-utils') + dataModel project(':li-utils') + implementation project(':metadata-models') + dataModel project(':metadata-models') + + // Required for custom code plugins + implementation project(':entity-registry') + // Required for MCL/MCP hooks + implementation project (':metadata-io') } def deployBaseDir = findProperty('pluginModelsDir') ?: file(project.gradle.gradleUserHomeDir.parent + "/.datahub/plugins/models") @@ -43,9 +47,10 @@ pegasus.main.generationModes = [PegasusGenerationMode.PEGASUS, PegasusGeneration task modelArtifact(type: Zip) { + dependsOn jar from(layout.buildDirectory.dir("libs")) { - include "*-data-template-*.jar" + include "*.jar" exclude "*-test-data-template-*.jar" into "libs" } diff --git a/metadata-models-custom/registry/entity-registry.yaml b/metadata-models-custom/registry/entity-registry.yaml index 2b501946ca858..e6180172837e0 100644 --- a/metadata-models-custom/registry/entity-registry.yaml +++ b/metadata-models-custom/registry/entity-registry.yaml @@ -3,6 +3,40 @@ entities: - name: dataset aspects: - customDataQualityRules + - customDataQualityRuleEvent - name: container aspects: - - customDataQualityRules \ No newline at end of file + - customDataQualityRules +plugins: + aspectPayloadValidators: + - className: 'com.linkedin.metadata.aspect.plugins.validation.CustomDataQualityRulesValidator' + enabled: true + supportedOperations: + - UPSERT + supportedEntityAspectNames: + - entityName: 'dataset' + aspectName: customDataQualityRules + mutationHooks: + - className: 'com.linkedin.metadata.aspect.plugins.hooks.CustomDataQualityRulesMutator' + enabled: true + supportedOperations: + - UPSERT + supportedEntityAspectNames: + - entityName: 'dataset' + aspectName: customDataQualityRules + mclSideEffects: + - className: 'com.linkedin.metadata.aspect.plugins.hooks.CustomDataQualityRulesMCLSideEffect' + enabled: true + supportedOperations: + - UPSERT + supportedEntityAspectNames: + - entityName: 'dataset' + aspectName: customDataQualityRules + mcpSideEffects: + - className: 'com.linkedin.metadata.aspect.plugins.hooks.CustomDataQualityRulesMCPSideEffect' + enabled: true + supportedOperations: + - UPSERT + supportedEntityAspectNames: + - entityName: 'dataset' + aspectName: customDataQualityRules \ No newline at end of file diff --git a/metadata-models-custom/src/main/java/com/linkedin/metadata/aspect/plugins/hooks/CustomDataQualityRulesMCLSideEffect.java b/metadata-models-custom/src/main/java/com/linkedin/metadata/aspect/plugins/hooks/CustomDataQualityRulesMCLSideEffect.java new file mode 100644 index 0000000000000..a8735bae1521a --- /dev/null +++ b/metadata-models-custom/src/main/java/com/linkedin/metadata/aspect/plugins/hooks/CustomDataQualityRulesMCLSideEffect.java 
@@ -0,0 +1,72 @@ +package com.linkedin.metadata.aspect.plugins.hooks; + +import com.linkedin.metadata.aspect.batch.MCLBatchItem; +import com.linkedin.metadata.aspect.plugins.config.AspectPluginConfig; +import com.linkedin.metadata.aspect.plugins.validation.AspectRetriever; +import com.linkedin.metadata.entity.ebean.batch.MCLBatchItemImpl; +import com.linkedin.metadata.models.registry.EntityRegistry; +import com.linkedin.metadata.utils.GenericRecordUtils; +import com.linkedin.mxe.MetadataChangeLog; +import com.mycompany.dq.DataQualityRuleEvent; +import java.util.Optional; +import java.util.stream.Stream; +import javax.annotation.Nonnull; + +public class CustomDataQualityRulesMCLSideEffect extends MCLSideEffect { + + public CustomDataQualityRulesMCLSideEffect(AspectPluginConfig config) { + super(config); + } + + @Override + protected Stream applyMCLSideEffect( + @Nonnull MCLBatchItem input, + @Nonnull EntityRegistry entityRegistry, + @Nonnull AspectRetriever aspectRetriever) { + + // Generate Timeseries event aspect based on non-Timeseries aspect + MetadataChangeLog originMCP = input.getMetadataChangeLog(); + + Optional timeseriesOptional = + buildEvent(originMCP) + .map( + event -> { + try { + MetadataChangeLog eventMCP = originMCP.clone(); + eventMCP.setAspect(GenericRecordUtils.serializeAspect(event)); + eventMCP.setAspectName("customDataQualityRuleEvent"); + return eventMCP; + } catch (CloneNotSupportedException e) { + throw new RuntimeException(e); + } + }) + .map( + eventMCP -> + MCLBatchItemImpl.builder() + .metadataChangeLog(eventMCP) + .build(entityRegistry, aspectRetriever)); + + return timeseriesOptional.stream(); + } + + private Optional buildEvent(MetadataChangeLog originMCP) { + if (originMCP.getAspect() != null) { + DataQualityRuleEvent event = new DataQualityRuleEvent(); + if (event.getActor() != null) { + event.setActor(event.getActor()); + } + event.setEventTimestamp(originMCP.getSystemMetadata().getLastObserved()); + event.setTimestampMillis(originMCP.getSystemMetadata().getLastObserved()); + if (originMCP.getPreviousAspectValue() == null) { + event.setEventType("RuleCreated"); + } else { + event.setEventType("RuleUpdated"); + } + event.setAffectedDataset(originMCP.getEntityUrn()); + + return Optional.of(event); + } + + return Optional.empty(); + } +} diff --git a/metadata-models-custom/src/main/java/com/linkedin/metadata/aspect/plugins/hooks/CustomDataQualityRulesMCPSideEffect.java b/metadata-models-custom/src/main/java/com/linkedin/metadata/aspect/plugins/hooks/CustomDataQualityRulesMCPSideEffect.java new file mode 100644 index 0000000000000..2c989725f4f9d --- /dev/null +++ b/metadata-models-custom/src/main/java/com/linkedin/metadata/aspect/plugins/hooks/CustomDataQualityRulesMCPSideEffect.java @@ -0,0 +1,33 @@ +package com.linkedin.metadata.aspect.plugins.hooks; + +import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.UrnUtils; +import com.linkedin.metadata.aspect.batch.UpsertItem; +import com.linkedin.metadata.aspect.plugins.config.AspectPluginConfig; +import com.linkedin.metadata.aspect.plugins.validation.AspectRetriever; +import com.linkedin.metadata.entity.ebean.batch.MCPUpsertBatchItem; +import com.linkedin.metadata.models.registry.EntityRegistry; +import java.util.stream.Stream; +import javax.annotation.Nonnull; + +public class CustomDataQualityRulesMCPSideEffect extends MCPSideEffect { + + public CustomDataQualityRulesMCPSideEffect(AspectPluginConfig aspectPluginConfig) { + super(aspectPluginConfig); + } + + @Override + protected Stream 
applyMCPSideEffect( + UpsertItem input, EntityRegistry entityRegistry, @Nonnull AspectRetriever aspectRetriever) { + // Mirror aspects to another URN in SQL & Search + Urn mirror = UrnUtils.getUrn(input.getUrn().toString().replace(",PROD)", ",DEV)")); + return Stream.of( + MCPUpsertBatchItem.builder() + .urn(mirror) + .aspectName(input.getAspectName()) + .aspect(input.getAspect()) + .auditStamp(input.getAuditStamp()) + .systemMetadata(input.getSystemMetadata()) + .build(entityRegistry, aspectRetriever)); + } +} diff --git a/metadata-models-custom/src/main/java/com/linkedin/metadata/aspect/plugins/hooks/CustomDataQualityRulesMutator.java b/metadata-models-custom/src/main/java/com/linkedin/metadata/aspect/plugins/hooks/CustomDataQualityRulesMutator.java new file mode 100644 index 0000000000000..576ba3bf305f5 --- /dev/null +++ b/metadata-models-custom/src/main/java/com/linkedin/metadata/aspect/plugins/hooks/CustomDataQualityRulesMutator.java @@ -0,0 +1,45 @@ +package com.linkedin.metadata.aspect.plugins.hooks; + +import com.linkedin.common.AuditStamp; +import com.linkedin.data.template.RecordTemplate; +import com.linkedin.events.metadata.ChangeType; +import com.linkedin.metadata.aspect.plugins.config.AspectPluginConfig; +import com.linkedin.metadata.aspect.plugins.validation.AspectRetriever; +import com.linkedin.metadata.models.AspectSpec; +import com.linkedin.metadata.models.EntitySpec; +import com.linkedin.mxe.SystemMetadata; +import com.mycompany.dq.DataQualityRule; +import com.mycompany.dq.DataQualityRules; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; + +public class CustomDataQualityRulesMutator extends MutationHook { + + public CustomDataQualityRulesMutator(AspectPluginConfig config) { + super(config); + } + + @Override + protected void mutate( + @Nonnull ChangeType changeType, + @Nonnull EntitySpec entitySpec, + @Nonnull AspectSpec aspectSpec, + @Nullable RecordTemplate oldAspectValue, + @Nullable RecordTemplate newAspectValue, + @Nullable SystemMetadata oldSystemMetadata, + @Nullable SystemMetadata newSystemMetadata, + @Nonnull AuditStamp auditStamp, + @Nonnull AspectRetriever aspectRetriever) { + + if (newAspectValue != null) { + DataQualityRules newDataQualityRules = new DataQualityRules(newAspectValue.data()); + + for (DataQualityRule rule : newDataQualityRules.getRules()) { + // Ensure uniform lowercase + if (!rule.getType().toLowerCase().equals(rule.getType())) { + rule.setType(rule.getType().toLowerCase()); + } + } + } + } +} diff --git a/metadata-models-custom/src/main/java/com/linkedin/metadata/aspect/plugins/validation/CustomDataQualityRulesValidator.java b/metadata-models-custom/src/main/java/com/linkedin/metadata/aspect/plugins/validation/CustomDataQualityRulesValidator.java new file mode 100644 index 0000000000000..667d7ad614a79 --- /dev/null +++ b/metadata-models-custom/src/main/java/com/linkedin/metadata/aspect/plugins/validation/CustomDataQualityRulesValidator.java @@ -0,0 +1,70 @@ +package com.linkedin.metadata.aspect.plugins.validation; + +import com.linkedin.common.urn.Urn; +import com.linkedin.data.template.RecordTemplate; +import com.linkedin.events.metadata.ChangeType; +import com.linkedin.metadata.aspect.plugins.config.AspectPluginConfig; +import com.linkedin.metadata.models.AspectSpec; +import com.mycompany.dq.DataQualityRule; +import com.mycompany.dq.DataQualityRules; +import java.util.Map; +import java.util.stream.Collectors; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; + +public class 
CustomDataQualityRulesValidator extends AspectPayloadValidator { + + public CustomDataQualityRulesValidator(AspectPluginConfig config) { + super(config); + } + + @Override + protected void validateProposedAspect( + @Nonnull ChangeType changeType, + @Nonnull Urn entityUrn, + @Nonnull AspectSpec aspectSpec, + @Nonnull RecordTemplate aspectPayload, + @Nonnull AspectRetriever aspectRetriever) + throws AspectValidationException { + DataQualityRules rules = new DataQualityRules(aspectPayload.data()); + + // Enforce at least 1 rule + if (rules.getRules().isEmpty()) { + throw new AspectValidationException("At least one rule is required."); + } + } + + @Override + protected void validatePreCommitAspect( + @Nonnull ChangeType changeType, + @Nonnull Urn entityUrn, + @Nonnull AspectSpec aspectSpec, + @Nullable RecordTemplate previousAspect, + @Nonnull RecordTemplate proposedAspect, + @Nonnull AspectRetriever aspectRetriever) + throws AspectValidationException { + + if (previousAspect != null) { + DataQualityRules oldRules = new DataQualityRules(previousAspect.data()); + DataQualityRules newRules = new DataQualityRules(proposedAspect.data()); + + Map newFieldTypeMap = + newRules.getRules().stream() + .filter(rule -> rule.getField() != null) + .map(rule -> Map.entry(rule.getField(), rule.getType())) + .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); + + // Ensure the old and new field type is the same + for (DataQualityRule oldRule : oldRules.getRules()) { + if (!newFieldTypeMap + .getOrDefault(oldRule.getField(), oldRule.getType()) + .equals(oldRule.getType())) { + throw new AspectValidationException( + String.format( + "Field type mismatch. Field: %s Old: %s New: %s", + oldRule.getField(), oldRule.getType(), newFieldTypeMap.get(oldRule.getField()))); + } + } + } + } +} diff --git a/metadata-models-custom/src/main/pegasus/com/mycompany/dq/DataQualityRuleEvent.pdl b/metadata-models-custom/src/main/pegasus/com/mycompany/dq/DataQualityRuleEvent.pdl new file mode 100644 index 0000000000000..075c90898c43f --- /dev/null +++ b/metadata-models-custom/src/main/pegasus/com/mycompany/dq/DataQualityRuleEvent.pdl @@ -0,0 +1,44 @@ +namespace com.mycompany.dq + +import com.linkedin.common.Urn +import com.linkedin.timeseries.TimeseriesAspectBase + +/** + * Operational info for an entity. + */ + @Aspect = { + "name": "customDataQualityRuleEvent", + "type": "timeseries" + } +record DataQualityRuleEvent includes TimeseriesAspectBase { + + /** + * Actor who issued this operation. + */ + @TimeseriesField = {} + actor: optional Urn + + /** + * Event type. + */ + @TimeseriesField = {} + eventType: string + + /** + * Which dataset was affected by this event. + */ + @TimeseriesFieldCollection = {"key":"datasetUrn"} + affectedDataset: optional Urn + + /** + * Custom properties + */ + customProperties: optional map[string, string] + + /** + * The time at which the event occurred. 
+ */ + @TimeseriesField = {} + @Searchable = { "fieldType": "DATETIME", "fieldName": "eventTimestamp" } + eventTimestamp: long +} diff --git a/metadata-service/auth-impl/src/main/java/com/datahub/authentication/token/StatefulTokenService.java b/metadata-service/auth-impl/src/main/java/com/datahub/authentication/token/StatefulTokenService.java index 2879f15784370..c631bede45364 100644 --- a/metadata-service/auth-impl/src/main/java/com/datahub/authentication/token/StatefulTokenService.java +++ b/metadata-service/auth-impl/src/main/java/com/datahub/authentication/token/StatefulTokenService.java @@ -12,7 +12,8 @@ import com.linkedin.metadata.Constants; import com.linkedin.metadata.entity.AspectUtils; import com.linkedin.metadata.entity.EntityService; -import com.linkedin.metadata.entity.ebean.transactions.AspectsBatchImpl; +import com.linkedin.metadata.entity.ebean.batch.AspectsBatchImpl; +import com.linkedin.metadata.entity.ebean.batch.MCPUpsertBatchItem; import com.linkedin.metadata.key.DataHubAccessTokenKey; import com.linkedin.metadata.utils.AuditStampUtils; import com.linkedin.metadata.utils.GenericRecordUtils; @@ -40,7 +41,7 @@ @Slf4j public class StatefulTokenService extends StatelessTokenService { - private final EntityService _entityService; + private final EntityService _entityService; private final LoadingCache _revokedTokenCache; private final String salt; @@ -48,7 +49,7 @@ public StatefulTokenService( @Nonnull final String signingKey, @Nonnull final String signingAlgorithm, @Nullable final String iss, - @Nonnull final EntityService entityService, + @Nonnull final EntityService entityService, @Nonnull final String salt) { super(signingKey, signingAlgorithm, iss); this._entityService = entityService; @@ -153,9 +154,12 @@ public String generateAccessToken( _entityService.ingestProposal( AspectsBatchImpl.builder() - .mcps(proposalStream.collect(Collectors.toList()), _entityService.getEntityRegistry()) + .mcps( + proposalStream.collect(Collectors.toList()), + auditStamp, + _entityService.getEntityRegistry(), + _entityService.getSystemEntityClient()) .build(), - auditStamp, false); return accessToken; diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/EntityServiceFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/EntityServiceFactory.java index e75ec0c0dc44a..88a3f5749343b 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/EntityServiceFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/EntityServiceFactory.java @@ -8,6 +8,7 @@ import com.linkedin.metadata.entity.AspectDao; import com.linkedin.metadata.entity.EntityService; import com.linkedin.metadata.entity.EntityServiceImpl; +import com.linkedin.metadata.entity.ebean.batch.MCPUpsertBatchItem; import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.metadata.service.UpdateIndicesService; import com.linkedin.mxe.TopicConvention; @@ -35,7 +36,7 @@ public class EntityServiceFactory { "entityRegistry" }) @Nonnull - protected EntityService createInstance( + protected EntityService createInstance( Producer producer, TopicConvention convention, KafkaHealthChecker kafkaHealthChecker, diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/JavaEntityClientFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/JavaEntityClientFactory.java index 080845147766f..c550fc161b606 100644 --- 
a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/JavaEntityClientFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/JavaEntityClientFactory.java @@ -8,6 +8,7 @@ import com.linkedin.metadata.client.SystemJavaEntityClient; import com.linkedin.metadata.entity.DeleteEntityService; import com.linkedin.metadata.entity.EntityService; +import com.linkedin.metadata.entity.ebean.batch.MCPUpsertBatchItem; import com.linkedin.metadata.event.EventProducer; import com.linkedin.metadata.search.EntitySearchService; import com.linkedin.metadata.search.LineageSearchService; @@ -28,7 +29,7 @@ public class JavaEntityClientFactory { @Autowired @Qualifier("entityService") - private EntityService _entityService; + private EntityService _entityService; @Autowired @Qualifier("deleteEntityService") diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/RetentionServiceFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/RetentionServiceFactory.java index b02541586de49..dae5f903d7d80 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/RetentionServiceFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/RetentionServiceFactory.java @@ -5,6 +5,7 @@ import com.linkedin.metadata.entity.RetentionService; import com.linkedin.metadata.entity.cassandra.CassandraRetentionService; import com.linkedin.metadata.entity.ebean.EbeanRetentionService; +import com.linkedin.metadata.entity.ebean.batch.MCPUpsertBatchItem; import com.linkedin.metadata.spring.YamlPropertySourceFactory; import io.ebean.Database; import javax.annotation.Nonnull; @@ -23,7 +24,7 @@ public class RetentionServiceFactory { @Autowired @Qualifier("entityService") - private EntityService _entityService; + private EntityService _entityService; @Value("${RETENTION_APPLICATION_BATCH_SIZE:1000}") private Integer _batchSize; diff --git a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/UpgradeStep.java b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/UpgradeStep.java index 9ccb2c3f650bd..ff5d3f215d86b 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/UpgradeStep.java +++ b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/UpgradeStep.java @@ -7,6 +7,7 @@ import com.linkedin.events.metadata.ChangeType; import com.linkedin.metadata.Constants; import com.linkedin.metadata.entity.EntityService; +import com.linkedin.metadata.entity.ebean.batch.MCPUpsertBatchItem; import com.linkedin.metadata.key.DataHubUpgradeKey; import com.linkedin.metadata.utils.EntityKeyUtils; import com.linkedin.metadata.utils.GenericRecordUtils; @@ -20,7 +21,7 @@ @Slf4j public abstract class UpgradeStep implements BootstrapStep { - protected final EntityService _entityService; + protected final EntityService _entityService; private final String _version; private final String _upgradeId; private final Urn _upgradeUrn; diff --git a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestDataPlatformInstancesStep.java b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestDataPlatformInstancesStep.java index ae4baee37c822..e2f0b70526af5 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestDataPlatformInstancesStep.java +++ 
b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestDataPlatformInstancesStep.java @@ -10,8 +10,8 @@ import com.linkedin.metadata.boot.BootstrapStep; import com.linkedin.metadata.entity.AspectMigrationsDao; import com.linkedin.metadata.entity.EntityService; -import com.linkedin.metadata.entity.ebean.transactions.AspectsBatchImpl; -import com.linkedin.metadata.entity.ebean.transactions.UpsertBatchItem; +import com.linkedin.metadata.entity.ebean.batch.AspectsBatchImpl; +import com.linkedin.metadata.entity.ebean.batch.MCPUpsertBatchItem; import com.linkedin.metadata.models.AspectSpec; import com.linkedin.metadata.utils.DataPlatformInstanceUtils; import com.linkedin.metadata.utils.EntityKeyUtils; @@ -28,7 +28,7 @@ public class IngestDataPlatformInstancesStep implements BootstrapStep { private static final int BATCH_SIZE = 1000; - private final EntityService _entityService; + private final EntityService _entityService; private final AspectMigrationsDao _migrationsDao; @Override @@ -65,27 +65,28 @@ public void execute() throws Exception { start, start + BATCH_SIZE); - List items = new LinkedList<>(); + List items = new LinkedList<>(); + final AuditStamp aspectAuditStamp = + new AuditStamp() + .setActor(Urn.createFromString(Constants.SYSTEM_ACTOR)) + .setTime(System.currentTimeMillis()); for (String urnStr : _migrationsDao.listAllUrns(start, start + BATCH_SIZE)) { Urn urn = Urn.createFromString(urnStr); Optional dataPlatformInstance = getDataPlatformInstance(urn); if (dataPlatformInstance.isPresent()) { items.add( - UpsertBatchItem.builder() + MCPUpsertBatchItem.builder() .urn(urn) .aspectName(DATA_PLATFORM_INSTANCE_ASPECT_NAME) .aspect(dataPlatformInstance.get()) - .build(_entityService.getEntityRegistry())); + .auditStamp(aspectAuditStamp) + .build( + _entityService.getEntityRegistry(), _entityService.getSystemEntityClient())); } } - final AuditStamp aspectAuditStamp = - new AuditStamp() - .setActor(Urn.createFromString(Constants.SYSTEM_ACTOR)) - .setTime(System.currentTimeMillis()); - _entityService.ingestAspects( - AspectsBatchImpl.builder().items(items).build(), aspectAuditStamp, true, true); + _entityService.ingestAspects(AspectsBatchImpl.builder().items(items).build(), true, true); log.info( "Finished ingesting DataPlatformInstance for urn {} to {}", start, start + BATCH_SIZE); diff --git a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestDataPlatformsStep.java b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestDataPlatformsStep.java index db8cad65caa8a..37eac6d5ec470 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestDataPlatformsStep.java +++ b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestDataPlatformsStep.java @@ -12,8 +12,8 @@ import com.linkedin.metadata.Constants; import com.linkedin.metadata.boot.BootstrapStep; import com.linkedin.metadata.entity.EntityService; -import com.linkedin.metadata.entity.ebean.transactions.AspectsBatchImpl; -import com.linkedin.metadata.entity.ebean.transactions.UpsertBatchItem; +import com.linkedin.metadata.entity.ebean.batch.AspectsBatchImpl; +import com.linkedin.metadata.entity.ebean.batch.MCPUpsertBatchItem; import java.io.IOException; import java.net.URISyntaxException; import java.util.List; @@ -31,7 +31,7 @@ public class IngestDataPlatformsStep implements BootstrapStep { private static final String PLATFORM_ASPECT_NAME = "dataPlatformInfo"; - private final EntityService 
_entityService; + private final EntityService _entityService; @Override public String name() { @@ -62,7 +62,7 @@ public void execute() throws IOException, URISyntaxException { } // 2. For each JSON object, cast into a DataPlatformSnapshot object. - List dataPlatformAspects = + List dataPlatformAspects = StreamSupport.stream( Spliterators.spliteratorUnknownSize(dataPlatforms.iterator(), Spliterator.ORDERED), false) @@ -82,20 +82,25 @@ public void execute() throws IOException, URISyntaxException { RecordUtils.toRecordTemplate( DataPlatformInfo.class, dataPlatform.get("aspect").toString()); - return UpsertBatchItem.builder() - .urn(urn) - .aspectName(PLATFORM_ASPECT_NAME) - .aspect(info) - .build(_entityService.getEntityRegistry()); + try { + return MCPUpsertBatchItem.builder() + .urn(urn) + .aspectName(PLATFORM_ASPECT_NAME) + .aspect(info) + .auditStamp( + new AuditStamp() + .setActor(Urn.createFromString(Constants.SYSTEM_ACTOR)) + .setTime(System.currentTimeMillis())) + .build( + _entityService.getEntityRegistry(), + _entityService.getSystemEntityClient()); + } catch (URISyntaxException e) { + throw new RuntimeException(e); + } }) .collect(Collectors.toList()); _entityService.ingestAspects( - AspectsBatchImpl.builder().items(dataPlatformAspects).build(), - new AuditStamp() - .setActor(Urn.createFromString(Constants.SYSTEM_ACTOR)) - .setTime(System.currentTimeMillis()), - true, - false); + AspectsBatchImpl.builder().items(dataPlatformAspects).build(), true, false); } } diff --git a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestOwnershipTypesStep.java b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestOwnershipTypesStep.java index f5a76b5f75778..fc1c82fc6d631 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestOwnershipTypesStep.java +++ b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestOwnershipTypesStep.java @@ -11,7 +11,7 @@ import com.linkedin.metadata.Constants; import com.linkedin.metadata.boot.BootstrapStep; import com.linkedin.metadata.entity.EntityService; -import com.linkedin.metadata.entity.ebean.transactions.AspectsBatchImpl; +import com.linkedin.metadata.entity.ebean.batch.AspectsBatchImpl; import com.linkedin.metadata.models.AspectSpec; import com.linkedin.metadata.utils.EntityKeyUtils; import com.linkedin.metadata.utils.GenericRecordUtils; @@ -100,9 +100,12 @@ private void ingestOwnershipType( _entityService.ingestProposal( AspectsBatchImpl.builder() - .mcps(List.of(keyAspectProposal, proposal), _entityService.getEntityRegistry()) + .mcps( + List.of(keyAspectProposal, proposal), + auditStamp, + _entityService.getEntityRegistry(), + _entityService.getSystemEntityClient()) .build(), - auditStamp, false); } } diff --git a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestPoliciesStep.java b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestPoliciesStep.java index 2aa5fe4f46b65..9b9feb8e14638 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestPoliciesStep.java +++ b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestPoliciesStep.java @@ -15,7 +15,7 @@ import com.linkedin.metadata.Constants; import com.linkedin.metadata.boot.BootstrapStep; import com.linkedin.metadata.entity.EntityService; -import com.linkedin.metadata.entity.ebean.transactions.AspectsBatchImpl; +import 
com.linkedin.metadata.entity.ebean.batch.AspectsBatchImpl; import com.linkedin.metadata.models.AspectSpec; import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.metadata.query.ListUrnsResult; @@ -205,11 +205,14 @@ private void ingestPolicy(final Urn urn, final DataHubPolicyInfo info) throws UR _entityService.ingestProposal( AspectsBatchImpl.builder() - .mcps(List.of(keyAspectProposal, proposal), _entityRegistry) + .mcps( + List.of(keyAspectProposal, proposal), + new AuditStamp() + .setActor(Urn.createFromString(Constants.SYSTEM_ACTOR)) + .setTime(System.currentTimeMillis()), + _entityRegistry, + _entityService.getSystemEntityClient()) .build(), - new AuditStamp() - .setActor(Urn.createFromString(Constants.SYSTEM_ACTOR)) - .setTime(System.currentTimeMillis()), false); } diff --git a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestRolesStep.java b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestRolesStep.java index f3c395abdfc3a..67c3cca3384e3 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestRolesStep.java +++ b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/IngestRolesStep.java @@ -12,7 +12,7 @@ import com.linkedin.metadata.Constants; import com.linkedin.metadata.boot.BootstrapStep; import com.linkedin.metadata.entity.EntityService; -import com.linkedin.metadata.entity.ebean.transactions.AspectsBatchImpl; +import com.linkedin.metadata.entity.ebean.batch.AspectsBatchImpl; import com.linkedin.metadata.models.AspectSpec; import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.metadata.utils.EntityKeyUtils; @@ -125,11 +125,14 @@ private void ingestRole( _entityService.ingestProposal( AspectsBatchImpl.builder() - .mcps(List.of(keyAspectProposal, proposal), _entityRegistry) + .mcps( + List.of(keyAspectProposal, proposal), + new AuditStamp() + .setActor(Urn.createFromString(SYSTEM_ACTOR)) + .setTime(System.currentTimeMillis()), + _entityRegistry, + _entityService.getSystemEntityClient()) .build(), - new AuditStamp() - .setActor(Urn.createFromString(SYSTEM_ACTOR)) - .setTime(System.currentTimeMillis()), false); _entityService.alwaysProduceMCLAsync( diff --git a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/RestoreColumnLineageIndices.java b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/RestoreColumnLineageIndices.java index 333928999f453..919ba93c9213e 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/RestoreColumnLineageIndices.java +++ b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/RestoreColumnLineageIndices.java @@ -10,6 +10,7 @@ import com.linkedin.metadata.boot.UpgradeStep; import com.linkedin.metadata.entity.EntityService; import com.linkedin.metadata.entity.ListResult; +import com.linkedin.metadata.entity.ebean.batch.MCPUpsertBatchItem; import com.linkedin.metadata.models.AspectSpec; import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.metadata.query.ExtraInfo; @@ -30,7 +31,8 @@ public class RestoreColumnLineageIndices extends UpgradeStep { private final EntityRegistry _entityRegistry; public RestoreColumnLineageIndices( - @Nonnull final EntityService entityService, @Nonnull final EntityRegistry entityRegistry) { + @Nonnull final EntityService entityService, + @Nonnull final EntityRegistry entityRegistry) { super(entityService, VERSION, UPGRADE_ID); 
_entityRegistry = Objects.requireNonNull(entityRegistry, "entityRegistry must not be null"); } diff --git a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/RestoreDbtSiblingsIndices.java b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/RestoreDbtSiblingsIndices.java index bb7ad80ef73d2..e2d367a034491 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/RestoreDbtSiblingsIndices.java +++ b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/RestoreDbtSiblingsIndices.java @@ -13,6 +13,7 @@ import com.linkedin.metadata.Constants; import com.linkedin.metadata.boot.BootstrapStep; import com.linkedin.metadata.entity.EntityService; +import com.linkedin.metadata.entity.ebean.batch.MCPUpsertBatchItem; import com.linkedin.metadata.key.DataHubUpgradeKey; import com.linkedin.metadata.models.AspectSpec; import com.linkedin.metadata.models.registry.EntityRegistry; @@ -46,7 +47,7 @@ public class RestoreDbtSiblingsIndices implements BootstrapStep { private static final Integer BATCH_SIZE = 1000; private static final Integer SLEEP_SECONDS = 120; - private final EntityService _entityService; + private final EntityService _entityService; private final EntityRegistry _entityRegistry; @Override diff --git a/metadata-service/factories/src/test/java/com/linkedin/metadata/boot/steps/IngestDataPlatformInstancesStepTest.java b/metadata-service/factories/src/test/java/com/linkedin/metadata/boot/steps/IngestDataPlatformInstancesStepTest.java index 976698f3032d2..41672a07a2389 100644 --- a/metadata-service/factories/src/test/java/com/linkedin/metadata/boot/steps/IngestDataPlatformInstancesStepTest.java +++ b/metadata-service/factories/src/test/java/com/linkedin/metadata/boot/steps/IngestDataPlatformInstancesStepTest.java @@ -8,7 +8,7 @@ import com.linkedin.common.urn.UrnUtils; import com.linkedin.metadata.entity.AspectMigrationsDao; import com.linkedin.metadata.entity.EntityService; -import com.linkedin.metadata.entity.ebean.transactions.UpsertBatchItem; +import com.linkedin.metadata.entity.ebean.batch.MCPUpsertBatchItem; import com.linkedin.metadata.models.AspectSpec; import com.linkedin.metadata.models.EntitySpec; import com.linkedin.metadata.models.registry.ConfigEntityRegistry; @@ -96,7 +96,7 @@ public void testExecuteChecksKeySpecForAllUrns() throws Exception { @Test public void testExecuteWhenSomeEntitiesShouldReceiveDataPlatformInstance() throws Exception { final EntityRegistry entityRegistry = getTestEntityRegistry(); - final EntityService entityService = mock(EntityService.class); + final EntityService entityService = mock(EntityService.class); final AspectMigrationsDao migrationsDao = mock(AspectMigrationsDao.class); final int countOfCorpUserEntities = 5; final int countOfChartEntities = 7; @@ -122,9 +122,8 @@ public void testExecuteWhenSomeEntitiesShouldReceiveDataPlatformInstance() throw item.getUrn().getEntityType().equals("chart") && item.getAspectName() .equals(DATA_PLATFORM_INSTANCE_ASPECT_NAME) - && ((UpsertBatchItem) item).getAspect() + && ((MCPUpsertBatchItem) item).getAspect() instanceof DataPlatformInstance)), - any(), anyBoolean(), anyBoolean()); verify(entityService, times(0)) @@ -137,9 +136,8 @@ public void testExecuteWhenSomeEntitiesShouldReceiveDataPlatformInstance() throw item.getUrn().getEntityType().equals("chart") && item.getAspectName() .equals(DATA_PLATFORM_INSTANCE_ASPECT_NAME) - && ((UpsertBatchItem) item).getAspect() + && ((MCPUpsertBatchItem) item).getAspect() instanceof 
DataPlatformInstance)), - any(), anyBoolean(), anyBoolean()); } diff --git a/metadata-service/openapi-entity-servlet/src/main/java/io/datahubproject/openapi/delegates/EntityApiDelegateImpl.java b/metadata-service/openapi-entity-servlet/src/main/java/io/datahubproject/openapi/delegates/EntityApiDelegateImpl.java index 31cd3e6c69e50..fc935514f4138 100644 --- a/metadata-service/openapi-entity-servlet/src/main/java/io/datahubproject/openapi/delegates/EntityApiDelegateImpl.java +++ b/metadata-service/openapi-entity-servlet/src/main/java/io/datahubproject/openapi/delegates/EntityApiDelegateImpl.java @@ -120,7 +120,7 @@ public ResponseEntity> create(List body) { OpenApiEntitiesUtil.convertEntityToUpsert(b, _reqClazz, _entityRegistry) .stream()) .collect(Collectors.toList()); - _v1Controller.postEntities(aspects); + _v1Controller.postEntities(aspects, false); List responses = body.stream() .map(req -> OpenApiEntitiesUtil.convertToResponse(req, _respClazz, _entityRegistry)) @@ -129,7 +129,7 @@ public ResponseEntity> create(List body) { } public ResponseEntity delete(String urn) { - _v1Controller.deleteEntities(new String[] {urn}, false); + _v1Controller.deleteEntities(new String[] {urn}, false, false); return new ResponseEntity<>(HttpStatus.OK); } @@ -165,7 +165,7 @@ public ResponseEntity createAspect( UpsertAspectRequest aspectUpsert = OpenApiEntitiesUtil.convertAspectToUpsert(urn, body, reqClazz); _v1Controller.postEntities( - Stream.of(aspectUpsert).filter(Objects::nonNull).collect(Collectors.toList())); + Stream.of(aspectUpsert).filter(Objects::nonNull).collect(Collectors.toList()), false); AR response = OpenApiEntitiesUtil.convertToResponseAspect(body, respClazz); return ResponseEntity.ok(response); } @@ -185,7 +185,7 @@ public ResponseEntity headAspect(String urn, String aspect) { public ResponseEntity deleteAspect(String urn, String aspect) { _entityService.deleteAspect(urn, aspect, Map.of(), false); - _v1Controller.deleteEntities(new String[] {urn}, false); + _v1Controller.deleteEntities(new String[] {urn}, false, false); return new ResponseEntity<>(HttpStatus.OK); } diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/entities/EntitiesController.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/entities/EntitiesController.java index 6e0fc5deb0b3c..ff65db09c2682 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/entities/EntitiesController.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/entities/EntitiesController.java @@ -17,6 +17,7 @@ import com.linkedin.common.urn.UrnUtils; import com.linkedin.metadata.authorization.PoliciesConfig; import com.linkedin.metadata.entity.EntityService; +import com.linkedin.metadata.entity.ebean.batch.MCPUpsertBatchItem; import com.linkedin.metadata.utils.metrics.MetricUtils; import com.linkedin.util.Pair; import io.datahubproject.openapi.dto.RollbackRunResultDto; @@ -32,6 +33,7 @@ import java.util.Collections; import java.util.HashSet; import java.util.List; +import java.util.Objects; import java.util.Optional; import java.util.Set; import java.util.stream.Collectors; @@ -63,7 +65,7 @@ description = "APIs for ingesting and accessing entities and their constituent aspects") public class EntitiesController { - private final EntityService _entityService; + private final EntityService _entityService; private final ObjectMapper _objectMapper; private final AuthorizerChain _authorizerChain; @@ -152,7 +154,8 @@ public ResponseEntity 
getEntities( @PostMapping(value = "/", produces = MediaType.APPLICATION_JSON_VALUE) public ResponseEntity> postEntities( - @RequestBody @Nonnull List aspectRequests) { + @RequestBody @Nonnull List aspectRequests, + @RequestParam(required = false, name = "async") Boolean async) { log.info("INGEST PROPOSAL proposal: {}", aspectRequests); Authentication authentication = AuthenticationContext.getAuthentication(); @@ -174,9 +177,14 @@ public ResponseEntity> postEntities( throw new UnauthorizedException(actorUrnStr + " is unauthorized to edit entities."); } + boolean asyncBool = + Objects.requireNonNullElseGet( + async, () -> Boolean.parseBoolean(System.getenv("ASYNC_INGEST_DEFAULT"))); List> responses = proposals.stream() - .map(proposal -> MappingUtil.ingestProposal(proposal, actorUrnStr, _entityService)) + .map( + proposal -> + MappingUtil.ingestProposal(proposal, actorUrnStr, _entityService, asyncBool)) .collect(Collectors.toList()); if (responses.stream().anyMatch(Pair::getSecond)) { return ResponseEntity.status(HttpStatus.CREATED) @@ -205,7 +213,8 @@ public ResponseEntity> deleteEntities( description = "Determines whether the delete will be soft or hard, defaults to true for soft delete") @RequestParam(value = "soft", defaultValue = "true") - boolean soft) { + boolean soft, + @RequestParam(required = false, name = "async") Boolean async) { Throwable exceptionally = null; try (Timer.Context context = MetricUtils.timer("deleteEntities").time()) { Authentication authentication = AuthenticationContext.getAuthentication(); @@ -250,6 +259,9 @@ public ResponseEntity> deleteEntities( .map(entityUrn -> MappingUtil.createStatusRemoval(entityUrn, _entityService)) .collect(Collectors.toList()); + boolean asyncBool = + Objects.requireNonNullElseGet( + async, () -> Boolean.parseBoolean(System.getenv("ASYNC_INGEST_DEFAULT"))); return ResponseEntity.ok( Collections.singletonList( RollbackRunResultDto.builder() @@ -262,7 +274,7 @@ public ResponseEntity> deleteEntities( .map( proposal -> MappingUtil.ingestProposal( - proposal, actorUrnStr, _entityService)) + proposal, actorUrnStr, _entityService, asyncBool)) .filter(Pair::getSecond) .map(Pair::getFirst) .map(urnString -> AspectRowSummary.builder().urn(urnString).build()) diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/platform/entities/PlatformEntitiesController.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/platform/entities/PlatformEntitiesController.java index 370f2019a42dd..3cc67e77ec27e 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/platform/entities/PlatformEntitiesController.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/platform/entities/PlatformEntitiesController.java @@ -9,6 +9,8 @@ import com.google.common.collect.ImmutableList; import com.linkedin.metadata.authorization.PoliciesConfig; import com.linkedin.metadata.entity.EntityService; +import com.linkedin.metadata.entity.ebean.batch.MCPUpsertBatchItem; +import com.linkedin.metadata.search.client.CachingEntitySearchService; import com.linkedin.util.Pair; import io.datahubproject.openapi.exception.UnauthorizedException; import io.datahubproject.openapi.generated.MetadataChangeProposal; @@ -16,6 +18,7 @@ import io.swagger.v3.oas.annotations.tags.Tag; import java.util.Collections; import java.util.List; +import java.util.Objects; import java.util.stream.Collectors; import javax.annotation.Nonnull; import lombok.RequiredArgsConstructor; @@ -30,6 +33,7 @@ 
import org.springframework.web.bind.annotation.PostMapping; import org.springframework.web.bind.annotation.RequestBody; import org.springframework.web.bind.annotation.RequestMapping; +import org.springframework.web.bind.annotation.RequestParam; import org.springframework.web.bind.annotation.RestController; @RestController @@ -41,7 +45,8 @@ description = "Platform level APIs intended for lower level access to entities") public class PlatformEntitiesController { - private final EntityService _entityService; + private final EntityService _entityService; + private final CachingEntitySearchService _cachingEntitySearchService; private final ObjectMapper _objectMapper; private final AuthorizerChain _authorizerChain; @@ -55,7 +60,8 @@ public void initBinder(WebDataBinder binder) { @PostMapping(value = "/", produces = MediaType.APPLICATION_JSON_VALUE) public ResponseEntity> postEntities( - @RequestBody @Nonnull List metadataChangeProposals) { + @RequestBody @Nonnull List metadataChangeProposals, + @RequestParam(required = false, name = "async") Boolean async) { log.info("INGEST PROPOSAL proposal: {}", metadataChangeProposals); Authentication authentication = AuthenticationContext.getAuthentication(); @@ -77,9 +83,14 @@ public ResponseEntity> postEntities( throw new UnauthorizedException(actorUrnStr + " is unauthorized to edit entities."); } + boolean asyncBool = + Objects.requireNonNullElseGet( + async, () -> Boolean.parseBoolean(System.getenv("ASYNC_INGEST_DEFAULT"))); List> responses = proposals.stream() - .map(proposal -> MappingUtil.ingestProposal(proposal, actorUrnStr, _entityService)) + .map( + proposal -> + MappingUtil.ingestProposal(proposal, actorUrnStr, _entityService, asyncBool)) .collect(Collectors.toList()); if (responses.stream().anyMatch(Pair::getSecond)) { return ResponseEntity.status(HttpStatus.CREATED) diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/util/MappingUtil.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/util/MappingUtil.java index 0eb3e2d6b8c6e..c87820465dc88 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/util/MappingUtil.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/util/MappingUtil.java @@ -25,12 +25,13 @@ import com.linkedin.data.template.RecordTemplate; import com.linkedin.entity.Aspect; import com.linkedin.events.metadata.ChangeType; +import com.linkedin.metadata.aspect.batch.AspectsBatch; import com.linkedin.metadata.entity.AspectUtils; import com.linkedin.metadata.entity.EntityService; import com.linkedin.metadata.entity.IngestResult; import com.linkedin.metadata.entity.RollbackRunResult; -import com.linkedin.metadata.entity.ebean.transactions.AspectsBatchImpl; -import com.linkedin.metadata.entity.transactions.AspectsBatch; +import com.linkedin.metadata.entity.ebean.batch.AspectsBatchImpl; +import com.linkedin.metadata.entity.ebean.batch.MCPUpsertBatchItem; import com.linkedin.metadata.entity.validation.ValidationException; import com.linkedin.metadata.utils.EntityKeyUtils; import com.linkedin.metadata.utils.metrics.MetricUtils; @@ -441,7 +442,9 @@ public static boolean authorizeProposals( public static Pair ingestProposal( com.linkedin.mxe.MetadataChangeProposal serviceProposal, String actorUrn, - EntityService entityService) { + EntityService entityService, + boolean async) { + // TODO: Use the actor present in the IC. 
Timer.Context context = MetricUtils.timer("postEntity").time(); final com.linkedin.common.AuditStamp auditStamp = @@ -462,10 +465,14 @@ public static Pair ingestProposal( AspectsBatch batch = AspectsBatchImpl.builder() - .mcps(proposalStream.collect(Collectors.toList()), entityService.getEntityRegistry()) + .mcps( + proposalStream.collect(Collectors.toList()), + auditStamp, + entityService.getEntityRegistry(), + entityService.getSystemEntityClient()) .build(); - Set proposalResult = entityService.ingestProposal(batch, auditStamp, false); + Set proposalResult = entityService.ingestProposal(batch, async); Urn urn = proposalResult.stream().findFirst().get().getUrn(); return new Pair<>( diff --git a/metadata-service/openapi-servlet/src/test/java/entities/EntitiesControllerTest.java b/metadata-service/openapi-servlet/src/test/java/entities/EntitiesControllerTest.java index 06640ba13fb8b..17be5a60816d3 100644 --- a/metadata-service/openapi-servlet/src/test/java/entities/EntitiesControllerTest.java +++ b/metadata-service/openapi-servlet/src/test/java/entities/EntitiesControllerTest.java @@ -217,7 +217,7 @@ public void testIngestDataset() { .build(); datasetAspects.add(glossaryTerms); - _entitiesController.postEntities(datasetAspects); + _entitiesController.postEntities(datasetAspects, false); } // @Test diff --git a/metadata-service/openapi-servlet/src/test/java/mock/MockEntityService.java b/metadata-service/openapi-servlet/src/test/java/mock/MockEntityService.java index 91e9e4fd4671e..fdf99cdc303c1 100644 --- a/metadata-service/openapi-servlet/src/test/java/mock/MockEntityService.java +++ b/metadata-service/openapi-servlet/src/test/java/mock/MockEntityService.java @@ -80,8 +80,7 @@ public RecordTemplate getAspect(@Nonnull Urn urn, @Nonnull String aspectName, lo @Override public Map> getLatestEnvelopedAspects( - @Nonnull String entityName, @Nonnull Set urns, @Nonnull Set aspectNames) - throws URISyntaxException { + @Nonnull Set urns, @Nonnull Set aspectNames) throws URISyntaxException { Urn urn = UrnUtils.getUrn(DATASET_URN); Map> envelopedAspectMap = new HashMap<>(); List aspects = new ArrayList<>(); diff --git a/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/EntityClient.java b/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/EntityClient.java index 598c252b4f766..64ae3632c353a 100644 --- a/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/EntityClient.java +++ b/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/EntityClient.java @@ -6,10 +6,12 @@ import com.linkedin.data.DataMap; import com.linkedin.data.template.RecordTemplate; import com.linkedin.data.template.StringArray; +import com.linkedin.entity.Aspect; import com.linkedin.entity.Entity; import com.linkedin.entity.EntityResponse; import com.linkedin.metadata.aspect.EnvelopedAspect; import com.linkedin.metadata.aspect.VersionedAspect; +import com.linkedin.metadata.aspect.plugins.validation.AspectRetriever; import com.linkedin.metadata.browse.BrowseResult; import com.linkedin.metadata.browse.BrowseResultV2; import com.linkedin.metadata.graph.LineageDirection; @@ -38,7 +40,7 @@ import javax.annotation.Nullable; // Consider renaming this to datahub client. 
-public interface EntityClient { +public interface EntityClient extends AspectRetriever { @Nullable public EntityResponse getV2( @@ -623,4 +625,12 @@ public void producePlatformEvent( public void rollbackIngestion(@Nonnull String runId, @Nonnull Authentication authentication) throws Exception; + + default Aspect getLatestAspectObject(@Nonnull Urn urn, @Nonnull String aspectName) + throws RemoteInvocationException, URISyntaxException { + return getV2(urn.getEntityType(), urn, Set.of(aspectName), null) + .getAspects() + .get(aspectName) + .getValue(); + } } diff --git a/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/SystemEntityClient.java b/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/SystemEntityClient.java index babb290655d3d..dfad20b5f52b2 100644 --- a/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/SystemEntityClient.java +++ b/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/SystemEntityClient.java @@ -2,7 +2,9 @@ import com.datahub.authentication.Authentication; import com.linkedin.common.urn.Urn; +import com.linkedin.entity.Aspect; import com.linkedin.entity.EntityResponse; +import com.linkedin.metadata.aspect.plugins.validation.AspectRetriever; import com.linkedin.metadata.config.cache.client.EntityClientCacheConfig; import com.linkedin.mxe.MetadataChangeProposal; import com.linkedin.mxe.PlatformEvent; @@ -14,7 +16,7 @@ import javax.annotation.Nullable; /** Adds entity/aspect cache and assumes system authentication */ -public interface SystemEntityClient extends EntityClient { +public interface SystemEntityClient extends EntityClient, AspectRetriever { EntityClientCache getEntityClientCache(); @@ -98,4 +100,12 @@ default String ingestProposal( default void setWritable(boolean canWrite) throws RemoteInvocationException { setWritable(canWrite, getSystemAuthentication()); } + + default Aspect getLatestAspectObject(@Nonnull Urn urn, @Nonnull String aspectName) + throws RemoteInvocationException, URISyntaxException { + return getV2(urn.getEntityType(), urn, Set.of(aspectName), getSystemAuthentication()) + .getAspects() + .get(aspectName) + .getValue(); + } } diff --git a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/AspectResource.java b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/AspectResource.java index f14dc2e8b2918..c5b019e85e0c9 100644 --- a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/AspectResource.java +++ b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/AspectResource.java @@ -19,10 +19,13 @@ import com.linkedin.metadata.aspect.VersionedAspect; import com.linkedin.metadata.authorization.PoliciesConfig; import com.linkedin.metadata.entity.AspectUtils; +import com.linkedin.metadata.entity.EntityAspect; import com.linkedin.metadata.entity.EntityService; import com.linkedin.metadata.entity.IngestResult; -import com.linkedin.metadata.entity.ebean.transactions.AspectsBatchImpl; -import com.linkedin.metadata.entity.transactions.AspectsBatch; +import com.linkedin.metadata.entity.ebean.batch.AspectsBatchImpl; +import com.linkedin.metadata.aspect.batch.AspectsBatch; +import com.linkedin.metadata.entity.ebean.batch.MCLBatchItemImpl; +import com.linkedin.metadata.entity.ebean.batch.MCPUpsertBatchItem; import com.linkedin.metadata.entity.validation.ValidationException; import com.linkedin.metadata.query.filter.Filter; 
import com.linkedin.metadata.query.filter.SortCriterion; @@ -80,10 +83,10 @@ public class AspectResource extends CollectionResourceTaskTemplate _entityService; @VisibleForTesting - void setEntityService(EntityService entityService) { + void setEntityService(EntityService entityService) { _entityService = entityService; } @@ -242,33 +245,26 @@ public Task ingestProposal( final AuditStamp auditStamp = new AuditStamp().setTime(_clock.millis()).setActor(Urn.createFromString(actorUrnStr)); - return RestliUtil.toTask( - () -> { - log.debug("Proposal: {}", metadataChangeProposal); - try { - final AspectsBatch batch; - if (asyncBool) { - // if async we'll expand the getAdditionalChanges later, no need to do this early - batch = - AspectsBatchImpl.builder() - .mcps(List.of(metadataChangeProposal), _entityService.getEntityRegistry()) - .build(); - } else { - Stream proposalStream = - Stream.concat( - Stream.of(metadataChangeProposal), - AspectUtils.getAdditionalChanges(metadataChangeProposal, _entityService) - .stream()); + return RestliUtil.toTask(() -> { + log.debug("Proposal: {}", metadataChangeProposal); + try { + final AspectsBatch batch; + if (asyncBool) { + // if async we'll expand the getAdditionalChanges later, no need to do this early + batch = AspectsBatchImpl.builder() + .mcps(List.of(metadataChangeProposal), auditStamp, _entityService.getEntityRegistry(), _entityService.getSystemEntityClient()) + .build(); + } else { + Stream proposalStream = Stream.concat(Stream.of(metadataChangeProposal), + AspectUtils.getAdditionalChanges(metadataChangeProposal, _entityService).stream()); - batch = - AspectsBatchImpl.builder() - .mcps( - proposalStream.collect(Collectors.toList()), - _entityService.getEntityRegistry()) - .build(); - } + batch = AspectsBatchImpl.builder() + .mcps(proposalStream.collect(Collectors.toList()), auditStamp, _entityService.getEntityRegistry(), _entityService.getSystemEntityClient()) + .build(); + } - Set results = _entityService.ingestProposal(batch, auditStamp, asyncBool); + Set results = + _entityService.ingestProposal(batch, asyncBool); IngestResult one = results.stream().findFirst().get(); diff --git a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityResource.java b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityResource.java index ddf5efa5027ca..dfd986c2ebea0 100644 --- a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityResource.java +++ b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityResource.java @@ -121,7 +121,7 @@ public class EntityResource extends CollectionResourceTaskTemplate _entityService; @Inject @Named("searchService") diff --git a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/operations/Utils.java b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/operations/Utils.java index bf07d0eb9dd5b..7c7c25ad3492c 100644 --- a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/operations/Utils.java +++ b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/operations/Utils.java @@ -34,7 +34,7 @@ public static String restoreIndices( @Nullable Integer start, @Nullable Integer batchSize, @Nonnull Authorizer authorizer, - @Nonnull EntityService entityService) { + @Nonnull EntityService entityService) { Authentication authentication = 
AuthenticationContext.getAuthentication(); EntitySpec resourceSpec = null; if (StringUtils.isNotBlank(urn)) { diff --git a/metadata-service/restli-servlet-impl/src/test/java/com/linkedin/metadata/resources/entity/AspectResourceTest.java b/metadata-service/restli-servlet-impl/src/test/java/com/linkedin/metadata/resources/entity/AspectResourceTest.java index d6eeb1a01ac15..e3534875c6cd2 100644 --- a/metadata-service/restli-servlet-impl/src/test/java/com/linkedin/metadata/resources/entity/AspectResourceTest.java +++ b/metadata-service/restli-servlet-impl/src/test/java/com/linkedin/metadata/resources/entity/AspectResourceTest.java @@ -20,7 +20,7 @@ import com.linkedin.metadata.entity.EntityService; import com.linkedin.metadata.entity.EntityServiceImpl; import com.linkedin.metadata.entity.UpdateAspectResult; -import com.linkedin.metadata.entity.ebean.transactions.UpsertBatchItem; +import com.linkedin.metadata.entity.ebean.batch.MCPUpsertBatchItem; import com.linkedin.metadata.event.EventProducer; import com.linkedin.metadata.models.AspectSpec; import com.linkedin.metadata.models.registry.EntityRegistry; @@ -52,9 +52,8 @@ public void setup() { _entityRegistry = new MockEntityRegistry(); _updateIndicesService = mock(UpdateIndicesService.class); _preProcessHooks = mock(PreProcessHooks.class); - _entityService = - new EntityServiceImpl( - _aspectDao, _producer, _entityRegistry, false, _updateIndicesService, _preProcessHooks); + _entityService = new EntityServiceImpl(_aspectDao, _producer, _entityRegistry, false, + _updateIndicesService, _preProcessHooks); _authorizer = mock(Authorizer.class); _aspectResource.setAuthorizer(_authorizer); _aspectResource.setEntityService(_entityService); @@ -82,13 +81,13 @@ public void testAsyncDefaultAspects() throws URISyntaxException { reset(_producer, _aspectDao); - UpsertBatchItem req = - UpsertBatchItem.builder() + MCPUpsertBatchItem req = MCPUpsertBatchItem.builder() .urn(urn) .aspectName(mcp.getAspectName()) .aspect(mcp.getAspect()) + .auditStamp(new AuditStamp()) .metadataChangeProposal(mcp) - .build(_entityRegistry); + .build(_entityRegistry, _entityService.getSystemEntityClient()); when(_aspectDao.runInTransactionWithRetry(any(), any(), anyInt())) .thenReturn( List.of( diff --git a/metadata-service/services/src/main/java/com/linkedin/metadata/entity/AspectUtils.java b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/AspectUtils.java index eab482c7bab27..c4216962c134c 100644 --- a/metadata-service/services/src/main/java/com/linkedin/metadata/entity/AspectUtils.java +++ b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/AspectUtils.java @@ -10,9 +10,12 @@ import com.linkedin.entity.EntityResponse; import com.linkedin.entity.client.EntityClient; import com.linkedin.events.metadata.ChangeType; +import com.linkedin.metadata.models.AspectSpec; +import com.linkedin.metadata.models.EntitySpec; import com.linkedin.metadata.utils.EntityKeyUtils; import com.linkedin.metadata.utils.GenericRecordUtils; import com.linkedin.mxe.GenericAspect; +import com.linkedin.mxe.MetadataChangeLog; import com.linkedin.mxe.MetadataChangeProposal; import java.util.Collections; import java.util.HashMap; @@ -35,7 +38,7 @@ private AspectUtils() {} public static List getAdditionalChanges( @Nonnull MetadataChangeProposal metadataChangeProposal, - @Nonnull EntityService entityService, + @Nonnull EntityService entityService, boolean onPrimaryKeyInsertOnly) { // No additional changes for unsupported operations @@ -174,4 +177,41 @@ public static AuditStamp 
getAuditStamp(Urn actor) { auditStamp.setActor(actor); return auditStamp; } + + public static AspectSpec validateAspect(MetadataChangeLog mcl, EntitySpec entitySpec) { + if (!mcl.hasAspectName() + || (!ChangeType.DELETE.equals(mcl.getChangeType()) && !mcl.hasAspect())) { + throw new UnsupportedOperationException( + String.format( + "Aspect and aspect name is required for create and update operations. changeType: %s entityName: %s hasAspectName: %s hasAspect: %s", + mcl.getChangeType(), entitySpec.getName(), mcl.hasAspectName(), mcl.hasAspect())); + } + + AspectSpec aspectSpec = entitySpec.getAspectSpec(mcl.getAspectName()); + + if (aspectSpec == null) { + throw new RuntimeException( + String.format( + "Unknown aspect %s for entity %s", mcl.getAspectName(), mcl.getEntityType())); + } + + return aspectSpec; + } + + public static AspectSpec validateAspect(MetadataChangeProposal mcp, EntitySpec entitySpec) { + if (!mcp.hasAspectName() || !mcp.hasAspect()) { + throw new UnsupportedOperationException( + "Aspect and aspect name is required for create and update operations"); + } + + AspectSpec aspectSpec = entitySpec.getAspectSpec(mcp.getAspectName()); + + if (aspectSpec == null) { + throw new RuntimeException( + String.format( + "Unknown aspect %s for entity %s", mcp.getAspectName(), mcp.getEntityType())); + } + + return aspectSpec; + } } diff --git a/metadata-service/services/src/main/java/com/linkedin/metadata/entity/DeleteEntityService.java b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/DeleteEntityService.java index 3b71c698e0c9f..2cd1aadf7665d 100644 --- a/metadata-service/services/src/main/java/com/linkedin/metadata/entity/DeleteEntityService.java +++ b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/DeleteEntityService.java @@ -46,7 +46,7 @@ @RequiredArgsConstructor public class DeleteEntityService { - private final EntityService _entityService; + private final EntityService _entityService; private final GraphService _graphService; private static final Integer ELASTIC_BATCH_DELETE_SLEEP_SEC = 5; diff --git a/metadata-service/services/src/main/java/com/linkedin/metadata/entity/EntityService.java b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/EntityService.java index 8654df4435cd6..89b0e5ba9a558 100644 --- a/metadata-service/services/src/main/java/com/linkedin/metadata/entity/EntityService.java +++ b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/EntityService.java @@ -12,9 +12,10 @@ import com.linkedin.entity.client.SystemEntityClient; import com.linkedin.events.metadata.ChangeType; import com.linkedin.metadata.aspect.VersionedAspect; +import com.linkedin.metadata.aspect.batch.AspectsBatch; +import com.linkedin.metadata.aspect.batch.UpsertItem; import com.linkedin.metadata.entity.restoreindices.RestoreIndicesArgs; import com.linkedin.metadata.entity.restoreindices.RestoreIndicesResult; -import com.linkedin.metadata.entity.transactions.AspectsBatch; import com.linkedin.metadata.models.AspectSpec; import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.metadata.query.ListUrnsResult; @@ -33,7 +34,7 @@ import javax.annotation.Nonnull; import javax.annotation.Nullable; -public interface EntityService { +public interface EntityService { /** * Just whether the entity/aspect exists @@ -119,15 +120,12 @@ Map getEntitiesVersionedV2( /** * Retrieves the latest aspects for the given set of urns as a list of enveloped aspects * - * @param entityName name of the entity to fetch * @param 
urns set of urns to fetch * @param aspectNames set of aspects to fetch * @return a map of {@link Urn} to {@link EnvelopedAspect} object */ Map> getLatestEnvelopedAspects( - // TODO: entityName is unused, can we remove this as a param? - @Nonnull String entityName, @Nonnull Set urns, @Nonnull Set aspectNames) - throws URISyntaxException; + @Nonnull Set urns, @Nonnull Set aspectNames) throws URISyntaxException; /** * Retrieves the latest aspects for the given set of urns as a list of enveloped aspects @@ -169,10 +167,7 @@ List ingestAspects( @Nullable SystemMetadata systemMetadata); List ingestAspects( - @Nonnull final AspectsBatch aspectsBatch, - @Nonnull final AuditStamp auditStamp, - boolean emitMCL, - boolean overwrite); + @Nonnull final AspectsBatch aspectsBatch, boolean emitMCL, boolean overwrite); /** * Ingests (inserts) a new version of an entity aspect & emits a {@link @@ -233,7 +228,7 @@ Pair, Boolean> alwaysProduceMCLAsync( @Nonnull AuditStamp auditStamp, @Nonnull final ChangeType changeType); - RecordTemplate getLatestAspect(@Nonnull final Urn urn, @Nonnull final String aspectName); + // RecordTemplate getLatestAspect(@Nonnull final Urn urn, @Nonnull final String aspectName); @Deprecated void ingestEntities( @@ -250,7 +245,7 @@ void ingestEntity( @Nonnull AuditStamp auditStamp, @Nonnull SystemMetadata systemMetadata); - void setRetentionService(RetentionService retentionService); + void setRetentionService(RetentionService retentionService); AspectSpec getKeyAspectSpec(@Nonnull final Urn urn); @@ -304,8 +299,7 @@ RollbackRunResult rollbackRun( RollbackRunResult rollbackWithConditions( List aspectRows, Map conditions, boolean hardDelete); - Set ingestProposal( - AspectsBatch aspectsBatch, AuditStamp auditStamp, final boolean async); + Set ingestProposal(AspectsBatch aspectsBatch, final boolean async); /** * If you have more than 1 proposal use the {AspectsBatch} method @@ -343,4 +337,8 @@ BrowsePathsV2 buildDefaultBrowsePathV2(final @Nonnull Urn urn, boolean useContai * @param systemEntityClient system entity client */ void setSystemEntityClient(SystemEntityClient systemEntityClient); + + SystemEntityClient getSystemEntityClient(); + + RecordTemplate getLatestAspect(@Nonnull final Urn urn, @Nonnull final String aspectName); } diff --git a/metadata-service/services/src/main/java/com/linkedin/metadata/entity/IngestResult.java b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/IngestResult.java index 3e72a763fb17c..d3f8b507bb14a 100644 --- a/metadata-service/services/src/main/java/com/linkedin/metadata/entity/IngestResult.java +++ b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/IngestResult.java @@ -1,7 +1,7 @@ package com.linkedin.metadata.entity; import com.linkedin.common.urn.Urn; -import com.linkedin.metadata.entity.transactions.AbstractBatchItem; +import com.linkedin.metadata.aspect.batch.BatchItem; import lombok.Builder; import lombok.Value; @@ -9,7 +9,7 @@ @Value public class IngestResult { Urn urn; - AbstractBatchItem request; + BatchItem request; boolean publishedMCL; boolean processedMCL; boolean publishedMCP; diff --git a/metadata-service/services/src/main/java/com/linkedin/metadata/entity/RetentionService.java b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/RetentionService.java index 51519f48bd975..ae33b72010ce2 100644 --- a/metadata-service/services/src/main/java/com/linkedin/metadata/entity/RetentionService.java +++ 
b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/RetentionService.java @@ -7,9 +7,10 @@ import com.linkedin.data.template.RecordTemplate; import com.linkedin.events.metadata.ChangeType; import com.linkedin.metadata.Constants; +import com.linkedin.metadata.aspect.batch.AspectsBatch; +import com.linkedin.metadata.aspect.batch.UpsertItem; import com.linkedin.metadata.entity.retention.BulkApplyRetentionArgs; import com.linkedin.metadata.entity.retention.BulkApplyRetentionResult; -import com.linkedin.metadata.entity.transactions.AspectsBatch; import com.linkedin.metadata.key.DataHubRetentionKey; import com.linkedin.metadata.utils.EntityKeyUtils; import com.linkedin.metadata.utils.GenericRecordUtils; @@ -30,16 +31,16 @@ import lombok.Value; /** - * Service coupled with an {@link EntityServiceImpl} to handle aspect record retention. + * Service coupled with an {@link EntityService} to handle aspect record retention. * *
<p>
TODO: This class is abstract with storage-specific implementations. It'd be nice to pull - * storage and retention concerns apart, let (into {@link AspectDao}) deal with storage, and merge - * all retention concerns into a single class. + * storage and retention concerns apart, let AspectDaos deal with storage, and merge all retention + * concerns into a single class. */ -public abstract class RetentionService { +public abstract class RetentionService { protected static final String ALL = "*"; - protected abstract EntityService getEntityService(); + protected abstract EntityService getEntityService(); /** * Fetch retention policies given the entityName and aspectName Uses the entity service to fetch @@ -120,13 +121,14 @@ public boolean setRetention( new AuditStamp() .setActor(Urn.createFromString(Constants.SYSTEM_ACTOR)) .setTime(System.currentTimeMillis()); - AspectsBatch batch = buildAspectsBatch(List.of(keyProposal, aspectProposal)); + AspectsBatch batch = buildAspectsBatch(List.of(keyProposal, aspectProposal), auditStamp); - return getEntityService().ingestProposal(batch, auditStamp, false).stream() + return getEntityService().ingestProposal(batch, false).stream() .anyMatch(IngestResult::isSqlCommitted); } - protected abstract AspectsBatch buildAspectsBatch(List mcps); + protected abstract AspectsBatch buildAspectsBatch( + List mcps, @Nonnull AuditStamp auditStamp); /** * Delete the retention policy set for given entity and aspect. diff --git a/metadata-service/services/src/main/java/com/linkedin/metadata/entity/UpdateAspectResult.java b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/UpdateAspectResult.java index a10c90bc45320..515e08646f9ed 100644 --- a/metadata-service/services/src/main/java/com/linkedin/metadata/entity/UpdateAspectResult.java +++ b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/UpdateAspectResult.java @@ -3,7 +3,7 @@ import com.linkedin.common.AuditStamp; import com.linkedin.common.urn.Urn; import com.linkedin.data.template.RecordTemplate; -import com.linkedin.metadata.entity.transactions.AbstractBatchItem; +import com.linkedin.metadata.aspect.batch.UpsertItem; import com.linkedin.mxe.MetadataAuditOperation; import com.linkedin.mxe.SystemMetadata; import java.util.concurrent.Future; @@ -14,7 +14,7 @@ @Value public class UpdateAspectResult { Urn urn; - AbstractBatchItem request; + UpsertItem request; RecordTemplate oldValue; RecordTemplate newValue; SystemMetadata oldSystemMetadata; diff --git a/metadata-service/services/src/main/java/com/linkedin/metadata/entity/transactions/AbstractBatchItem.java b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/transactions/AbstractBatchItem.java deleted file mode 100644 index 155385c62ecef..0000000000000 --- a/metadata-service/services/src/main/java/com/linkedin/metadata/entity/transactions/AbstractBatchItem.java +++ /dev/null @@ -1,94 +0,0 @@ -package com.linkedin.metadata.entity.transactions; - -import static com.linkedin.metadata.Constants.*; - -import com.linkedin.common.urn.Urn; -import com.linkedin.events.metadata.ChangeType; -import com.linkedin.metadata.models.AspectSpec; -import com.linkedin.metadata.models.EntitySpec; -import com.linkedin.metadata.models.registry.EntityRegistry; -import com.linkedin.metadata.models.registry.template.AspectTemplateEngine; -import com.linkedin.mxe.MetadataChangeProposal; -import com.linkedin.mxe.SystemMetadata; -import javax.annotation.Nonnull; -import javax.annotation.Nullable; - -public abstract class AbstractBatchItem { - 
// urn an urn associated with the new aspect - public abstract Urn getUrn(); - - // aspectName name of the aspect being inserted - public abstract String getAspectName(); - - public abstract SystemMetadata getSystemMetadata(); - - public abstract ChangeType getChangeType(); - - public abstract EntitySpec getEntitySpec(); - - public abstract AspectSpec getAspectSpec(); - - public abstract MetadataChangeProposal getMetadataChangeProposal(); - - public abstract void validateUrn(EntityRegistry entityRegistry, Urn urn); - - @Nonnull - protected static SystemMetadata generateSystemMetadataIfEmpty( - @Nullable SystemMetadata systemMetadata) { - if (systemMetadata == null) { - systemMetadata = new SystemMetadata(); - systemMetadata.setRunId(DEFAULT_RUN_ID); - systemMetadata.setLastObserved(System.currentTimeMillis()); - } - return systemMetadata; - } - - protected static AspectSpec validateAspect(MetadataChangeProposal mcp, EntitySpec entitySpec) { - if (!mcp.hasAspectName() || !mcp.hasAspect()) { - throw new UnsupportedOperationException( - "Aspect and aspect name is required for create and update operations"); - } - - AspectSpec aspectSpec = entitySpec.getAspectSpec(mcp.getAspectName()); - - if (aspectSpec == null) { - throw new RuntimeException( - String.format( - "Unknown aspect %s for entity %s", mcp.getAspectName(), mcp.getEntityType())); - } - - return aspectSpec; - } - - /** - * Validates that a change type is valid for the given aspect - * - * @param changeType - * @param aspectSpec - * @return - */ - protected static boolean isValidChangeType(ChangeType changeType, AspectSpec aspectSpec) { - if (aspectSpec.isTimeseries()) { - // Timeseries aspects only support UPSERT - return ChangeType.UPSERT.equals(changeType); - } else { - if (ChangeType.PATCH.equals(changeType)) { - return supportsPatch(aspectSpec); - } else { - return ChangeType.UPSERT.equals(changeType); - } - } - } - - protected static boolean supportsPatch(AspectSpec aspectSpec) { - // Limit initial support to defined templates - if (!AspectTemplateEngine.SUPPORTED_TEMPLATES.contains(aspectSpec.getName())) { - // Prevent unexpected behavior for aspects that do not currently have 1st class patch support, - // specifically having array based fields that require merging without specifying merge - // behavior can get into bad states - throw new UnsupportedOperationException( - "Aspect: " + aspectSpec.getName() + " does not currently support patch " + "operations."); - } - return true; - } -} diff --git a/metadata-service/services/src/main/java/com/linkedin/metadata/entity/transactions/AspectsBatch.java b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/transactions/AspectsBatch.java deleted file mode 100644 index 4f2cf6073bdac..0000000000000 --- a/metadata-service/services/src/main/java/com/linkedin/metadata/entity/transactions/AspectsBatch.java +++ /dev/null @@ -1,26 +0,0 @@ -package com.linkedin.metadata.entity.transactions; - -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.stream.Collectors; - -public interface AspectsBatch { - List getItems(); - - default boolean containsDuplicateAspects() { - return getItems().stream() - .map(i -> String.format("%s_%s", i.getClass().getName(), i.hashCode())) - .distinct() - .count() - != getItems().size(); - } - - default Map> getUrnAspectsMap() { - return getItems().stream() - .map(aspect -> Map.entry(aspect.getUrn().toString(), aspect.getAspectName())) - .collect( - Collectors.groupingBy( - Map.Entry::getKey, 
Collectors.mapping(Map.Entry::getValue, Collectors.toSet()))); - } -} diff --git a/metadata-utils/src/main/java/com/linkedin/metadata/utils/SystemMetadataUtils.java b/metadata-utils/src/main/java/com/linkedin/metadata/utils/SystemMetadataUtils.java index b0f42231b27f3..81bfcaab74ddb 100644 --- a/metadata-utils/src/main/java/com/linkedin/metadata/utils/SystemMetadataUtils.java +++ b/metadata-utils/src/main/java/com/linkedin/metadata/utils/SystemMetadataUtils.java @@ -2,6 +2,7 @@ import com.linkedin.metadata.Constants; import com.linkedin.mxe.SystemMetadata; +import javax.annotation.Nullable; import lombok.extern.slf4j.Slf4j; @Slf4j @@ -14,4 +15,9 @@ public static SystemMetadata createDefaultSystemMetadata() { .setRunId(Constants.DEFAULT_RUN_ID) .setLastObserved(System.currentTimeMillis()); } + + public static SystemMetadata generateSystemMetadataIfEmpty( + @Nullable SystemMetadata systemMetadata) { + return systemMetadata == null ? createDefaultSystemMetadata() : systemMetadata; + } } From dcc55cab2b32bb0459cfc7e85dba8133cb13bd4a Mon Sep 17 00:00:00 2001 From: Kunal-kankriya <127090035+Kunal-kankriya@users.noreply.github.com> Date: Tue, 9 Jan 2024 17:53:57 +0530 Subject: [PATCH 02/16] fix(tests): glossary test, search test, managed ingestion test (#9584) --- .../cypress/cypress/e2e/glossary/glossary.js | 11 ++-- .../cypress/e2e/glossary/glossaryTerm.js | 59 +++++++++---------- .../e2e/mutations/managed_ingestion.js | 5 +- .../e2e/search/query_and_filter_search.js | 53 ++++++++--------- .../tests/cypress/cypress/support/commands.js | 16 +++-- 5 files changed, 68 insertions(+), 76 deletions(-) diff --git a/smoke-test/tests/cypress/cypress/e2e/glossary/glossary.js b/smoke-test/tests/cypress/cypress/e2e/glossary/glossary.js index d3cb88bcb0295..0be80020267f3 100644 --- a/smoke-test/tests/cypress/cypress/e2e/glossary/glossary.js +++ b/smoke-test/tests/cypress/cypress/e2e/glossary/glossary.js @@ -4,17 +4,18 @@ describe("glossary", () => { const urn = "urn:li:dataset:(urn:li:dataPlatform:hive,cypress_logging_events,PROD)"; const datasetName = "cypress_logging_events"; const glossaryTerm = "CypressGlosssaryTerm"; - const glossaryTermGroup = "CypressGlosssaryGroup"; - cy.login(); + const glossaryTermGroup = "CypressGlossaryGroup"; + cy.loginWithCredentials(); cy.goToGlossaryList(); - + cy.clickOptionWithText("Add Term"); - cy.addViaModal(glossaryTerm, "Create Glossary Term"); + cy.addViaModal(glossaryTerm, "Create Glossary Term", "Created Glossary Term!"); cy.clickOptionWithText("Add Term Group"); - cy.addViaModal(glossaryTermGroup, "Create Term Group"); + cy.addViaModal(glossaryTermGroup, "Create Term Group", "Created Term Group!"); cy.addTermToDataset(urn, datasetName, glossaryTerm); + cy.waitTextVisible('Added Terms!') cy.goToGlossaryList(); cy.clickOptionWithText(glossaryTerm); diff --git a/smoke-test/tests/cypress/cypress/e2e/glossary/glossaryTerm.js b/smoke-test/tests/cypress/cypress/e2e/glossary/glossaryTerm.js index 506825051207f..ef7787c28d4fa 100644 --- a/smoke-test/tests/cypress/cypress/e2e/glossary/glossaryTerm.js +++ b/smoke-test/tests/cypress/cypress/e2e/glossary/glossaryTerm.js @@ -1,17 +1,18 @@ -const glossaryTermUrl = - "/glossaryTerm/urn:li:glossaryTerm:CypressNode.CypressColumnInfoType/Related%20Entities"; -const SampleCypressHdfsDataset = "SampleCypressHdfsDataset"; +const glossaryTerms = { + glossaryTermUrl:"/glossaryTerm/urn:li:glossaryTerm:CypressNode.CypressColumnInfoType/Related%20Entities", + SampleCypressHdfsDataset:"SampleCypressHdfsDataset" +}; const applyTagFilter = 
(tag) => { - cy.get('[data-icon="filter"]').click(); - cy.contains("Filter").should("be.visible"); + cy.get('[aria-label="filter"]').should('be.visible').click() + cy.waitTextVisible("Filter"); cy.get(`[data-testid="facet-tags-${tag}"]`).click({ force: true }); }; const applyAdvancedSearchFilter = (filterType, value) => { cy.get('[aria-label="filter"]').click(); cy.get('[id="search-results-advanced-search"]').click(); - cy.get('[class="anticon anticon-plus sc-dvXYtj iduHXF"]').click(); + cy.clickOptionWithText('Add Filter'); if (filterType === "Tag") { applyTagFilterInSearch(value); @@ -21,21 +22,18 @@ const applyAdvancedSearchFilter = (filterType, value) => { }; const applyBasicSearchFilter = () => { - cy.contains("Basic").should("be.visible"); - cy.get('[class="anticon anticon-plus sc-dvXYtj iduHXF"]').click(); + cy.waitTextVisible("Basic"); + cy.clickOptionWithText('Add Filter'); }; const searchByConceptsWithLogicalOperator = (concept1, concept2, operator) => { - cy.contains("Filters"); + cy.waitTextVisible("Filters"); applyBasicSearchFilter(); - applyTagFilterInSearch(concept1); - - cy.get('[class="anticon anticon-plus sc-dvXYtj iduHXF"]').click(); + cy.clickOptionWithText('Add Filter'); applyDescriptionFilterInAdvancedSearch(concept2); - cy.get('[title="all filters"]').click(); - cy.contains(operator).click({ force: true }); + cy.clickOptionWithText(operator) }; // Helper function to apply tag filter in basic search @@ -46,62 +44,59 @@ const applyTagFilterInSearch = (tag) => { // Helper function to apply description filter in advanced search const applyDescriptionFilterInAdvancedSearch = (value) => { - cy.get('[data-testid="adv-search-add-filter-description"]').click({ - force: true, - }); + cy.get('[data-testid="adv-search-add-filter-description"]').click({ force: true }); cy.get('[data-testid="edit-text-input"]').type(value); cy.get('[data-testid="edit-text-done-btn"]').click({ force: true }); }; describe("glossaryTerm", () => { beforeEach(() => { - cy.loginWithCredentials(); - cy.visit(glossaryTermUrl); + cy.loginWithCredentials(); + cy.visit(glossaryTerms.glossaryTermUrl); }); it("can visit related entities", () => { cy.contains("of 0").should("not.exist"); - cy.contains(/of [0-9]+/); + cy.waitTextVisible(/of [0-9]+/); }); it("can search related entities by query", () => { cy.get('[placeholder="Filter entities..."]').click().type("logging{enter}"); cy.contains("of 0").should("not.exist"); - cy.contains(/of 1/); - cy.contains("cypress_logging_events"); - cy.contains(SampleCypressHdfsDataset).should("not.exist"); + cy.waitTextVisible(/of 1/); + cy.waitTextVisible("cypress_logging_events"); + cy.contains(glossaryTerms.SampleCypressHdfsDataset).should("not.exist"); }); it("can apply filters on related entities", () => { applyTagFilter("urn:li:tag:Cypress2"); cy.contains("cypress_logging_events").should("not.exist"); - cy.contains(SampleCypressHdfsDataset); + cy.waitTextVisible(glossaryTerms.SampleCypressHdfsDataset); }); it("can search related entities by a specific tag using advanced search", () => { applyAdvancedSearchFilter("Tag", "Cypress2"); - cy.contains(SampleCypressHdfsDataset); - cy.contains("of 1"); + cy.waitTextVisible(glossaryTerms.SampleCypressHdfsDataset); + cy.waitTextVisible("of 1"); }); it("can search related entities by AND-ing two concepts using search", () => { applyAdvancedSearchFilter(); - - cy.get('[class="anticon anticon-plus sc-dvXYtj iduHXF"]').click(); + cy.clickOptionWithText('Add Filter'); cy.get('[data-testid="adv-search-add-filter-description"]').click({ 
force: true, }); cy.get('[data-testid="edit-text-input"]').type("my hdfs"); cy.get('[data-testid="edit-text-done-btn"]').click({ force: true }); - cy.contains(SampleCypressHdfsDataset); - cy.contains("of 1"); + cy.waitTextVisible(glossaryTerms.SampleCypressHdfsDataset); + cy.waitTextVisible("of 1"); }); it("can search related entities by OR-ing two concepts using search", () => { applyAdvancedSearchFilter("Description", "single log event"); applyBasicSearchFilter("Tag", "Cypress2"); searchByConceptsWithLogicalOperator("Cypress", "Tag", "any filter"); - cy.contains(SampleCypressHdfsDataset); - cy.contains("cypress_logging_events"); + cy.waitTextVisible(glossaryTerms.SampleCypressHdfsDataset); + cy.waitTextVisible("cypress_logging_events"); }); }); \ No newline at end of file diff --git a/smoke-test/tests/cypress/cypress/e2e/mutations/managed_ingestion.js b/smoke-test/tests/cypress/cypress/e2e/mutations/managed_ingestion.js index 3d052695e818f..05f94c94bfe2a 100644 --- a/smoke-test/tests/cypress/cypress/e2e/mutations/managed_ingestion.js +++ b/smoke-test/tests/cypress/cypress/e2e/mutations/managed_ingestion.js @@ -11,12 +11,11 @@ describe("run managed ingestion", () => { cy.login(); cy.goToIngestionPage(); cy.clickOptionWithText("Create new source"); - cy.clickOptionWithText("Other"); + cy.clickOptionWithTextToScrollintoView("Other"); cy.waitTextVisible("source-type"); readyToTypeEditor().type('{ctrl}a').clear() - readyToTypeEditor().type("source:"); - readyToTypeEditor().type("{enter}"); + readyToTypeEditor().type("source:{enter}"); readyToTypeEditor().type(" type: demo-data"); readyToTypeEditor().type("{enter}"); // no space because the editor starts new line at same indentation diff --git a/smoke-test/tests/cypress/cypress/e2e/search/query_and_filter_search.js b/smoke-test/tests/cypress/cypress/e2e/search/query_and_filter_search.js index ee927feeaea58..a0bd4de2b8ed7 100644 --- a/smoke-test/tests/cypress/cypress/e2e/search/query_and_filter_search.js +++ b/smoke-test/tests/cypress/cypress/e2e/search/query_and_filter_search.js @@ -1,14 +1,3 @@ -const datasetNames = { - dashboardsType: "Baz Dashboard", - pipelinesType: "Users", - MlmoduleType: "cypress-model", - glossaryTermsType: "CypressColumnInfoType", - tags: "some-cypress-feature-1", - hivePlatform: "cypress_logging_events", - airflowPlatform: "User Creations", - hdfsPlatform: "SampleHdfsDataset" -}; - const searchToExecute = (value) => { cy.get("input[data-testid=search-input]").eq(0).type(`${value}{enter}`); cy.waitTextPresent("Type"); @@ -27,32 +16,39 @@ const verifyFilteredEntity = (text) => { cy.get('.ant-typography').contains(text).should('be.visible'); }; +const clickAndVerifyEntity = (entity) => { + cy.get('[class*="entityUrn-urn"]').first() + .find('a[href*="urn:li"] span[class^="ant-typography"]').last().invoke('text') + .then((text) => { + cy.contains(text).click(); + verifyFilteredEntity(text); + verifyFilteredEntity(entity); + }); + } + describe("auto-complete dropdown, filter plus query search test", () => { beforeEach(() => { cy.loginWithCredentials(); cy.visit('/'); }); - + it.skip("Verify the 'filter by type' section + query", () => { //Dashboard searchToExecute("*"); selectFilteredEntity("Type", "Dashboards", "filter__entityType"); - cy.clickOptionWithText(datasetNames.dashboardsType); - verifyFilteredEntity('Dashboard'); + clickAndVerifyEntity('Dashboard') //Ml Models searchToExecute("*"); - selectFilteredEntity("Type", "ML Models", "filter__entityType"); - cy.clickOptionWithText(datasetNames.MlmoduleType); - 
verifyFilteredEntity('ML Model'); + selectFilteredEntity("Type", "ML Models", "filter__entityType"); + clickAndVerifyEntity('ML Model'); //Piplines searchToExecute("*"); - selectFilteredEntity("Type", "Pipelines", "filter__entityType"); - cy.clickOptionWithText(datasetNames.pipelinesType); - verifyFilteredEntity('Pipeline'); + selectFilteredEntity("Type", "Pipelines", "filter__entityType"); + clickAndVerifyEntity('Pipeline'); }); @@ -61,8 +57,8 @@ describe("auto-complete dropdown, filter plus query search test", () => { //Glossary Term searchToExecute("*"); selectFilteredEntity("Type", "Glossary Terms", "filter__entityType"); - cy.clickOptionWithText(datasetNames.glossaryTermsType); - verifyFilteredEntity('Glossary Term'); + clickAndVerifyEntity('Glossary Term') + }); it("Verify the 'filter by platform' section + query", () => { @@ -70,20 +66,17 @@ describe("auto-complete dropdown, filter plus query search test", () => { //Hive searchToExecute("*"); selectFilteredEntity("Platform", "Hive", "filter_platform"); - cy.clickOptionWithText(datasetNames.hivePlatform); - verifyFilteredEntity('Hive'); + clickAndVerifyEntity('Hive') //HDFS searchToExecute("*"); selectFilteredEntity("Platform", "HDFS", "filter_platform"); - cy.clickOptionWithText(datasetNames.hdfsPlatform); - verifyFilteredEntity('HDFS'); + clickAndVerifyEntity('HDFS') //Airflow searchToExecute("*"); selectFilteredEntity("Platform", "Airflow", "filter_platform"); - cy.clickOptionWithText(datasetNames.airflowPlatform); - verifyFilteredEntity('Airflow'); + clickAndVerifyEntity('Airflow') }); it("Verify the 'filter by tag' section + query", () => { @@ -91,8 +84,8 @@ describe("auto-complete dropdown, filter plus query search test", () => { //CypressFeatureTag searchToExecute("*"); selectFilteredEntity("Tag", "CypressFeatureTag", "filter_tags"); - cy.clickOptionWithText(datasetNames.tags); + clickAndVerifyEntity('Tags') cy.mouseover('[data-testid="tag-CypressFeatureTag"]'); - verifyFilteredEntity('Feature'); + verifyFilteredEntity('CypressFeatureTag'); }); }); diff --git a/smoke-test/tests/cypress/cypress/support/commands.js b/smoke-test/tests/cypress/cypress/support/commands.js index 5130bfd6dc4fe..96ff8f4a50c8b 100644 --- a/smoke-test/tests/cypress/cypress/support/commands.js +++ b/smoke-test/tests/cypress/cypress/support/commands.js @@ -27,8 +27,8 @@ Cypress.Commands.add('login', () => { method: 'POST', url: '/logIn', body: { - username: Cypress.env('ADMIN_USERNAME'), - password: Cypress.env('ADMIN_PASSWORD'), + username: Cypress.env('ADMIN_USERNAME'), + password: Cypress.env('ADMIN_PASSWORD'), }, retryOnStatusCodeFailure: true, }); @@ -66,7 +66,6 @@ Cypress.Commands.add("logout", () => { Cypress.Commands.add("goToGlossaryList", () => { cy.visit("/glossary"); cy.waitTextVisible("Glossary"); - cy.wait(3000); }); Cypress.Commands.add("goToDomainList", () => { @@ -160,7 +159,11 @@ Cypress.Commands.add("openThreeDotDropdown", () => { }); Cypress.Commands.add("clickOptionWithText", (text) => { - cy.contains(text).click(); + cy.contains(text).should('be.visible').click(); +}); + +Cypress.Commands.add("clickOptionWithTextToScrollintoView", (text) => { + cy.contains(text).scrollIntoView().click(); }); Cypress.Commands.add("deleteFromDropdown", () => { @@ -175,10 +178,11 @@ Cypress.Commands.add("addViaFormModal", (text, modelHeader) => { cy.get(".ant-modal-footer > button:nth-child(2)").click(); }); -Cypress.Commands.add("addViaModal", (text, modelHeader) => { +Cypress.Commands.add("addViaModal", (text, modelHeader,verifyMessage) => { 
cy.waitTextVisible(modelHeader); cy.get(".ant-input-affix-wrapper > input[type='text']").first().type(text); cy.get(".ant-modal-footer > button:nth-child(2)").click(); + cy.contains(verifyMessage).should('be.visible'); }); Cypress.Commands.add("ensureTextNotPresent", (text) => { @@ -333,7 +337,7 @@ Cypress.Commands.add("addGroupMember", (group_name, group_urn, member_name) => { Cypress.Commands.add("createGlossaryTermGroup", (term_group_name) => { cy.goToGlossaryList(); - cy.clickOptionWithTestId("add-term-group-button"); + cy.clickOptionWithText('Add Term Group'); cy.waitTextVisible("Create Term Group"); cy.enterTextInTestId("create-glossary-entity-modal-name", term_group_name); cy.clickOptionWithTestId("glossary-entity-modal-create-button"); From 5bfd674f08260c1ef96857de55df9a2fee9598b8 Mon Sep 17 00:00:00 2001 From: Aseem Bansal Date: Tue, 9 Jan 2024 21:39:15 +0530 Subject: [PATCH 03/16] fix(owner): last modified on adding owner (#9553) --- .../domain/CreateDomainResolver.java | 10 +- .../glossary/CreateGlossaryNodeResolver.java | 15 +- .../glossary/CreateGlossaryTermResolver.java | 14 +- .../resolvers/mutate/AddOwnerResolver.java | 6 +- .../resolvers/mutate/AddOwnersResolver.java | 6 +- .../mutate/BatchAddOwnersResolver.java | 6 +- .../mutate/BatchRemoveOwnersResolver.java | 19 +-- .../resolvers/mutate/RemoveOwnerResolver.java | 15 +- .../resolvers/mutate/util/OwnerUtils.java | 154 ++++++++---------- .../resolvers/tag/CreateTagResolver.java | 11 +- .../datahub/graphql/utils/OwnerUtilsTest.java | 110 +++++++++++++ 11 files changed, 201 insertions(+), 165 deletions(-) create mode 100644 datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/utils/OwnerUtilsTest.java diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/domain/CreateDomainResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/domain/CreateDomainResolver.java index 9099394d32bd0..19809170aad38 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/domain/CreateDomainResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/domain/CreateDomainResolver.java @@ -2,7 +2,6 @@ import static com.linkedin.datahub.graphql.resolvers.ResolverUtils.*; import static com.linkedin.datahub.graphql.resolvers.mutate.MutationUtils.*; -import static com.linkedin.datahub.graphql.resolvers.mutate.util.OwnerUtils.*; import static com.linkedin.metadata.Constants.*; import com.linkedin.common.AuditStamp; @@ -16,7 +15,6 @@ import com.linkedin.datahub.graphql.exception.DataHubGraphQLException; import com.linkedin.datahub.graphql.generated.CreateDomainInput; import com.linkedin.datahub.graphql.generated.OwnerEntityType; -import com.linkedin.datahub.graphql.generated.OwnershipType; import com.linkedin.datahub.graphql.resolvers.mutate.util.DomainUtils; import com.linkedin.datahub.graphql.resolvers.mutate.util.OwnerUtils; import com.linkedin.domain.DomainProperties; @@ -100,14 +98,8 @@ public CompletableFuture get(DataFetchingEnvironment environment) throws String domainUrn = _entityClient.ingestProposal(proposal, context.getAuthentication(), false); - OwnershipType ownershipType = OwnershipType.TECHNICAL_OWNER; - if (!_entityService.exists( - UrnUtils.getUrn(mapOwnershipTypeToEntity(ownershipType.name())))) { - log.warn("Technical owner does not exist, defaulting to None ownership."); - ownershipType = OwnershipType.NONE; - } OwnerUtils.addCreatorAsOwner( - context, domainUrn, OwnerEntityType.CORP_USER, 
ownershipType, _entityService); + context, domainUrn, OwnerEntityType.CORP_USER, _entityService); return domainUrn; } catch (DataHubGraphQLException e) { throw e; diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/glossary/CreateGlossaryNodeResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/glossary/CreateGlossaryNodeResolver.java index 815b4662e1ed2..6a204286ba44e 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/glossary/CreateGlossaryNodeResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/glossary/CreateGlossaryNodeResolver.java @@ -2,7 +2,6 @@ import static com.linkedin.datahub.graphql.resolvers.ResolverUtils.bindArgument; import static com.linkedin.datahub.graphql.resolvers.mutate.MutationUtils.*; -import static com.linkedin.datahub.graphql.resolvers.mutate.util.OwnerUtils.*; import static com.linkedin.metadata.Constants.*; import com.linkedin.common.urn.GlossaryNodeUrn; @@ -13,7 +12,6 @@ import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.datahub.graphql.generated.CreateGlossaryEntityInput; import com.linkedin.datahub.graphql.generated.OwnerEntityType; -import com.linkedin.datahub.graphql.generated.OwnershipType; import com.linkedin.datahub.graphql.resolvers.mutate.util.GlossaryUtils; import com.linkedin.datahub.graphql.resolvers.mutate.util.OwnerUtils; import com.linkedin.entity.client.EntityClient; @@ -72,19 +70,8 @@ public CompletableFuture get(DataFetchingEnvironment environment) throws String glossaryNodeUrn = _entityClient.ingestProposal(proposal, context.getAuthentication(), false); - OwnershipType ownershipType = OwnershipType.TECHNICAL_OWNER; - if (!_entityService.exists( - UrnUtils.getUrn(mapOwnershipTypeToEntity(ownershipType.name())))) { - log.warn("Technical owner does not exist, defaulting to None ownership."); - ownershipType = OwnershipType.NONE; - } - OwnerUtils.addCreatorAsOwner( - context, - glossaryNodeUrn, - OwnerEntityType.CORP_USER, - ownershipType, - _entityService); + context, glossaryNodeUrn, OwnerEntityType.CORP_USER, _entityService); return glossaryNodeUrn; } catch (Exception e) { log.error( diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/glossary/CreateGlossaryTermResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/glossary/CreateGlossaryTermResolver.java index 90979fe918f71..147663059aa82 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/glossary/CreateGlossaryTermResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/glossary/CreateGlossaryTermResolver.java @@ -2,7 +2,6 @@ import static com.linkedin.datahub.graphql.resolvers.ResolverUtils.bindArgument; import static com.linkedin.datahub.graphql.resolvers.mutate.MutationUtils.*; -import static com.linkedin.datahub.graphql.resolvers.mutate.util.OwnerUtils.*; import static com.linkedin.metadata.Constants.*; import com.linkedin.common.urn.GlossaryNodeUrn; @@ -14,7 +13,6 @@ import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.datahub.graphql.generated.CreateGlossaryEntityInput; import com.linkedin.datahub.graphql.generated.OwnerEntityType; -import com.linkedin.datahub.graphql.generated.OwnershipType; import com.linkedin.datahub.graphql.resolvers.mutate.util.GlossaryUtils; import 
com.linkedin.datahub.graphql.resolvers.mutate.util.OwnerUtils; import com.linkedin.entity.EntityResponse; @@ -88,19 +86,9 @@ public CompletableFuture get(DataFetchingEnvironment environment) throws String glossaryTermUrn = _entityClient.ingestProposal(proposal, context.getAuthentication(), false); - OwnershipType ownershipType = OwnershipType.TECHNICAL_OWNER; - if (!_entityService.exists( - UrnUtils.getUrn(mapOwnershipTypeToEntity(ownershipType.name())))) { - log.warn("Technical owner does not exist, defaulting to None ownership."); - ownershipType = OwnershipType.NONE; - } OwnerUtils.addCreatorAsOwner( - context, - glossaryTermUrn, - OwnerEntityType.CORP_USER, - ownershipType, - _entityService); + context, glossaryTermUrn, OwnerEntityType.CORP_USER, _entityService); return glossaryTermUrn; } catch (Exception e) { log.error( diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/AddOwnerResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/AddOwnerResolver.java index 9c0d009ff9b0e..d1ea81fab083c 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/AddOwnerResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/AddOwnerResolver.java @@ -6,7 +6,6 @@ import com.linkedin.common.urn.CorpuserUrn; import com.linkedin.common.urn.Urn; import com.linkedin.datahub.graphql.QueryContext; -import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.datahub.graphql.generated.AddOwnerInput; import com.linkedin.datahub.graphql.generated.OwnerInput; import com.linkedin.datahub.graphql.generated.ResourceRefInput; @@ -40,10 +39,7 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw } OwnerInput ownerInput = ownerInputBuilder.build(); - if (!OwnerUtils.isAuthorizedToUpdateOwners(environment.getContext(), targetUrn)) { - throw new AuthorizationException( - "Unauthorized to perform this action. Please contact your DataHub administrator."); - } + OwnerUtils.validateAuthorizedToUpdateOwners(environment.getContext(), targetUrn); return CompletableFuture.supplyAsync( () -> { diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/AddOwnersResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/AddOwnersResolver.java index c64b2403364c8..96500f23303f8 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/AddOwnersResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/AddOwnersResolver.java @@ -6,7 +6,6 @@ import com.linkedin.common.urn.CorpuserUrn; import com.linkedin.common.urn.Urn; import com.linkedin.datahub.graphql.QueryContext; -import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.datahub.graphql.generated.AddOwnersInput; import com.linkedin.datahub.graphql.generated.OwnerInput; import com.linkedin.datahub.graphql.generated.ResourceRefInput; @@ -34,10 +33,7 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw return CompletableFuture.supplyAsync( () -> { - if (!OwnerUtils.isAuthorizedToUpdateOwners(environment.getContext(), targetUrn)) { - throw new AuthorizationException( - "Unauthorized to perform this action. 
Please contact your DataHub administrator."); - } + OwnerUtils.validateAuthorizedToUpdateOwners(environment.getContext(), targetUrn); OwnerUtils.validateAddOwnerInput(owners, targetUrn, _entityService); try { diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/BatchAddOwnersResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/BatchAddOwnersResolver.java index 94182835de159..4d57031954e31 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/BatchAddOwnersResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/BatchAddOwnersResolver.java @@ -5,7 +5,6 @@ import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; import com.linkedin.datahub.graphql.QueryContext; -import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.datahub.graphql.generated.BatchAddOwnersInput; import com.linkedin.datahub.graphql.generated.OwnerInput; import com.linkedin.datahub.graphql.generated.ResourceRefInput; @@ -74,10 +73,7 @@ private void validateInputResource(ResourceRefInput resource, QueryContext conte "Malformed input provided: owners cannot be applied to subresources."); } - if (!OwnerUtils.isAuthorizedToUpdateOwners(context, resourceUrn)) { - throw new AuthorizationException( - "Unauthorized to perform this action. Please contact your DataHub administrator."); - } + OwnerUtils.validateAuthorizedToUpdateOwners(context, resourceUrn); LabelUtils.validateResource( resourceUrn, resource.getSubResource(), resource.getSubResourceType(), _entityService); } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/BatchRemoveOwnersResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/BatchRemoveOwnersResolver.java index 30e04ac36ee0f..c0996b07fb961 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/BatchRemoveOwnersResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/BatchRemoveOwnersResolver.java @@ -5,7 +5,6 @@ import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; import com.linkedin.datahub.graphql.QueryContext; -import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.datahub.graphql.generated.BatchRemoveOwnersInput; import com.linkedin.datahub.graphql.generated.ResourceRefInput; import com.linkedin.datahub.graphql.resolvers.mutate.util.LabelUtils; @@ -14,7 +13,6 @@ import graphql.schema.DataFetcher; import graphql.schema.DataFetchingEnvironment; import java.util.List; -import java.util.Optional; import java.util.concurrent.CompletableFuture; import java.util.stream.Collectors; import lombok.RequiredArgsConstructor; @@ -32,10 +30,10 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw bindArgument(environment.getArgument("input"), BatchRemoveOwnersInput.class); final List owners = input.getOwnerUrns(); final List resources = input.getResources(); - final Optional maybeOwnershipTypeUrn = + final Urn ownershipTypeUrn = input.getOwnershipTypeUrn() == null - ? Optional.empty() - : Optional.of(Urn.createFromString(input.getOwnershipTypeUrn())); + ? 
null + : Urn.createFromString(input.getOwnershipTypeUrn()); final QueryContext context = environment.getContext(); return CompletableFuture.supplyAsync( @@ -46,7 +44,7 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw try { // Then execute the bulk remove - batchRemoveOwners(owners, maybeOwnershipTypeUrn, resources, context); + batchRemoveOwners(owners, ownershipTypeUrn, resources, context); return true; } catch (Exception e) { log.error( @@ -71,24 +69,21 @@ private void validateInputResource(ResourceRefInput resource, QueryContext conte "Malformed input provided: owners cannot be removed from subresources."); } - if (!OwnerUtils.isAuthorizedToUpdateOwners(context, resourceUrn)) { - throw new AuthorizationException( - "Unauthorized to perform this action. Please contact your DataHub administrator."); - } + OwnerUtils.validateAuthorizedToUpdateOwners(context, resourceUrn); LabelUtils.validateResource( resourceUrn, resource.getSubResource(), resource.getSubResourceType(), _entityService); } private void batchRemoveOwners( List ownerUrns, - Optional maybeOwnershipTypeUrn, + Urn ownershipTypeUrn, List resources, QueryContext context) { log.debug("Batch removing owners. owners: {}, resources: {}", ownerUrns, resources); try { OwnerUtils.removeOwnersFromResources( ownerUrns.stream().map(UrnUtils::getUrn).collect(Collectors.toList()), - maybeOwnershipTypeUrn, + ownershipTypeUrn, resources, UrnUtils.getUrn(context.getActorUrn()), _entityService); diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/RemoveOwnerResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/RemoveOwnerResolver.java index 9827aa0666d19..ec62a951573e2 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/RemoveOwnerResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/RemoveOwnerResolver.java @@ -6,14 +6,12 @@ import com.linkedin.common.urn.CorpuserUrn; import com.linkedin.common.urn.Urn; import com.linkedin.datahub.graphql.QueryContext; -import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.datahub.graphql.generated.RemoveOwnerInput; import com.linkedin.datahub.graphql.generated.ResourceRefInput; import com.linkedin.datahub.graphql.resolvers.mutate.util.OwnerUtils; import com.linkedin.metadata.entity.EntityService; import graphql.schema.DataFetcher; import graphql.schema.DataFetchingEnvironment; -import java.util.Optional; import java.util.concurrent.CompletableFuture; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; @@ -31,15 +29,12 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw Urn ownerUrn = Urn.createFromString(input.getOwnerUrn()); Urn targetUrn = Urn.createFromString(input.getResourceUrn()); - Optional maybeOwnershipTypeUrn = + Urn ownershipTypeUrn = input.getOwnershipTypeUrn() == null - ? Optional.empty() - : Optional.of(Urn.createFromString(input.getOwnershipTypeUrn())); + ? null + : Urn.createFromString(input.getOwnershipTypeUrn()); - if (!OwnerUtils.isAuthorizedToUpdateOwners(environment.getContext(), targetUrn)) { - throw new AuthorizationException( - "Unauthorized to perform this action. 
Please contact your DataHub administrator."); - } + OwnerUtils.validateAuthorizedToUpdateOwners(environment.getContext(), targetUrn); return CompletableFuture.supplyAsync( () -> { @@ -50,7 +45,7 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw ((QueryContext) environment.getContext()).getActorUrn()); OwnerUtils.removeOwnersFromResources( ImmutableList.of(ownerUrn), - maybeOwnershipTypeUrn, + ownershipTypeUrn, ImmutableList.of(new ResourceRefInput(input.getResourceUrn(), null, null)), actor, _entityService); diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/OwnerUtils.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/OwnerUtils.java index 15c3c14c7b8f6..55d408d3f7aab 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/OwnerUtils.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/OwnerUtils.java @@ -15,6 +15,7 @@ import com.linkedin.common.urn.UrnUtils; import com.linkedin.datahub.graphql.QueryContext; import com.linkedin.datahub.graphql.authorization.AuthorizationUtils; +import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.datahub.graphql.generated.OwnerEntityType; import com.linkedin.datahub.graphql.generated.OwnerInput; import com.linkedin.datahub.graphql.generated.OwnershipType; @@ -26,8 +27,8 @@ import com.linkedin.mxe.MetadataChangeProposal; import java.util.ArrayList; import java.util.List; -import java.util.Optional; import javax.annotation.Nonnull; +import javax.annotation.Nullable; import lombok.extern.slf4j.Slf4j; // TODO: Move to consuming from OwnerService @@ -42,22 +43,22 @@ public class OwnerUtils { private OwnerUtils() {} public static void addOwnersToResources( - List owners, - List resources, - Urn actor, + List ownerInputs, + List resourceRefs, + Urn actorUrn, EntityService entityService) { final List changes = new ArrayList<>(); - for (ResourceRefInput resource : resources) { + for (ResourceRefInput resource : resourceRefs) { changes.add( buildAddOwnersProposal( - owners, UrnUtils.getUrn(resource.getResourceUrn()), entityService)); + ownerInputs, UrnUtils.getUrn(resource.getResourceUrn()), actorUrn, entityService)); } - EntityUtils.ingestChangeProposals(changes, entityService, actor, false); + EntityUtils.ingestChangeProposals(changes, entityService, actorUrn, false); } public static void removeOwnersFromResources( List ownerUrns, - Optional maybeOwnershipTypeUrn, + @Nullable Urn ownershipTypeUrn, List resources, Urn actor, EntityService entityService) { @@ -66,7 +67,7 @@ public static void removeOwnersFromResources( changes.add( buildRemoveOwnersProposal( ownerUrns, - maybeOwnershipTypeUrn, + ownershipTypeUrn, UrnUtils.getUrn(resource.getResourceUrn()), actor, entityService)); @@ -75,7 +76,7 @@ public static void removeOwnersFromResources( } static MetadataChangeProposal buildAddOwnersProposal( - List owners, Urn resourceUrn, EntityService entityService) { + List owners, Urn resourceUrn, Urn actor, EntityService entityService) { Ownership ownershipAspect = (Ownership) EntityUtils.getAspectFromEntity( @@ -83,8 +84,9 @@ static MetadataChangeProposal buildAddOwnersProposal( Constants.OWNERSHIP_ASPECT_NAME, entityService, new Ownership()); + ownershipAspect.setLastModified(EntityUtils.getAuditStamp(actor)); for (OwnerInput input : owners) { - addOwner( + addOwnerToAspect( ownershipAspect, UrnUtils.getUrn(input.getOwnerUrn()), 
input.getType(), @@ -96,7 +98,7 @@ static MetadataChangeProposal buildAddOwnersProposal( public static MetadataChangeProposal buildRemoveOwnersProposal( List ownerUrns, - Optional maybeOwnershipTypeUrn, + @Nullable Urn ownershipTypeUrn, Urn resourceUrn, Urn actor, EntityService entityService) { @@ -108,36 +110,19 @@ public static MetadataChangeProposal buildRemoveOwnersProposal( entityService, new Ownership()); ownershipAspect.setLastModified(EntityUtils.getAuditStamp(actor)); - removeOwnersIfExists(ownershipAspect, ownerUrns, maybeOwnershipTypeUrn); + removeOwnersIfExists(ownershipAspect, ownerUrns, ownershipTypeUrn); return buildMetadataChangeProposalWithUrn( resourceUrn, Constants.OWNERSHIP_ASPECT_NAME, ownershipAspect); } - private static void addOwner( - Ownership ownershipAspect, Urn ownerUrn, OwnershipType type, Urn ownershipUrn) { + private static void addOwnerToAspect( + Ownership ownershipAspect, Urn ownerUrn, OwnershipType type, Urn ownershipTypeUrn) { if (!ownershipAspect.hasOwners()) { ownershipAspect.setOwners(new OwnerArray()); } - final OwnerArray ownerArray = new OwnerArray(ownershipAspect.getOwners()); - ownerArray.removeIf( - owner -> { - // Remove old ownership if it exists (check ownerUrn + type (entity & deprecated type)) - - // Owner is not what we are looking for - if (!owner.getOwner().equals(ownerUrn)) { - return false; - } - - // Check custom entity type urn if exists - if (owner.getTypeUrn() != null) { - return owner.getTypeUrn().equals(ownershipUrn); - } - - // Fall back to mapping deprecated type to the new ownership entity, if it matches remove - return mapOwnershipTypeToEntity(OwnershipType.valueOf(owner.getType().toString()).name()) - .equals(ownershipUrn.toString()); - }); + OwnerArray ownerArray = new OwnerArray(ownershipAspect.getOwners()); + removeExistingOwnerIfExists(ownerArray, ownerUrn, ownershipTypeUrn); Owner newOwner = new Owner(); @@ -150,49 +135,52 @@ private static void addOwner( : com.linkedin.common.OwnershipType.CUSTOM; newOwner.setType(gmsType); - newOwner.setTypeUrn(ownershipUrn); + newOwner.setTypeUrn(ownershipTypeUrn); newOwner.setSource(new OwnershipSource().setType(OwnershipSourceType.MANUAL)); newOwner.setOwner(ownerUrn); ownerArray.add(newOwner); ownershipAspect.setOwners(ownerArray); } + private static void removeExistingOwnerIfExists( + OwnerArray ownerArray, Urn ownerUrn, Urn ownershipTypeUrn) { + ownerArray.removeIf( + owner -> { + // Remove old ownership if it exists (check ownerUrn + type (entity & deprecated type)) + return isOwnerEqual(owner, ownerUrn, ownershipTypeUrn); + }); + } + + public static boolean isOwnerEqual( + @Nonnull Owner owner, @Nonnull Urn ownerUrn, @Nullable Urn ownershipTypeUrn) { + if (!owner.getOwner().equals(ownerUrn)) { + return false; + } + if (owner.getTypeUrn() != null) { + return owner.getTypeUrn().equals(ownershipTypeUrn); + } + if (ownershipTypeUrn == null) { + return true; + } + // Fall back to mapping deprecated type to the new ownership entity + return mapOwnershipTypeToEntity(OwnershipType.valueOf(owner.getType().toString()).name()) + .equals(ownershipTypeUrn.toString()); + } + private static void removeOwnersIfExists( - Ownership ownership, List ownerUrns, Optional maybeOwnershipTypeUrn) { - if (!ownership.hasOwners()) { - ownership.setOwners(new OwnerArray()); + Ownership ownershipAspect, List ownerUrns, Urn ownershipTypeUrn) { + if (!ownershipAspect.hasOwners()) { + ownershipAspect.setOwners(new OwnerArray()); } - OwnerArray ownerArray = ownership.getOwners(); + OwnerArray ownerArray = 
ownershipAspect.getOwners(); for (Urn ownerUrn : ownerUrns) { - if (maybeOwnershipTypeUrn.isPresent()) { - ownerArray.removeIf( - owner -> { - // Remove ownership if it exists (check ownerUrn + type (entity & deprecated type)) - - // Owner is not what we are looking for - if (!owner.getOwner().equals(ownerUrn)) { - return false; - } - - // Check custom entity type urn if exists - if (owner.getTypeUrn() != null) { - return owner.getTypeUrn().equals(maybeOwnershipTypeUrn.get()); - } - - // Fall back to mapping deprecated type to the new ownership entity, if it matches - // remove - return mapOwnershipTypeToEntity( - OwnershipType.valueOf(owner.getType().toString()).name()) - .equals(maybeOwnershipTypeUrn.get().toString()); - }); - } else { - ownerArray.removeIf(owner -> owner.getOwner().equals(ownerUrn)); - } + removeExistingOwnerIfExists(ownerArray, ownerUrn, ownershipTypeUrn); } } - public static boolean isAuthorizedToUpdateOwners(@Nonnull QueryContext context, Urn resourceUrn) { + public static void validateAuthorizedToUpdateOwners( + @Nonnull QueryContext context, Urn resourceUrn) { final DisjunctivePrivilegeGroup orPrivilegeGroups = new DisjunctivePrivilegeGroup( ImmutableList.of( @@ -200,26 +188,27 @@ public static boolean isAuthorizedToUpdateOwners(@Nonnull QueryContext context, new ConjunctivePrivilegeGroup( ImmutableList.of(PoliciesConfig.EDIT_ENTITY_OWNERS_PRIVILEGE.getType())))); - return AuthorizationUtils.isAuthorized( - context.getAuthorizer(), - context.getActorUrn(), - resourceUrn.getEntityType(), - resourceUrn.toString(), - orPrivilegeGroups); + boolean authorized = + AuthorizationUtils.isAuthorized( + context.getAuthorizer(), + context.getActorUrn(), + resourceUrn.getEntityType(), + resourceUrn.toString(), + orPrivilegeGroups); + if (!authorized) { + throw new AuthorizationException( + "Unauthorized to update owners. Please contact your DataHub administrator."); + } } - public static Boolean validateAddOwnerInput( + public static void validateAddOwnerInput( List owners, Urn resourceUrn, EntityService entityService) { for (OwnerInput owner : owners) { - boolean result = validateAddOwnerInput(owner, resourceUrn, entityService); - if (!result) { - return false; - } + validateAddOwnerInput(owner, resourceUrn, entityService); } - return true; } - public static Boolean validateAddOwnerInput( + public static void validateAddOwnerInput( OwnerInput owner, Urn resourceUrn, EntityService entityService) { if (!entityService.exists(resourceUrn)) { @@ -229,8 +218,6 @@ public static Boolean validateAddOwnerInput( } validateOwner(owner, entityService); - - return true; } public static void validateOwner(OwnerInput owner, EntityService entityService) { @@ -277,23 +264,26 @@ public static void validateOwner(OwnerInput owner, EntityService entityService) } } - public static Boolean validateRemoveInput(Urn resourceUrn, EntityService entityService) { + public static void validateRemoveInput(Urn resourceUrn, EntityService entityService) { if (!entityService.exists(resourceUrn)) { throw new IllegalArgumentException( String.format( "Failed to change ownership for resource %s. 
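The matching rules consolidated into `isOwnerEqual` above are easier to read outside of the record classes. Below is a minimal Python sketch of the same decision order; plain dicts stand in for the `Owner` record, and the legacy-type mapping is truncated to the two system URNs exercised by the new unit test.

```python
from typing import Optional

# Truncated stand-in for OwnerUtils.mapOwnershipTypeToEntity (only the two
# system ownership types referenced by the new OwnerUtilsTest).
LEGACY_TYPE_TO_URN = {
    "TECHNICAL_OWNER": "urn:li:ownershipType:__system__technical_owner",
    "BUSINESS_OWNER": "urn:li:ownershipType:__system__business_owner",
}


def is_owner_equal(owner: dict, owner_urn: str, ownership_type_urn: Optional[str]) -> bool:
    if owner["owner"] != owner_urn:
        return False  # different principal, never a match
    if owner.get("typeUrn") is not None:
        return owner["typeUrn"] == ownership_type_urn  # explicit type URN wins
    if ownership_type_urn is None:
        return True  # caller did not constrain the ownership type
    # fall back to mapping the deprecated enum value onto an ownershipType URN
    return LEGACY_TYPE_TO_URN.get(owner.get("type")) == ownership_type_urn


if __name__ == "__main__":
    tech = "urn:li:ownershipType:__system__technical_owner"
    legacy = {"owner": "urn:li:corpuser:foo", "type": "TECHNICAL_OWNER"}
    assert is_owner_equal(legacy, "urn:li:corpuser:foo", tech)
    assert not is_owner_equal(legacy, "urn:li:corpuser:bar", tech)
    assert is_owner_equal({"owner": "urn:li:corpuser:foo"}, "urn:li:corpuser:foo", None)
```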
Resource does not exist.", resourceUrn)); } - return true; } public static void addCreatorAsOwner( QueryContext context, String urn, OwnerEntityType ownerEntityType, - OwnershipType ownershipType, EntityService entityService) { try { Urn actorUrn = CorpuserUrn.createFromString(context.getActorUrn()); + OwnershipType ownershipType = OwnershipType.TECHNICAL_OWNER; + if (!entityService.exists(UrnUtils.getUrn(mapOwnershipTypeToEntity(ownershipType.name())))) { + log.warn("Technical owner does not exist, defaulting to None ownership."); + ownershipType = OwnershipType.NONE; + } String ownershipTypeUrn = mapOwnershipTypeToEntity(ownershipType.name()); if (!entityService.exists(UrnUtils.getUrn(ownershipTypeUrn))) { diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/tag/CreateTagResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/tag/CreateTagResolver.java index 153c95c697a77..9e3ca0f2d45a6 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/tag/CreateTagResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/tag/CreateTagResolver.java @@ -2,17 +2,14 @@ import static com.linkedin.datahub.graphql.resolvers.ResolverUtils.*; import static com.linkedin.datahub.graphql.resolvers.mutate.MutationUtils.*; -import static com.linkedin.datahub.graphql.resolvers.mutate.util.OwnerUtils.*; import static com.linkedin.metadata.Constants.*; -import com.linkedin.common.urn.UrnUtils; import com.linkedin.data.template.SetMode; import com.linkedin.datahub.graphql.QueryContext; import com.linkedin.datahub.graphql.authorization.AuthorizationUtils; import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.datahub.graphql.generated.CreateTagInput; import com.linkedin.datahub.graphql.generated.OwnerEntityType; -import com.linkedin.datahub.graphql.generated.OwnershipType; import com.linkedin.datahub.graphql.resolvers.mutate.util.OwnerUtils; import com.linkedin.entity.client.EntityClient; import com.linkedin.metadata.entity.EntityService; @@ -72,15 +69,9 @@ public CompletableFuture get(DataFetchingEnvironment environment) throws key, TAG_ENTITY_NAME, TAG_PROPERTIES_ASPECT_NAME, mapTagProperties(input)); String tagUrn = _entityClient.ingestProposal(proposal, context.getAuthentication(), false); - OwnershipType ownershipType = OwnershipType.TECHNICAL_OWNER; - if (!_entityService.exists( - UrnUtils.getUrn(mapOwnershipTypeToEntity(ownershipType.name())))) { - log.warn("Technical owner does not exist, defaulting to None ownership."); - ownershipType = OwnershipType.NONE; - } OwnerUtils.addCreatorAsOwner( - context, tagUrn, OwnerEntityType.CORP_USER, ownershipType, _entityService); + context, tagUrn, OwnerEntityType.CORP_USER, _entityService); return tagUrn; } catch (Exception e) { log.error( diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/utils/OwnerUtilsTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/utils/OwnerUtilsTest.java new file mode 100644 index 0000000000000..b4097d9dd045d --- /dev/null +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/utils/OwnerUtilsTest.java @@ -0,0 +1,110 @@ +package com.linkedin.datahub.graphql.utils; + +import static org.testng.AssertJUnit.*; + +import com.linkedin.common.Owner; +import com.linkedin.common.OwnershipType; +import com.linkedin.common.urn.Urn; +import com.linkedin.datahub.graphql.resolvers.mutate.util.OwnerUtils; +import 
java.net.URISyntaxException; +import org.testng.annotations.Test; + +public class OwnerUtilsTest { + + public static String TECHNICAL_OWNER_OWNERSHIP_TYPE_URN = + "urn:li:ownershipType:__system__technical_owner"; + public static String BUSINESS_OWNER_OWNERSHIP_TYPE_URN = + "urn:li:ownershipType:__system__business_owner"; + + @Test + public void testMapOwnershipType() { + assertEquals( + OwnerUtils.mapOwnershipTypeToEntity("TECHNICAL_OWNER"), TECHNICAL_OWNER_OWNERSHIP_TYPE_URN); + } + + @Test + public void testIsOwnerEqualUrnOnly() throws URISyntaxException { + Urn ownerUrn1 = new Urn("urn:li:corpuser:foo"); + Owner owner1 = new Owner(); + owner1.setOwner(ownerUrn1); + assertTrue(OwnerUtils.isOwnerEqual(owner1, ownerUrn1, null)); + + Urn ownerUrn2 = new Urn("urn:li:corpuser:bar"); + assertFalse(OwnerUtils.isOwnerEqual(owner1, ownerUrn2, null)); + } + + @Test + public void testIsOwnerEqualWithLegacyTypeOnly() throws URISyntaxException { + + Urn technicalOwnershipTypeUrn = new Urn(TECHNICAL_OWNER_OWNERSHIP_TYPE_URN); + Urn ownerUrn1 = new Urn("urn:li:corpuser:foo"); + Owner ownerWithTechnicalOwnership = new Owner(); + ownerWithTechnicalOwnership.setOwner(ownerUrn1); + ownerWithTechnicalOwnership.setType(OwnershipType.TECHNICAL_OWNER); + + assertTrue( + OwnerUtils.isOwnerEqual(ownerWithTechnicalOwnership, ownerUrn1, technicalOwnershipTypeUrn)); + + Owner ownerWithBusinessOwnership = new Owner(); + ownerWithBusinessOwnership.setOwner(ownerUrn1); + ownerWithBusinessOwnership.setType(OwnershipType.BUSINESS_OWNER); + assertFalse( + OwnerUtils.isOwnerEqual( + ownerWithBusinessOwnership, ownerUrn1, new Urn(TECHNICAL_OWNER_OWNERSHIP_TYPE_URN))); + } + + @Test + public void testIsOwnerEqualOnlyOwnershipTypeUrn() throws URISyntaxException { + + Urn technicalOwnershipTypeUrn = new Urn(TECHNICAL_OWNER_OWNERSHIP_TYPE_URN); + Urn businessOwnershipTypeUrn = new Urn(BUSINESS_OWNER_OWNERSHIP_TYPE_URN); + Urn ownerUrn1 = new Urn("urn:li:corpuser:foo"); + + Owner ownerWithTechnicalOwnership = new Owner(); + ownerWithTechnicalOwnership.setOwner(ownerUrn1); + ownerWithTechnicalOwnership.setTypeUrn(technicalOwnershipTypeUrn); + + Owner ownerWithBusinessOwnership = new Owner(); + ownerWithBusinessOwnership.setOwner(ownerUrn1); + ownerWithBusinessOwnership.setTypeUrn(businessOwnershipTypeUrn); + + Owner ownerWithoutOwnershipType = new Owner(); + ownerWithoutOwnershipType.setOwner(ownerUrn1); + ownerWithoutOwnershipType.setType(OwnershipType.NONE); + + assertTrue( + OwnerUtils.isOwnerEqual(ownerWithTechnicalOwnership, ownerUrn1, technicalOwnershipTypeUrn)); + assertFalse( + OwnerUtils.isOwnerEqual(ownerWithBusinessOwnership, ownerUrn1, technicalOwnershipTypeUrn)); + assertFalse(OwnerUtils.isOwnerEqual(ownerWithTechnicalOwnership, ownerUrn1, null)); + assertTrue(OwnerUtils.isOwnerEqual(ownerWithoutOwnershipType, ownerUrn1, null)); + } + + public void testIsOwnerEqualWithBothLegacyAndNewType() throws URISyntaxException { + Urn technicalOwnershipTypeUrn = new Urn(TECHNICAL_OWNER_OWNERSHIP_TYPE_URN); + Urn businessOwnershipTypeUrn = new Urn(BUSINESS_OWNER_OWNERSHIP_TYPE_URN); + Urn ownerUrn1 = new Urn("urn:li:corpuser:foo"); + + Owner ownerWithLegacyTechnicalOwnership = new Owner(); + ownerWithLegacyTechnicalOwnership.setOwner(ownerUrn1); + ownerWithLegacyTechnicalOwnership.setType(OwnershipType.TECHNICAL_OWNER); + + assertTrue( + OwnerUtils.isOwnerEqual( + ownerWithLegacyTechnicalOwnership, ownerUrn1, technicalOwnershipTypeUrn)); + assertFalse( + OwnerUtils.isOwnerEqual( + ownerWithLegacyTechnicalOwnership, ownerUrn1, 
businessOwnershipTypeUrn)); + + Owner ownerWithNewTechnicalOwnership = new Owner(); + ownerWithLegacyTechnicalOwnership.setOwner(ownerUrn1); + ownerWithLegacyTechnicalOwnership.setTypeUrn(technicalOwnershipTypeUrn); + + assertTrue( + OwnerUtils.isOwnerEqual( + ownerWithNewTechnicalOwnership, ownerUrn1, technicalOwnershipTypeUrn)); + assertFalse( + OwnerUtils.isOwnerEqual( + ownerWithNewTechnicalOwnership, ownerUrn1, businessOwnershipTypeUrn)); + } +} From 2270a6d5f9768e82cb3e6687184a23991a0ba78e Mon Sep 17 00:00:00 2001 From: RyanHolstien Date: Tue, 9 Jan 2024 14:15:50 -0600 Subject: [PATCH 04/16] fix(frontend): restrict redirect uri domain (#9592) --- .../controllers/AuthenticationController.java | 15 ++++++++++++- .../test/app/ApplicationTest.java | 21 ++++++++++++++++++- 2 files changed, 34 insertions(+), 2 deletions(-) diff --git a/datahub-frontend/app/controllers/AuthenticationController.java b/datahub-frontend/app/controllers/AuthenticationController.java index 9c232e965a003..d9568c25f6e8c 100644 --- a/datahub-frontend/app/controllers/AuthenticationController.java +++ b/datahub-frontend/app/controllers/AuthenticationController.java @@ -15,12 +15,15 @@ import com.linkedin.common.urn.CorpuserUrn; import com.linkedin.common.urn.Urn; import com.typesafe.config.Config; +import java.net.URI; +import java.net.URISyntaxException; import java.net.URLEncoder; import java.nio.charset.StandardCharsets; import java.util.Base64; import java.util.Optional; import javax.annotation.Nonnull; import javax.inject.Inject; +import org.apache.commons.httpclient.InvalidRedirectLocationException; import org.apache.commons.lang3.StringUtils; import org.pac4j.core.client.Client; import org.pac4j.core.context.Cookie; @@ -86,7 +89,17 @@ public Result authenticate(Http.Request request) { final Optional maybeRedirectPath = Optional.ofNullable(request.getQueryString(AUTH_REDIRECT_URI_PARAM)); - final String redirectPath = maybeRedirectPath.orElse("/"); + String redirectPath = maybeRedirectPath.orElse("/"); + try { + URI redirectUri = new URI(redirectPath); + if (redirectUri.getScheme() != null || redirectUri.getAuthority() != null) { + throw new InvalidRedirectLocationException("Redirect location must be relative to the base url, cannot " + + "redirect to other domains: " + redirectPath, redirectPath); + } + } catch (URISyntaxException | InvalidRedirectLocationException e) { + _logger.warn(e.getMessage()); + redirectPath = "/"; + } if (AuthUtils.hasValidSessionCookie(request)) { return Results.redirect(redirectPath); diff --git a/datahub-frontend/test/app/ApplicationTest.java b/datahub-frontend/test/app/ApplicationTest.java index 8d80c2cfaa47d..534cffb5cc7fe 100644 --- a/datahub-frontend/test/app/ApplicationTest.java +++ b/datahub-frontend/test/app/ApplicationTest.java @@ -195,8 +195,27 @@ public void testAPI() throws ParseException { } @Test - public void testOidcRedirectToRequestedUrl() throws InterruptedException { + public void testOidcRedirectToRequestedUrl() { browser.goTo("/authenticate?redirect_uri=%2Fcontainer%2Furn%3Ali%3Acontainer%3ADATABASE"); assertEquals("container/urn:li:container:DATABASE", browser.url()); } + + /** + * The Redirect Uri parameter is used to store a previous relative location within the app to be able to + * take a user back to their expected page. Redirecting to other domains should be blocked. 
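The open-redirect guard added to AuthenticationController above reduces to one rule: a redirect target is honoured only when it is relative to the current host. A rough Python equivalent of the same check follows; this is a sketch of the rule, not the controller code itself.

```python
from urllib.parse import urlparse


def safe_redirect_path(requested: str, default: str = "/") -> str:
    """Honour the requested path only if it carries no scheme and no authority,
    mirroring the URI check added to AuthenticationController.authenticate()."""
    try:
        parsed = urlparse(requested)
    except ValueError:
        return default  # unparseable input, same fallback as the URISyntaxException branch
    if parsed.scheme or parsed.netloc:
        return default  # https://..., file://..., ftp://..., localhost:9002/... are all rejected
    return requested


if __name__ == "__main__":
    assert safe_redirect_path("/container/urn:li:container:DATABASE") == "/container/urn:li:container:DATABASE"
    assert safe_redirect_path("https://www.google.com") == "/"
    assert safe_redirect_path("file://myFile") == "/"
    assert safe_redirect_path("localhost:9002/login") == "/"
```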
+ */ + @Test + public void testInvalidRedirectUrl() { + browser.goTo("/authenticate?redirect_uri=https%3A%2F%2Fwww.google.com"); + assertEquals("", browser.url()); + + browser.goTo("/authenticate?redirect_uri=file%3A%2F%2FmyFile"); + assertEquals("", browser.url()); + + browser.goTo("/authenticate?redirect_uri=ftp%3A%2F%2FsomeFtp"); + assertEquals("", browser.url()); + + browser.goTo("/authenticate?redirect_uri=localhost%3A9002%2Flogin"); + assertEquals("", browser.url()); + } } From aa28837eede128d1117bcab9664143e890d27119 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Tue, 9 Jan 2024 16:07:14 -0500 Subject: [PATCH 05/16] docs(slack): add note about private Slack channels (#9589) --- docs/managed-datahub/saas-slack-setup.md | 6 ++++-- .../docs/sources/postgres/postgres_recipe.yml | 3 --- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/docs/managed-datahub/saas-slack-setup.md b/docs/managed-datahub/saas-slack-setup.md index 8d4519b878cd8..1b98f3a30773a 100644 --- a/docs/managed-datahub/saas-slack-setup.md +++ b/docs/managed-datahub/saas-slack-setup.md @@ -71,13 +71,13 @@ To enable the integration with slack - Enter a **Default Slack Channel** - this is where all notifications will be routed unless - Click **Update** to save your settings -![](https://raw.githubusercontent.com/datahub-project/static-assets/main/imgs/integrations/slack/slack_add_token.png) + To enable and disable specific types of notifications, or configure custom routing for notifications, start by navigating to **Settings > Notifications**. To enable or disable a specific notification type in Slack, simply click the check mark. By default, all notification types are enabled. To customize the channel where notifications are send, click the button to the right of the check box. -![](https://raw.githubusercontent.com/datahub-project/static-assets/main/imgs/integrations/slack/slack_channel.png) + If provided, a custom channel will be used to route notifications of the given type. If not provided, the default channel will be used. That's it! You should begin to receive notifications on Slack. Note that it may take up to 1 minute for notification settings to take effect after saving. @@ -89,6 +89,8 @@ For now we support sending notifications to - Slack Channel Name (e.g. `#troubleshoot`) - Specific Users (aka Direct Messages or DMs) via user ID +By default, the Slack app will be able to send notifications to public channels. If you want to send notifications to private channels or DMs, you will need to invite the Slack app to those channels. + ## How to find Team ID and Channel ID in Slack - Go to the Slack channel for which you want to get channel ID diff --git a/metadata-ingestion/docs/sources/postgres/postgres_recipe.yml b/metadata-ingestion/docs/sources/postgres/postgres_recipe.yml index d6980dc599c5e..6b671c79848dc 100644 --- a/metadata-ingestion/docs/sources/postgres/postgres_recipe.yml +++ b/metadata-ingestion/docs/sources/postgres/postgres_recipe.yml @@ -9,9 +9,6 @@ source: username: user password: pass - # Options - database_alias: DatabaseNameToBeIngested - # Optional: SSL configuration. 
# options: # connect_args: From e3f2c52d7e6cfdc3b49804575538fc25b746525f Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Tue, 9 Jan 2024 16:07:49 -0500 Subject: [PATCH 06/16] fix(ingest/snowflake): set platform instance for foreign keys (#9577) --- .../datahub/ingestion/source/snowflake/snowflake_v2.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py index a5c07d9a3870c..50f50ec647434 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py @@ -13,7 +13,6 @@ from datahub.configuration.pattern_utils import is_schema_allowed from datahub.emitter.mce_builder import ( make_data_platform_urn, - make_dataset_urn, make_dataset_urn_with_platform_instance, make_schema_field_urn, make_tag_urn, @@ -1237,12 +1236,13 @@ def build_foreign_keys( ) -> List[ForeignKeyConstraint]: foreign_keys = [] for fk in table.foreign_keys: - foreign_dataset = make_dataset_urn( - self.platform, - self.get_dataset_identifier( + foreign_dataset = make_dataset_urn_with_platform_instance( + platform=self.platform, + name=self.get_dataset_identifier( fk.referred_table, fk.referred_schema, fk.referred_database ), - self.config.env, + env=self.config.env, + platform_instance=self.config.platform_instance, ) foreign_keys.append( ForeignKeyConstraint( From cca1e9dd495e85fecaa5c128b5e9848a8e931e9f Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Tue, 9 Jan 2024 16:08:31 -0500 Subject: [PATCH 07/16] fix(ingest/redshift): include table type in custom properties (#9576) --- .../src/datahub/ingestion/source/redshift/redshift.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift.py b/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift.py index eb635b1292b81..d3b759c985233 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift.py +++ b/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift.py @@ -165,7 +165,7 @@ class RedshiftSource(StatefulIngestionSourceBase, TestableSource): #### sql_based The sql_based based collector uses Redshift's [stl_insert](https://docs.aws.amazon.com/redshift/latest/dg/r_STL_INSERT.html) to discover all the insert queries - and uses sql parsing to discover the dependecies. + and uses sql parsing to discover the dependencies. Pros: - Works with Spectrum tables @@ -189,7 +189,7 @@ class RedshiftSource(StatefulIngestionSourceBase, TestableSource): :::note - The redshift stl redshift tables which are used for getting data lineage only retain approximately two to five days of log history. This means you cannot extract lineage from queries issued outside that window. + The redshift stl redshift tables which are used for getting data lineage retain at most seven days of log history, and sometimes closer to 2-5 days. This means you cannot extract lineage from queries issued outside that window. 
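For the Snowflake foreign-key fix above, the practical effect of switching to `make_dataset_urn_with_platform_instance` is that referred tables get the same instance-qualified name as the tables the source emits. A simplified sketch of the URN shape involved (the real helper lives in `datahub.emitter.mce_builder`; the instance and table names below are example values only):

```python
from typing import Optional


def dataset_urn(platform: str, name: str, env: str = "PROD",
                platform_instance: Optional[str] = None) -> str:
    """Simplified model of a dataset URN: when a platform instance is
    configured, it is prefixed onto the dataset name."""
    qualified = f"{platform_instance}.{name}" if platform_instance else name
    return f"urn:li:dataset:(urn:li:dataPlatform:{platform},{qualified},{env})"


if __name__ == "__main__":
    # With the fix, the referred (foreign-key) table and the emitted table both
    # carry the configured instance prefix, so the lineage edge resolves.
    print(dataset_urn("snowflake", "db1.schema1.orders", platform_instance="prod_instance"))
    # Before the fix the referred side was built without the instance prefix:
    print(dataset_urn("snowflake", "db1.schema1.orders"))
```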
::: @@ -592,6 +592,9 @@ def gen_table_dataset_workunits( ) -> Iterable[MetadataWorkUnit]: custom_properties = {} + if table.type: + custom_properties["table_type"] = table.type + if table.location: custom_properties["location"] = table.location From b0060cec27d09e69ce3913b6d5b8a7ea8dffde76 Mon Sep 17 00:00:00 2001 From: RyanHolstien Date: Tue, 9 Jan 2024 16:38:43 -0600 Subject: [PATCH 08/16] fix(graphql): fix npe in access mapper (#9593) --- .../graphql/types/rolemetadata/mappers/AccessMapper.java | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/rolemetadata/mappers/AccessMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/rolemetadata/mappers/AccessMapper.java index 3eb090e452439..2d6bd31c84fd9 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/rolemetadata/mappers/AccessMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/rolemetadata/mappers/AccessMapper.java @@ -1,5 +1,6 @@ package com.linkedin.datahub.graphql.types.rolemetadata.mappers; +import com.linkedin.common.RoleAssociationArray; import com.linkedin.common.urn.Urn; import com.linkedin.datahub.graphql.generated.EntityType; import com.linkedin.datahub.graphql.generated.Role; @@ -19,8 +20,10 @@ public com.linkedin.datahub.graphql.generated.Access apply( @Nonnull final com.linkedin.common.Access access, @Nonnull final Urn entityUrn) { com.linkedin.datahub.graphql.generated.Access result = new com.linkedin.datahub.graphql.generated.Access(); + RoleAssociationArray roles = + access.getRoles() != null ? access.getRoles() : new RoleAssociationArray(); result.setRoles( - access.getRoles().stream() + roles.stream() .map(association -> this.mapRoleAssociation(association, entityUrn)) .collect(Collectors.toList())); return result; From 39e88ef16d98db1326b3821e93cb941406852829 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Wed, 10 Jan 2024 03:56:29 -0500 Subject: [PATCH 09/16] fix(ingest/bigquery): support google-cloud-bigquery 3.15.0 (#9595) --- .../src/datahub/emitter/sql_parsing_builder.py | 2 +- .../src/datahub/ingestion/run/pipeline.py | 4 ++++ .../src/datahub/ingestion/source/ge_data_profiler.py | 10 +++++++--- .../ingestion/source/snowflake/snowflake_lineage_v2.py | 4 ++-- 4 files changed, 14 insertions(+), 6 deletions(-) diff --git a/metadata-ingestion/src/datahub/emitter/sql_parsing_builder.py b/metadata-ingestion/src/datahub/emitter/sql_parsing_builder.py index ea5ebf705707a..046b615bd4e9f 100644 --- a/metadata-ingestion/src/datahub/emitter/sql_parsing_builder.py +++ b/metadata-ingestion/src/datahub/emitter/sql_parsing_builder.py @@ -92,7 +92,7 @@ class SqlParsingBuilder: def __post_init__(self) -> None: if self.usage_config: self._usage_aggregator = UsageAggregator(self.usage_config) - else: + elif self.generate_usage_statistics: logger.info("No usage config provided, not generating usage statistics") self.generate_usage_statistics = False diff --git a/metadata-ingestion/src/datahub/ingestion/run/pipeline.py b/metadata-ingestion/src/datahub/ingestion/run/pipeline.py index 1641d71aba199..70ff6992645e7 100644 --- a/metadata-ingestion/src/datahub/ingestion/run/pipeline.py +++ b/metadata-ingestion/src/datahub/ingestion/run/pipeline.py @@ -137,6 +137,7 @@ class CliReport(Report): disk_info: Optional[dict] = None peak_disk_usage: Optional[str] = None + _initial_disk_usage: int = -1 _peak_disk_usage: int = 0 thread_count: Optional[int] = None @@ 
-156,12 +157,15 @@ def compute_stats(self) -> None: try: disk_usage = shutil.disk_usage("/") + if self._initial_disk_usage < 0: + self._initial_disk_usage = disk_usage.used if self._peak_disk_usage < disk_usage.used: self._peak_disk_usage = disk_usage.used self.peak_disk_usage = humanfriendly.format_size(self._peak_disk_usage) self.disk_info = { "total": humanfriendly.format_size(disk_usage.total), "used": humanfriendly.format_size(disk_usage.used), + "used_initally": humanfriendly.format_size(self._initial_disk_usage), "free": humanfriendly.format_size(disk_usage.free), } except Exception as e: diff --git a/metadata-ingestion/src/datahub/ingestion/source/ge_data_profiler.py b/metadata-ingestion/src/datahub/ingestion/source/ge_data_profiler.py index abb415c90cc8b..4f1ad00b1e425 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/ge_data_profiler.py +++ b/metadata-ingestion/src/datahub/ingestion/source/ge_data_profiler.py @@ -1283,9 +1283,13 @@ def create_bigquery_temp_table( # temporary table dance. However, that would require either a) upgrading to # use GE's batch v3 API or b) bypassing GE altogether. - query_job: Optional[ - "google.cloud.bigquery.job.query.QueryJob" - ] = cursor._query_job + query_job: Optional["google.cloud.bigquery.job.query.QueryJob"] = ( + # In google-cloud-bigquery 3.15.0, the _query_job attribute was + # made public and renamed to query_job. + cursor.query_job + if hasattr(cursor, "query_job") + else cursor._query_job # type: ignore[attr-defined] + ) assert query_job temp_destination_table = query_job.destination bigquery_temp_table = f"{temp_destination_table.project}.{temp_destination_table.dataset_id}.{temp_destination_table.table_id}" diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_lineage_v2.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_lineage_v2.py index 4219533dc217c..142dbbf12f010 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_lineage_v2.py @@ -353,13 +353,13 @@ def _populate_external_lineage_map(self, discovered_tables: List[str]) -> None: self._populate_external_lineage_from_copy_history(discovered_tables) logger.info( - "Done populating external lineage from copy history." + "Done populating external lineage from copy history. " f"Found {self.report.num_external_table_edges_scanned} external lineage edges so far." ) self._populate_external_lineage_from_show_query(discovered_tables) logger.info( - "Done populating external lineage from show external tables." + "Done populating external lineage from show external tables. " f"Found {self.report.num_external_table_edges_scanned} external lineage edges so far." 
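The google-cloud-bigquery compatibility shim above boils down to preferring the public attribute when it exists. A tiny standalone illustration of the same pattern, with dummy cursor classes standing in for the real DB-API cursor:

```python
def get_query_job(cursor):
    """Prefer the public attribute introduced in google-cloud-bigquery 3.15.0,
    fall back to the private attribute used by earlier releases."""
    return cursor.query_job if hasattr(cursor, "query_job") else cursor._query_job


class OldCursor:  # pre-3.15.0 shape: only the private attribute exists
    _query_job = "job-from-private-attr"


class NewCursor:  # 3.15.0+ shape: the attribute was made public
    query_job = "job-from-public-attr"


if __name__ == "__main__":
    assert get_query_job(OldCursor()) == "job-from-private-attr"
    assert get_query_job(NewCursor()) == "job-from-public-attr"
```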
) From 8efe30f5f810bcbb7ce2b2f4e18fa4e3c5ca0f46 Mon Sep 17 00:00:00 2001 From: Kunal-kankriya <127090035+Kunal-kankriya@users.noreply.github.com> Date: Wed, 10 Jan 2024 18:36:06 +0530 Subject: [PATCH 10/16] fix(tests) - update query tab tests to fix flakiness (#9598) --- .../cypress/cypress/e2e/query/query_tab.js | 128 +++++++++--------- 1 file changed, 62 insertions(+), 66 deletions(-) diff --git a/smoke-test/tests/cypress/cypress/e2e/query/query_tab.js b/smoke-test/tests/cypress/cypress/e2e/query/query_tab.js index 4d01cac15724e..015ce8c058eb8 100644 --- a/smoke-test/tests/cypress/cypress/e2e/query/query_tab.js +++ b/smoke-test/tests/cypress/cypress/e2e/query/query_tab.js @@ -1,75 +1,71 @@ const DATASET_URN = 'urn:li:dataset:(urn:li:dataPlatform:hdfs,SampleCypressHdfsDataset,PROD)'; +const runId = Date.now() -describe("manage queries", () => { - beforeEach(() => { - cy.login(); - cy.goToDataset( - DATASET_URN, - "SampleCypressHdfsDataset" - ); - cy.hideOnboardingTour(); - cy.openEntityTab("Queries") - }) - - it("go to queries tab on dataset page then, create, edit, make default, delete a view", () => { - const runId = Date.now() - - // Headers - cy.waitTextVisible("Highlighted Queries"); - cy.ensureTextNotPresent("Recent Queries"); - - // Add new Query - cy.get('[data-testid="add-query-button"]').click(); - cy.get('[class="query-builder-editor-input"]').click(); - cy.get('[class="query-builder-editor-input"]').type(` + Test Query-${runId}`); - cy.get('[data-testid="query-builder-title-input"]').click(); - cy.get('[data-testid="query-builder-title-input"]').type(`Test Table-${runId}`); - cy.get('.ProseMirror').click(); - cy.get('.ProseMirror').type(`Test Description-${runId}`); - cy.get('[data-testid="query-builder-save-button"]').click(); - cy.waitTextVisible("Created Query!"); - - // Verify the card - cy.waitTextVisible(`+ Test Query-${runId}`); - cy.waitTextVisible(`Test Table-${runId}`); - cy.waitTextVisible(`Test Description-${runId}`); - cy.waitTextVisible("Created on"); - - // View the Query - cy.get('[data-testid="query-content-0"]').click(); - cy.get('.ant-modal-content').waitTextVisible(`+ Test Query-${runId}`); - cy.get('.ant-modal-content').waitTextVisible(`Test Table-${runId}`); - cy.get('.ant-modal-content').waitTextVisible(`Test Description-${runId}`); - cy.get('[data-testid="query-modal-close-button"]').click(); +const addNewQuery = () => { + cy.get('[data-testid="add-query-button"]').click(); + cy.get('[data-mode-id="sql"]').click() + .type(` + Test Query-${runId}`); + cy.get('[data-testid="query-builder-title-input"]').click() + .type(`Test Table-${runId}`); + cy.get('.ProseMirror').click() + .type(`Test Description-${runId}`); + cy.get('[data-testid="query-builder-save-button"]').click(); + cy.waitTextVisible("Created Query!"); +} - // Edit the Query - cy.get('[data-testid="query-edit-button-0"]').click() - cy.get('[class="query-builder-editor-input"]').click(); - cy.get('[class="query-builder-editor-input"]').type(` + Edited Query-${runId}`); - cy.get('[data-testid="query-builder-title-input"]').click(); - cy.get('[data-testid="query-builder-title-input"]').clear(); - cy.get('[data-testid="query-builder-title-input"]').type(`Edited Table-${runId}`); - cy.get('.ProseMirror').click(); - cy.get('.ProseMirror').clear(); - cy.get('.ProseMirror').type(`Edited Description-${runId}`); - cy.get('[data-testid="query-builder-save-button"]').click(); - cy.waitTextVisible("Edited Query!"); +const editQuery = () => { + cy.get('[data-testid="query-edit-button-0"]').click() + 
cy.get('[data-mode-id="sql"]').click() + .type(` + Edited Query-${runId}`); + cy.get('[data-testid="query-builder-title-input"]').click().clear() + .type(`Edited Table-${runId}`); + cy.get('.ProseMirror').click().clear() + .type(`Edited Description-${runId}`); + cy.get('[data-testid="query-builder-save-button"]').click(); + cy.waitTextVisible("Edited Query!"); + } - // Verify edited Query card - cy.get('[data-testid="query-content-0"]').scrollIntoView().should('be.visible'); - cy.waitTextVisible(`+ Test Query-${runId} + Edited Query-${runId}`); - cy.waitTextVisible(`Edited Table-${runId}`); - cy.waitTextVisible(`Edited Description-${runId}`); - - // Delete the Query + const deleteQuery = () => { cy.get('[data-testid="query-more-button-0"]').click(); - cy.get('[data-testid="query-delete-button-0"]').click(); - cy.contains('Yes').click(); + cy.clickOptionWithText("Delete"); + cy.clickOptionWithText('Yes') cy.waitTextVisible("Deleted Query!"); + } + + const verifyViewCardDetails = (query,title,description) => { + cy.get('[data-testid="query-content-0"]').scrollIntoView().should('be.visible').click() + cy.get('.ant-modal-content').waitTextVisible(query); + cy.get('.ant-modal-content').waitTextVisible(title); + cy.get('.ant-modal-content').waitTextVisible(description); +}; - // Query should be gone - cy.ensureTextNotPresent(`+ Test Query-${runId} + Edited Query-${runId}`); - cy.ensureTextNotPresent(`Edited Table-${runId}`); - cy.ensureTextNotPresent(`Edited Description-${runId}`); +describe("manage queries", () => { + beforeEach(() => { + cy.loginWithCredentials(); + cy.goToDataset(DATASET_URN,"SampleCypressHdfsDataset"); + cy.openEntityTab("Queries") + }) + + it("go to queries tab on dataset page then create query and verify & view the card", () => { + cy.waitTextVisible("Highlighted Queries"); + cy.ensureTextNotPresent("Recent Queries"); + addNewQuery(); + cy.waitTextVisible(`+ Test Query-${runId}`); + cy.waitTextVisible(`Test Table-${runId}`); + cy.waitTextVisible(`Test Description-${runId}`); + cy.waitTextVisible("Created on"); + verifyViewCardDetails(`+ Test Query-${runId}`,`Test Table-${runId}`,`Test Description-${runId}`) }); + + it("go to queries tab on dataset page then edit the query and verify edited Query card", () => { + editQuery(); + verifyViewCardDetails(`+ Test Query-${runId} + Edited Query-${runId}`,`Edited Table-${runId}`,`Edited Description-${runId}`) + }); + + it("go to queries tab on dataset page then delete the query and verify that query should be gone", () => { + deleteQuery(); + cy.ensureTextNotPresent(`+ Test Query-${runId} + Edited Query-${runId}`); + cy.ensureTextNotPresent(`Edited Table-${runId}`); + cy.ensureTextNotPresent(`Edited Description-${runId}`); + }); }); \ No newline at end of file From 3f9b90158fe0881fbcde752060d82693f5efd1ad Mon Sep 17 00:00:00 2001 From: Shubham Jagtap <132359390+shubhamjagtap639@users.noreply.github.com> Date: Wed, 10 Jan 2024 23:26:04 +0530 Subject: [PATCH 11/16] fix(ingestion/starburst-trino): Fix validation errors in TrinoJoinedAccessEvent (#9580) Co-authored-by: Harshal Sheth --- .../source/usage/starburst_trino_usage.py | 31 ++++++++++++++----- .../trino_usages_golden.json | 16 +++++----- .../usage_events_history.json | 4 +-- 3 files changed, 34 insertions(+), 17 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/usage/starburst_trino_usage.py b/metadata-ingestion/src/datahub/ingestion/source/usage/starburst_trino_usage.py index 31c568941c04e..e71f640c14256 100644 --- 
a/metadata-ingestion/src/datahub/ingestion/source/usage/starburst_trino_usage.py +++ b/metadata-ingestion/src/datahub/ingestion/source/usage/starburst_trino_usage.py @@ -58,7 +58,7 @@ class TrinoConnectorInfo(BaseModel): partitionIds: List[str] - truncated: bool + truncated: Optional[bool] class TrinoAccessedMetadata(BaseModel): @@ -78,7 +78,7 @@ class TrinoJoinedAccessEvent(BaseModel): table: Optional[str] = None accessed_metadata: List[TrinoAccessedMetadata] starttime: datetime = Field(alias="create_time") - endtime: datetime = Field(alias="end_time") + endtime: Optional[datetime] = Field(alias="end_time") class EnvBasedSourceBaseConfig: @@ -102,6 +102,11 @@ def get_sql_alchemy_url(self): return super().get_sql_alchemy_url() +@dataclasses.dataclass +class TrinoUsageReport(SourceReport): + num_joined_access_events_skipped: int = 0 + + @platform_name("Trino") @config_class(TrinoUsageConfig) @support_status(SupportStatus.CERTIFIED) @@ -119,7 +124,7 @@ class TrinoUsageSource(Source): """ config: TrinoUsageConfig - report: SourceReport = dataclasses.field(default_factory=SourceReport) + report: TrinoUsageReport = dataclasses.field(default_factory=TrinoUsageReport) @classmethod def create(cls, config_dict, ctx): @@ -190,15 +195,21 @@ def _convert_str_to_datetime(self, v): def _get_joined_access_event(self, events): joined_access_events = [] for event_dict in events: - event_dict["create_time"] = self._convert_str_to_datetime( - event_dict.get("create_time") - ) + if event_dict.get("create_time"): + event_dict["create_time"] = self._convert_str_to_datetime( + event_dict["create_time"] + ) + else: + self.report.num_joined_access_events_skipped += 1 + logging.info("The create_time parameter is missing. Skipping ....") + continue event_dict["end_time"] = self._convert_str_to_datetime( event_dict.get("end_time") ) if not event_dict["accessed_metadata"]: + self.report.num_joined_access_events_skipped += 1 logging.info("Field accessed_metadata is empty. Skipping ....") continue @@ -207,10 +218,16 @@ def _get_joined_access_event(self, events): ) if not event_dict.get("usr"): + self.report.num_joined_access_events_skipped += 1 logging.info("The username parameter is missing. 
Skipping ....") continue - joined_access_events.append(TrinoJoinedAccessEvent(**event_dict)) + try: + joined_access_events.append(TrinoJoinedAccessEvent(**event_dict)) + except Exception as e: + self.report.num_joined_access_events_skipped += 1 + logger.info(f"Error while parsing TrinoJoinedAccessEvent: {e}") + return joined_access_events def _aggregate_access_events( diff --git a/metadata-ingestion/tests/integration/starburst-trino-usage/trino_usages_golden.json b/metadata-ingestion/tests/integration/starburst-trino-usage/trino_usages_golden.json index f6c919edc2f45..8bed3cfcd860e 100644 --- a/metadata-ingestion/tests/integration/starburst-trino-usage/trino_usages_golden.json +++ b/metadata-ingestion/tests/integration/starburst-trino-usage/trino_usages_golden.json @@ -6,7 +6,7 @@ "aspectName": "datasetUsageStatistics", "aspect": { "json": { - "timestampMillis": 1634169600000, + "timestampMillis": 1629763200000, "eventGranularity": { "unit": "DAY", "multiple": 1 @@ -16,32 +16,33 @@ "partition": "FULL_TABLE_SNAPSHOT" }, "uniqueUserCount": 1, - "totalSqlQueries": 2, + "totalSqlQueries": 1, "topSqlQueries": [ "select * from testcatalog.testschema.testtable limit 100" ], "userCounts": [ { "user": "urn:li:corpuser:test-name", - "count": 2, + "count": 1, "userEmail": "test-name@acryl.io" } ], "fieldCounts": [ { "fieldPath": "column1", - "count": 2 + "count": 1 }, { "fieldPath": "column2", - "count": 2 + "count": 1 } ] } }, "systemMetadata": { "lastObserved": 1629795600000, - "runId": "test-trino-usage" + "runId": "test-trino-usage", + "lastRunId": "no-run-id-provided" } }, { @@ -56,7 +57,8 @@ }, "systemMetadata": { "lastObserved": 1629795600000, - "runId": "test-trino-usage" + "runId": "test-trino-usage", + "lastRunId": "no-run-id-provided" } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/starburst-trino-usage/usage_events_history.json b/metadata-ingestion/tests/integration/starburst-trino-usage/usage_events_history.json index 330cff0ebeffa..5a6724d92c2bc 100644 --- a/metadata-ingestion/tests/integration/starburst-trino-usage/usage_events_history.json +++ b/metadata-ingestion/tests/integration/starburst-trino-usage/usage_events_history.json @@ -5,9 +5,7 @@ "catalog" : null, "schema" : null, "query_type" : "SELECT", - "accessed_metadata" : "[{\"catalogName\":\"testcatalog\",\"schema\":\"testschema\",\"table\":\"testtable\",\"columns\":[\"column1\",\"column2\"],\"physicalInputBytes\":1673886,\"physicalInputRows\":4754}]", - "create_time" : "2021-10-14 09:40:53.108000 UTC", - "end_time" : "2021-10-14 09:40:55.214000 UTC" + "accessed_metadata" : "[{\"catalogName\":\"testcatalog\",\"schema\":\"testschema\",\"table\":\"testtable\",\"columns\":[\"column1\",\"column2\"],\"physicalInputBytes\":1673886,\"physicalInputRows\":4754,\"connectorInfo\":{\"partitionIds\":[\"abc\"]}}]" }, { "usr" : "test-name@acryl.io", From af866eaf955443f837f30d49b6681b68a8cbcb51 Mon Sep 17 00:00:00 2001 From: Mayuri Nehate <33225191+mayurinehate@users.noreply.github.com> Date: Wed, 10 Jan 2024 23:29:09 +0530 Subject: [PATCH 12/16] =?UTF-8?q?feat(ingest/snowflake):=20improve=20accur?= =?UTF-8?q?acy=20of=20computed=20sample-based=20pro=E2=80=A6=20(#9600)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../datahub/ingestion/source/ge_data_profiler.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/ge_data_profiler.py 
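The Starburst Trino usage fix above combines two ideas: make occasionally-absent audit-log fields Optional on the model, and count-and-skip any event that still fails validation instead of failing the whole run. A cut-down sketch of that behaviour (not the real `TrinoJoinedAccessEvent` model), assuming pydantic is available as it is elsewhere in metadata-ingestion:

```python
import logging
from datetime import datetime
from typing import List, Optional

from pydantic import BaseModel, Field

logger = logging.getLogger(__name__)


class AccessEvent(BaseModel):
    usr: str
    query: str
    starttime: datetime = Field(alias="create_time")
    # Fields that are sometimes missing from the audit log are Optional,
    # so their absence no longer raises a validation error.
    endtime: Optional[datetime] = Field(None, alias="end_time")
    truncated: Optional[bool] = None


def parse_events(rows: List[dict]) -> List[AccessEvent]:
    events: List[AccessEvent] = []
    skipped = 0
    for row in rows:
        try:
            events.append(AccessEvent(**row))
        except Exception as e:  # count and continue instead of aborting the run
            skipped += 1
            logger.info(f"Error while parsing access event: {e}")
    return events


if __name__ == "__main__":
    rows = [
        {"usr": "test-name@acryl.io", "query": "select 1",
         "create_time": "2021-10-14T09:40:53"},
        {"query": "select 2", "create_time": "2021-10-14T09:41:00"},  # no usr -> skipped
    ]
    print(len(parse_events(rows)))  # prints 1
```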
b/metadata-ingestion/src/datahub/ingestion/source/ge_data_profiler.py index 4f1ad00b1e425..91f7c2a140336 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/ge_data_profiler.py +++ b/metadata-ingestion/src/datahub/ingestion/source/ge_data_profiler.py @@ -680,14 +680,12 @@ def generate_dataset_profile( # noqa: C901 (complexity) assert profile.rowCount is not None row_count: int # used for null counts calculation if profile.partitionSpec and "SAMPLE" in profile.partitionSpec.partition: - # We can alternatively use `self._get_dataset_rows(profile)` to get - # exact count of rows in sample, as actual rows involved in sample - # may be slightly different (more or less) than configured `sample_size`. - # However not doing so to start with, as that adds another query overhead - # plus approximate metrics should work for sampling based profiling. - row_count = self.config.sample_size - else: - row_count = profile.rowCount + # Querying exact row count of sample using `_get_dataset_rows`. + # We are not using `self.config.sample_size` directly as actual row count + # in sample may be slightly different (more or less) than configured `sample_size`. + self._get_dataset_rows(profile) + + row_count = profile.rowCount for column_spec in columns_profiling_queue: column = column_spec.column From 0486319bc89470ec9dd22445f579cc413ecbdf30 Mon Sep 17 00:00:00 2001 From: Shubham Jagtap <132359390+shubhamjagtap639@users.noreply.github.com> Date: Thu, 11 Jan 2024 00:48:36 +0530 Subject: [PATCH 13/16] feat(ingestion/fivetran): Add fivetran bigquery destination support (#9531) Co-authored-by: Harshal Sheth --- .../app/ingest/source/builder/sources.json | 2 +- .../docs/sources/fivetran/fivetran_pre.md | 9 +- .../docs/sources/fivetran/fivetran_recipe.yml | 13 +- metadata-ingestion/setup.py | 4 +- .../source/bigquery_v2/bigquery_config.py | 16 +- .../ingestion/source/fivetran/config.py | 29 +- .../ingestion/source/fivetran/fivetran.py | 23 +- .../source/fivetran/fivetran_log_api.py | 81 ++- .../source/fivetran/fivetran_query.py | 104 ++- .../fivetran/fivetran_bigquery_golden.json | 626 ++++++++++++++++++ ...en.json => fivetran_snowflake_golden.json} | 0 .../integration/fivetran/test_fivetran.py | 153 ++++- 12 files changed, 937 insertions(+), 123 deletions(-) create mode 100644 metadata-ingestion/tests/integration/fivetran/fivetran_bigquery_golden.json rename metadata-ingestion/tests/integration/fivetran/{fivetran_golden.json => fivetran_snowflake_golden.json} (100%) diff --git a/datahub-web-react/src/app/ingest/source/builder/sources.json b/datahub-web-react/src/app/ingest/source/builder/sources.json index 2dc2598c1a0ab..e33de13c100b7 100644 --- a/datahub-web-react/src/app/ingest/source/builder/sources.json +++ b/datahub-web-react/src/app/ingest/source/builder/sources.json @@ -221,7 +221,7 @@ "name": "fivetran", "displayName": "Fivetran", "docsUrl": "https://datahubproject.io/docs/generated/ingestion/sources/fivetran/", - "recipe": "source:\n type: fivetran\n config:\n # Fivetran log connector destination server configurations\n fivetran_log_config:\n destination_platform: snowflake\n destination_config:\n # Coordinates\n account_id: snowflake_account_id\n warehouse: warehouse_name\n database: snowflake_db\n log_schema: fivetran_log_schema\n\n # Credentials\n username: ${SNOWFLAKE_USER}\n password: ${SNOWFLAKE_PASS}\n role: snowflake_role\n\n # Optional - filter for certain connector names instead of ingesting everything.\n # connector_patterns:\n # allow:\n # - connector_name\n\n # Optional -- This mapping 
is optional and only required to configure platform-instance for source\n # A mapping of Fivetran connector id to data platform instance\n # sources_to_platform_instance:\n # calendar_elected:\n # platform_instance: cloud_postgres_instance\n # env: DEV\n\n # Optional -- This mapping is optional and only required to configure platform-instance for destination.\n # A mapping of Fivetran destination id to data platform instance\n # destination_to_platform_instance:\n # calendar_elected:\n # platform_instance: cloud_postgres_instance\n # env: DEV" + "recipe": "source:\n type: fivetran\n config:\n # Fivetran log connector destination server configurations\n fivetran_log_config:\n destination_platform: snowflake\n snowflake_destination_config:\n # Coordinates\n account_id: snowflake_account_id\n warehouse: warehouse_name\n database: snowflake_db\n log_schema: fivetran_log_schema\n\n # Credentials\n username: ${SNOWFLAKE_USER}\n password: ${SNOWFLAKE_PASS}\n role: snowflake_role\n\n # Optional - filter for certain connector names instead of ingesting everything.\n # connector_patterns:\n # allow:\n # - connector_name\n\n # Optional -- This mapping is optional and only required to configure platform-instance for source\n # A mapping of Fivetran connector id to data platform instance\n # sources_to_platform_instance:\n # calendar_elected:\n # platform_instance: cloud_postgres_instance\n # env: DEV\n\n # Optional -- This mapping is optional and only required to configure platform-instance for destination.\n # A mapping of Fivetran destination id to data platform instance\n # destination_to_platform_instance:\n # calendar_elected:\n # platform_instance: cloud_postgres_instance\n # env: DEV" }, { "urn": "urn:li:dataPlatform:csv-enricher", diff --git a/metadata-ingestion/docs/sources/fivetran/fivetran_pre.md b/metadata-ingestion/docs/sources/fivetran/fivetran_pre.md index 949e408215e6f..8669ef9b44d31 100644 --- a/metadata-ingestion/docs/sources/fivetran/fivetran_pre.md +++ b/metadata-ingestion/docs/sources/fivetran/fivetran_pre.md @@ -26,7 +26,10 @@ Source and destination are mapped to Dataset as an Input and Output of Connector ## Current limitations -Works only for Snowflake destination for now. +Works only for + +- Snowflake destination +- Bigquery destination ## Snowflake destination Configuration Guide 1. If your fivetran platform connector destination is snowflake, you need to provide user details and its role with correct privileges in order to fetch metadata. @@ -49,6 +52,10 @@ grant select on all tables in SCHEMA ""."=8.12.0", + "sqlalchemy-bigquery>=1.4.1", } clickhouse_common = { @@ -294,7 +295,6 @@ | bigquery_common | { *sqlglot_lib, - "sqlalchemy-bigquery>=1.4.1", "google-cloud-datacatalog-lineage==0.2.2", }, "clickhouse": sql_common | clickhouse_common, @@ -396,7 +396,7 @@ "unity-catalog": databricks | sql_common | sqllineage_lib, # databricks is alias for unity-catalog and needs to be kept in sync "databricks": databricks | sql_common | sqllineage_lib, - "fivetran": snowflake_common, + "fivetran": snowflake_common | bigquery_common, } # This is mainly used to exclude plugins from the Docker image. 
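
Note: the `config.py` hunk below renames `destination_config` to `snowflake_destination_config` (with a `pydantic_renamed_field` shim for old recipes) and adds a parallel `bigquery_destination_config` block behind a `Literal["snowflake", "bigquery"]` switch. A minimal sketch of how the new BigQuery block parses — illustrative only, not an excerpt from the diff; credential values are placeholders and the dataset name is hypothetical, mirroring `test_fivetran_with_bigquery_dest` further down in this patch:

```python
import pydantic

from datahub.ingestion.source.fivetran.config import FivetranLogConfig

# Illustrative recipe fragment, expressed as the dict pydantic would parse.
# Field names come from the config.py hunk below; values are placeholders.
log_config = FivetranLogConfig.parse_obj(
    {
        "destination_platform": "bigquery",
        "bigquery_destination_config": {
            "credential": {
                "private_key_id": "placeholder-key-id",
                "project_id": "my-gcp-project",
                "client_email": "fivetran-connector@my-gcp-project.iam.gserviceaccount.com",
                "client_id": "1234567",
                "private_key": "placeholder-private-key",
            },
            "dataset": "fivetran_log_dataset",
        },
    }
)

# Without project_on_behalf, the URL falls back to credential/env-based project
# detection, matching get_sql_alchemy_url() as moved into BigQueryConnectionConfig below.
assert log_config.bigquery_destination_config is not None
assert log_config.bigquery_destination_config.get_sql_alchemy_url() == "bigquery://"

# The root validator below rejects a destination platform without its matching block.
try:
    FivetranLogConfig.parse_obj({"destination_platform": "bigquery"})
except pydantic.ValidationError as err:
    print(err)  # "... user must provide bigquery destination configuration in the recipe."
```
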
diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_config.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_config.py index 58f2a600c2ff7..bb14295bc38a8 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_config.py @@ -80,6 +80,14 @@ def make_gcp_logging_client( else: return GCPLoggingClient(**client_options) + def get_sql_alchemy_url(self) -> str: + if self.project_on_behalf: + return f"bigquery://{self.project_on_behalf}" + # When project_id is not set, we will attempt to detect the project ID + # based on the credentials or environment variables. + # See https://github.com/mxmzdlv/pybigquery#authentication. + return "bigquery://" + class BigQueryV2Config( BigQueryConnectionConfig, @@ -356,14 +364,6 @@ def backward_compatibility_configs_set(cls, values: Dict) -> Dict: def get_table_pattern(self, pattern: List[str]) -> str: return "|".join(pattern) if pattern else "" - def get_sql_alchemy_url(self) -> str: - if self.project_on_behalf: - return f"bigquery://{self.project_on_behalf}" - # When project_id is not set, we will attempt to detect the project ID - # based on the credentials or environment variables. - # See https://github.com/mxmzdlv/pybigquery#authentication. - return "bigquery://" - platform_instance_not_supported_for_bigquery = pydantic_removed_field( "platform_instance" ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/fivetran/config.py b/metadata-ingestion/src/datahub/ingestion/source/fivetran/config.py index b0843182c5cac..a46bb035a256c 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/fivetran/config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/fivetran/config.py @@ -4,9 +4,14 @@ import pydantic from pydantic import Field, root_validator +from typing_extensions import Literal from datahub.configuration.common import AllowDenyPattern, ConfigModel from datahub.configuration.source_common import DEFAULT_ENV, DatasetSourceConfigMixin +from datahub.configuration.validate_field_rename import pydantic_renamed_field +from datahub.ingestion.source.bigquery_v2.bigquery_config import ( + BigQueryConnectionConfig, +) from datahub.ingestion.source.state.stale_entity_removal_handler import ( StaleEntityRemovalSourceReport, StatefulStaleMetadataRemovalConfig, @@ -60,29 +65,45 @@ class Constant: } -class DestinationConfig(BaseSnowflakeConfig): +class SnowflakeDestinationConfig(BaseSnowflakeConfig): database: str = Field(description="The fivetran connector log database.") log_schema: str = Field(description="The fivetran connector log schema.") +class BigQueryDestinationConfig(BigQueryConnectionConfig): + dataset: str = Field(description="The fivetran connector log dataset.") + + class FivetranLogConfig(ConfigModel): - destination_platform: str = pydantic.Field( + destination_platform: Literal["snowflake", "bigquery"] = pydantic.Field( default="snowflake", description="The destination platform where fivetran connector log tables are dumped.", ) - destination_config: Optional[DestinationConfig] = pydantic.Field( + snowflake_destination_config: Optional[SnowflakeDestinationConfig] = pydantic.Field( default=None, description="If destination platform is 'snowflake', provide snowflake configuration.", ) + bigquery_destination_config: Optional[BigQueryDestinationConfig] = pydantic.Field( + default=None, + description="If destination platform is 'bigquery', provide bigquery configuration.", + ) 
+ _rename_destination_config = pydantic_renamed_field( + "destination_config", "snowflake_destination_config" + ) @root_validator(pre=True) def validate_destination_platfrom_and_config(cls, values: Dict) -> Dict: destination_platform = values["destination_platform"] if destination_platform == "snowflake": - if "destination_config" not in values: + if "snowflake_destination_config" not in values: raise ValueError( "If destination platform is 'snowflake', user must provide snowflake destination configuration in the recipe." ) + elif destination_platform == "bigquery": + if "bigquery_destination_config" not in values: + raise ValueError( + "If destination platform is 'bigquery', user must provide bigquery destination configuration in the recipe." + ) else: raise ValueError( f"Destination platform '{destination_platform}' is not yet supported." diff --git a/metadata-ingestion/src/datahub/ingestion/source/fivetran/fivetran.py b/metadata-ingestion/src/datahub/ingestion/source/fivetran/fivetran.py index 834d9bff0b5cd..b98db660b0ddc 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/fivetran/fivetran.py +++ b/metadata-ingestion/src/datahub/ingestion/source/fivetran/fivetran.py @@ -119,15 +119,13 @@ def _extend_lineage(self, connector: Connector, datajob: DataJob) -> None: ) input_dataset_urn_list.append(input_dataset_urn) - output_dataset_urn: Optional[DatasetUrn] = None - if self.audit_log.fivetran_log_database: - output_dataset_urn = DatasetUrn.create_from_ids( - platform_id=self.config.fivetran_log_config.destination_platform, - table_name=f"{self.audit_log.fivetran_log_database.lower()}.{table_lineage.destination_table}", - env=destination_platform_detail.env, - platform_instance=destination_platform_detail.platform_instance, - ) - output_dataset_urn_list.append(output_dataset_urn) + output_dataset_urn = DatasetUrn.create_from_ids( + platform_id=self.config.fivetran_log_config.destination_platform, + table_name=f"{self.audit_log.fivetran_log_database.lower()}.{table_lineage.destination_table}", + env=destination_platform_detail.env, + platform_instance=destination_platform_detail.platform_instance, + ) + output_dataset_urn_list.append(output_dataset_urn) if self.config.include_column_lineage: for column_lineage in table_lineage.column_lineage: @@ -282,11 +280,10 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: Datahub Ingestion framework invoke this method """ logger.info("Fivetran plugin execution is started") - connectors = self.audit_log.get_connectors_list() + connectors = self.audit_log.get_allowed_connectors_list( + self.config.connector_patterns, self.report + ) for connector in connectors: - if not self.config.connector_patterns.allowed(connector.connector_name): - self.report.report_connectors_dropped(connector.connector_name) - continue logger.info(f"Processing connector id: {connector.connector_id}") yield from self._get_connector_workunits(connector) diff --git a/metadata-ingestion/src/datahub/ingestion/source/fivetran/fivetran_log_api.py b/metadata-ingestion/src/datahub/ingestion/source/fivetran/fivetran_log_api.py index 5680b10982c49..bdef28e30db2b 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/fivetran/fivetran_log_api.py +++ b/metadata-ingestion/src/datahub/ingestion/source/fivetran/fivetran_log_api.py @@ -1,10 +1,15 @@ import json import logging -from typing import Any, Dict, List, Optional +from typing import Any, Dict, List, Optional, Tuple from sqlalchemy import create_engine -from datahub.ingestion.source.fivetran.config import 
Constant, FivetranLogConfig +from datahub.configuration.common import AllowDenyPattern, ConfigurationError +from datahub.ingestion.source.fivetran.config import ( + Constant, + FivetranLogConfig, + FivetranSourceReport, +) from datahub.ingestion.source.fivetran.data_classes import ( ColumnLineage, Connector, @@ -18,30 +23,57 @@ class FivetranLogAPI: def __init__(self, fivetran_log_config: FivetranLogConfig) -> None: - self.fivetran_log_database: Optional[str] = None self.fivetran_log_config = fivetran_log_config - self.engine = self._get_log_destination_engine() + ( + self.engine, + self.fivetran_log_query, + self.fivetran_log_database, + ) = self._initialize_fivetran_variables() - def _get_log_destination_engine(self) -> Any: + def _initialize_fivetran_variables( + self, + ) -> Tuple[Any, FivetranLogQuery, str]: + fivetran_log_query = FivetranLogQuery() destination_platform = self.fivetran_log_config.destination_platform - engine = None # For every destination, create sqlalchemy engine, - # select the database and schema and set fivetran_log_database class variable + # set db_clause to generate select queries and set fivetran_log_database class variable if destination_platform == "snowflake": - snowflake_destination_config = self.fivetran_log_config.destination_config + snowflake_destination_config = ( + self.fivetran_log_config.snowflake_destination_config + ) if snowflake_destination_config is not None: engine = create_engine( snowflake_destination_config.get_sql_alchemy_url(), **snowflake_destination_config.get_options(), ) engine.execute( - FivetranLogQuery.use_schema( + fivetran_log_query.use_database( snowflake_destination_config.database, - snowflake_destination_config.log_schema, ) ) - self.fivetran_log_database = snowflake_destination_config.database - return engine + fivetran_log_query.set_db( + snowflake_destination_config.log_schema, + ) + fivetran_log_database = snowflake_destination_config.database + elif destination_platform == "bigquery": + bigquery_destination_config = ( + self.fivetran_log_config.bigquery_destination_config + ) + if bigquery_destination_config is not None: + engine = create_engine( + bigquery_destination_config.get_sql_alchemy_url(), + ) + fivetran_log_query.set_db(bigquery_destination_config.dataset) + fivetran_log_database = bigquery_destination_config.dataset + else: + raise ConfigurationError( + f"Destination platform '{destination_platform}' is not yet supported." 
+ ) + return ( + engine, + fivetran_log_query, + fivetran_log_database, + ) def _query(self, query: str) -> List[Dict]: logger.debug("Query : {}".format(query)) @@ -50,12 +82,12 @@ def _query(self, query: str) -> List[Dict]: def _get_table_lineage(self, connector_id: str) -> List[TableLineage]: table_lineage_result = self._query( - FivetranLogQuery.get_table_lineage_query(connector_id=connector_id) + self.fivetran_log_query.get_table_lineage_query(connector_id=connector_id) ) table_lineage_list: List[TableLineage] = [] for table_lineage in table_lineage_result: column_lineage_result = self._query( - FivetranLogQuery.get_column_lineage_query( + self.fivetran_log_query.get_column_lineage_query( source_table_id=table_lineage[Constant.SOURCE_TABLE_ID], destination_table_id=table_lineage[Constant.DESTINATION_TABLE_ID], ) @@ -82,13 +114,17 @@ def _get_jobs_list(self, connector_id: str) -> List[Job]: sync_start_logs = { row[Constant.SYNC_ID]: row for row in self._query( - FivetranLogQuery.get_sync_start_logs_query(connector_id=connector_id) + self.fivetran_log_query.get_sync_start_logs_query( + connector_id=connector_id + ) ) } sync_end_logs = { row[Constant.SYNC_ID]: row for row in self._query( - FivetranLogQuery.get_sync_end_logs_query(connector_id=connector_id) + self.fivetran_log_query.get_sync_end_logs_query( + connector_id=connector_id + ) ) } for sync_id in sync_start_logs.keys(): @@ -120,15 +156,22 @@ def _get_jobs_list(self, connector_id: str) -> List[Job]: def _get_user_name(self, user_id: Optional[str]) -> Optional[str]: if not user_id: return None - user_details = self._query(FivetranLogQuery.get_user_query(user_id=user_id))[0] + user_details = self._query( + self.fivetran_log_query.get_user_query(user_id=user_id) + )[0] return ( f"{user_details[Constant.GIVEN_NAME]} {user_details[Constant.FAMILY_NAME]}" ) - def get_connectors_list(self) -> List[Connector]: + def get_allowed_connectors_list( + self, connector_patterns: AllowDenyPattern, report: FivetranSourceReport + ) -> List[Connector]: connectors: List[Connector] = [] - connector_list = self._query(FivetranLogQuery.get_connectors_query()) + connector_list = self._query(self.fivetran_log_query.get_connectors_query()) for connector in connector_list: + if not connector_patterns.allowed(connector[Constant.CONNECTOR_NAME]): + report.report_connectors_dropped(connector[Constant.CONNECTOR_NAME]) + continue connectors.append( Connector( connector_id=connector[Constant.CONNECTOR_ID], diff --git a/metadata-ingestion/src/datahub/ingestion/source/fivetran/fivetran_query.py b/metadata-ingestion/src/datahub/ingestion/source/fivetran/fivetran_query.py index 4f52fcd5d884f..df79b552ed980 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/fivetran/fivetran_query.py +++ b/metadata-ingestion/src/datahub/ingestion/source/fivetran/fivetran_query.py @@ -1,76 +1,74 @@ class FivetranLogQuery: - @staticmethod - def use_schema(db_name: str, schema_name: str) -> str: - return f'use schema "{db_name}"."{schema_name}"' + def __init__(self) -> None: + # Select query db clause + self.db_clause: str = "" - @staticmethod - def get_connectors_query() -> str: - return """ - SELECT connector_id as "CONNECTOR_ID", - connecting_user_id as "CONNECTING_USER_ID", - connector_type_id as "CONNECTOR_TYPE_ID", - connector_name as "CONNECTOR_NAME", - paused as "PAUSED", - sync_frequency as "SYNC_FREQUENCY", - destination_id as "DESTINATION_ID" - FROM CONNECTOR + def set_db(self, db_name: str) -> None: + self.db_clause = f"{db_name}." 
+ + def use_database(self, db_name: str) -> str: + return f"use database {db_name}" + + def get_connectors_query(self) -> str: + return f""" + SELECT connector_id, + connecting_user_id, + connector_type_id, + connector_name, + paused, + sync_frequency, + destination_id + FROM {self.db_clause}connector WHERE _fivetran_deleted = FALSE""" - @staticmethod - def get_user_query(user_id: str) -> str: + def get_user_query(self, user_id: str) -> str: return f""" - SELECT id as "USER_ID", - given_name as "GIVEN_NAME", - family_name as "FAMILY_NAME" - FROM USER + SELECT id as user_id, + given_name, + family_name + FROM {self.db_clause}user WHERE id = '{user_id}'""" - @staticmethod - def get_sync_start_logs_query( - connector_id: str, - ) -> str: + def get_sync_start_logs_query(self, connector_id: str) -> str: return f""" - SELECT time_stamp as "TIME_STAMP", - sync_id as "SYNC_ID" - FROM LOG + SELECT time_stamp, + sync_id + FROM {self.db_clause}log WHERE message_event = 'sync_start' and connector_id = '{connector_id}' order by time_stamp""" - @staticmethod - def get_sync_end_logs_query(connector_id: str) -> str: + def get_sync_end_logs_query(self, connector_id: str) -> str: return f""" - SELECT time_stamp as "TIME_STAMP", - sync_id as "SYNC_ID", - message_data as "MESSAGE_DATA" - FROM LOG + SELECT time_stamp, + sync_id, + message_data + FROM {self.db_clause}log WHERE message_event = 'sync_end' and connector_id = '{connector_id}' order by time_stamp""" - @staticmethod - def get_table_lineage_query(connector_id: str) -> str: + def get_table_lineage_query(self, connector_id: str) -> str: return f""" - SELECT stm.id as "SOURCE_TABLE_ID", - stm.name as "SOURCE_TABLE_NAME", - ssm.name as "SOURCE_SCHEMA_NAME", - dtm.id as "DESTINATION_TABLE_ID", - dtm.name as "DESTINATION_TABLE_NAME", - dsm.name as "DESTINATION_SCHEMA_NAME" - FROM table_lineage as tl - JOIN source_table_metadata as stm on tl.source_table_id = stm.id - JOIN destination_table_metadata as dtm on tl.destination_table_id = dtm.id - JOIN source_schema_metadata as ssm on stm.schema_id = ssm.id - JOIN destination_schema_metadata as dsm on dtm.schema_id = dsm.id + SELECT stm.id as source_table_id, + stm.name as source_table_name, + ssm.name as source_schema_name, + dtm.id as destination_table_id, + dtm.name as destination_table_name, + dsm.name as destination_schema_name + FROM {self.db_clause}table_lineage as tl + JOIN {self.db_clause}source_table_metadata as stm on tl.source_table_id = stm.id + JOIN {self.db_clause}destination_table_metadata as dtm on tl.destination_table_id = dtm.id + JOIN {self.db_clause}source_schema_metadata as ssm on stm.schema_id = ssm.id + JOIN {self.db_clause}destination_schema_metadata as dsm on dtm.schema_id = dsm.id WHERE stm.connector_id = '{connector_id}'""" - @staticmethod def get_column_lineage_query( - source_table_id: str, destination_table_id: str + self, source_table_id: str, destination_table_id: str ) -> str: return f""" - SELECT scm.name as "SOURCE_COLUMN_NAME", - dcm.name as "DESTINATION_COLUMN_NAME" - FROM column_lineage as cl - JOIN source_column_metadata as scm on + SELECT scm.name as source_column_name, + dcm.name as destination_column_name + FROM {self.db_clause}column_lineage as cl + JOIN {self.db_clause}source_column_metadata as scm on (cl.source_column_id = scm.id and scm.table_id = {source_table_id}) - JOIN destination_column_metadata as dcm on + JOIN {self.db_clause}destination_column_metadata as dcm on (cl.destination_column_id = dcm.id and dcm.table_id = {destination_table_id})""" diff --git 
a/metadata-ingestion/tests/integration/fivetran/fivetran_bigquery_golden.json b/metadata-ingestion/tests/integration/fivetran/fivetran_bigquery_golden.json new file mode 100644 index 0000000000000..ae9e71f0953f4 --- /dev/null +++ b/metadata-ingestion/tests/integration/fivetran/fivetran_bigquery_golden.json @@ -0,0 +1,626 @@ +[ +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(fivetran,calendar_elected,PROD)", + "changeType": "UPSERT", + "aspectName": "dataFlowInfo", + "aspect": { + "json": { + "customProperties": {}, + "name": "postgres" + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(fivetran,calendar_elected,PROD)", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "json": { + "owners": [], + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:fivetran" + } + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(fivetran,calendar_elected,PROD)", + "changeType": "UPSERT", + "aspectName": "globalTags", + "aspect": { + "json": { + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(fivetran,calendar_elected,PROD),calendar_elected)", + "changeType": "UPSERT", + "aspectName": "dataJobInfo", + "aspect": { + "json": { + "customProperties": { + "paused": "False", + "sync_frequency": "1440", + "destination_id": "'interval_unconstitutional'" + }, + "name": "postgres", + "type": { + "string": "COMMAND" + } + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(fivetran,calendar_elected,PROD),calendar_elected)", + "changeType": "UPSERT", + "aspectName": "dataJobInputOutput", + "aspect": { + "json": { + "inputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:postgres,postgres_db.public.employee,DEV)", + "urn:li:dataset:(urn:li:dataPlatform:postgres,postgres_db.public.company,DEV)" + ], + "outputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:bigquery,test.postgres_public.employee,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:bigquery,test.postgres_public.company,PROD)" + ], + "inputDatajobs": [], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,postgres_db.public.employee,DEV),id)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,test.postgres_public.employee,PROD),id)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,postgres_db.public.employee,DEV),name)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,test.postgres_public.employee,PROD),name)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,postgres_db.public.company,DEV),id)" + ], + "downstreamType": "FIELD", + "downstreams": [ + 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,test.postgres_public.company,PROD),id)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,postgres_db.public.company,DEV),name)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,test.postgres_public.company,PROD),name)" + ], + "confidenceScore": 1.0 + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,postgres_db.public.employee,DEV)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,postgres_db.public.company,DEV)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(fivetran,calendar_elected,PROD),calendar_elected)", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:Shubham Jagtap", + "type": "DEVELOPER", + "source": { + "type": "SERVICE" + } + } + ], + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:fivetran" + } + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(fivetran,calendar_elected,PROD),calendar_elected)", + "changeType": "UPSERT", + "aspectName": "globalTags", + "aspect": { + "json": { + "tags": [] + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:ee88d32dbe3133a23a9023c097050190", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceProperties", + "aspect": { + "json": { + "customProperties": {}, + "name": "4c9a03d6-eded-4422-a46a-163266e58243", + "type": "BATCH_SCHEDULED", + "created": { + "time": 1695191853000, + "actor": "urn:li:corpuser:datahub" + } + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:ee88d32dbe3133a23a9023c097050190", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceRelationships", + "aspect": { + "json": { + "parentTemplate": "urn:li:dataJob:(urn:li:dataFlow:(fivetran,calendar_elected,PROD),calendar_elected)", + "upstreamInstances": [] + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:ee88d32dbe3133a23a9023c097050190", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceInput", + "aspect": { + "json": { + "inputs": [ + 
"urn:li:dataset:(urn:li:dataPlatform:postgres,postgres_db.public.employee,DEV)", + "urn:li:dataset:(urn:li:dataPlatform:postgres,postgres_db.public.company,DEV)" + ] + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:ee88d32dbe3133a23a9023c097050190", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceOutput", + "aspect": { + "json": { + "outputs": [ + "urn:li:dataset:(urn:li:dataPlatform:bigquery,test.postgres_public.employee,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:bigquery,test.postgres_public.company,PROD)" + ] + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:ee88d32dbe3133a23a9023c097050190", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceRunEvent", + "aspect": { + "json": { + "timestampMillis": 1695191853000, + "partitionSpec": { + "type": "FULL_TABLE", + "partition": "FULL_TABLE_SNAPSHOT" + }, + "status": "STARTED" + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:ee88d32dbe3133a23a9023c097050190", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceRunEvent", + "aspect": { + "json": { + "timestampMillis": 1695191885000, + "partitionSpec": { + "type": "FULL_TABLE", + "partition": "FULL_TABLE_SNAPSHOT" + }, + "status": "COMPLETE", + "result": { + "type": "SUCCESS", + "nativeResultType": "fivetran" + } + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:be36f55c13ec4e313c7510770e50784a", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceProperties", + "aspect": { + "json": { + "customProperties": {}, + "name": "f773d1e9-c791-48f4-894f-8cf9b3dfc834", + "type": "BATCH_SCHEDULED", + "created": { + "time": 1696343730000, + "actor": "urn:li:corpuser:datahub" + } + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:be36f55c13ec4e313c7510770e50784a", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceRelationships", + "aspect": { + "json": { + "parentTemplate": "urn:li:dataJob:(urn:li:dataFlow:(fivetran,calendar_elected,PROD),calendar_elected)", + "upstreamInstances": [] + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:be36f55c13ec4e313c7510770e50784a", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceInput", + "aspect": { + "json": { + "inputs": [ + "urn:li:dataset:(urn:li:dataPlatform:postgres,postgres_db.public.employee,DEV)", + "urn:li:dataset:(urn:li:dataPlatform:postgres,postgres_db.public.company,DEV)" + ] + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": 
"urn:li:dataProcessInstance:be36f55c13ec4e313c7510770e50784a", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceOutput", + "aspect": { + "json": { + "outputs": [ + "urn:li:dataset:(urn:li:dataPlatform:bigquery,test.postgres_public.employee,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:bigquery,test.postgres_public.company,PROD)" + ] + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:be36f55c13ec4e313c7510770e50784a", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceRunEvent", + "aspect": { + "json": { + "timestampMillis": 1696343730000, + "partitionSpec": { + "type": "FULL_TABLE", + "partition": "FULL_TABLE_SNAPSHOT" + }, + "status": "STARTED" + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:be36f55c13ec4e313c7510770e50784a", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceRunEvent", + "aspect": { + "json": { + "timestampMillis": 1696343732000, + "partitionSpec": { + "type": "FULL_TABLE", + "partition": "FULL_TABLE_SNAPSHOT" + }, + "status": "COMPLETE", + "result": { + "type": "SKIPPED", + "nativeResultType": "fivetran" + } + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:d8f100271d2dc3fa905717f82d083c8d", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceProperties", + "aspect": { + "json": { + "customProperties": {}, + "name": "63c2fc85-600b-455f-9ba0-f576522465be", + "type": "BATCH_SCHEDULED", + "created": { + "time": 1696343755000, + "actor": "urn:li:corpuser:datahub" + } + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:d8f100271d2dc3fa905717f82d083c8d", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceRelationships", + "aspect": { + "json": { + "parentTemplate": "urn:li:dataJob:(urn:li:dataFlow:(fivetran,calendar_elected,PROD),calendar_elected)", + "upstreamInstances": [] + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:d8f100271d2dc3fa905717f82d083c8d", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceInput", + "aspect": { + "json": { + "inputs": [ + "urn:li:dataset:(urn:li:dataPlatform:postgres,postgres_db.public.employee,DEV)", + "urn:li:dataset:(urn:li:dataPlatform:postgres,postgres_db.public.company,DEV)" + ] + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:d8f100271d2dc3fa905717f82d083c8d", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceOutput", + "aspect": { + "json": { + "outputs": [ + "urn:li:dataset:(urn:li:dataPlatform:bigquery,test.postgres_public.employee,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:bigquery,test.postgres_public.company,PROD)" + ] + } + }, + 
"systemMetadata": { + "lastObserved": 1654621200000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:d8f100271d2dc3fa905717f82d083c8d", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceRunEvent", + "aspect": { + "json": { + "timestampMillis": 1696343755000, + "partitionSpec": { + "type": "FULL_TABLE", + "partition": "FULL_TABLE_SNAPSHOT" + }, + "status": "STARTED" + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataProcessInstance", + "entityUrn": "urn:li:dataProcessInstance:d8f100271d2dc3fa905717f82d083c8d", + "changeType": "UPSERT", + "aspectName": "dataProcessInstanceRunEvent", + "aspect": { + "json": { + "timestampMillis": 1696343790000, + "partitionSpec": { + "type": "FULL_TABLE", + "partition": "FULL_TABLE_SNAPSHOT" + }, + "status": "COMPLETE", + "result": { + "type": "FAILURE", + "nativeResultType": "fivetran" + } + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(fivetran,calendar_elected,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(fivetran,calendar_elected,PROD),calendar_elected)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1654621200000, + "runId": "powerbi-test", + "lastRunId": "no-run-id-provided" + } +} +] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/fivetran/fivetran_golden.json b/metadata-ingestion/tests/integration/fivetran/fivetran_snowflake_golden.json similarity index 100% rename from metadata-ingestion/tests/integration/fivetran/fivetran_golden.json rename to metadata-ingestion/tests/integration/fivetran/fivetran_snowflake_golden.json diff --git a/metadata-ingestion/tests/integration/fivetran/test_fivetran.py b/metadata-ingestion/tests/integration/fivetran/test_fivetran.py index 62b3df12e1b9d..22dbd58acf1e5 100644 --- a/metadata-ingestion/tests/integration/fivetran/test_fivetran.py +++ b/metadata-ingestion/tests/integration/fivetran/test_fivetran.py @@ -5,18 +5,26 @@ import pytest from freezegun import freeze_time +from datahub.configuration.common import ConfigurationWarning from datahub.ingestion.run.pipeline import Pipeline -from datahub.ingestion.source.fivetran.config import DestinationConfig +from datahub.ingestion.source.fivetran.config import ( + BigQueryDestinationConfig, + FivetranSourceConfig, + SnowflakeDestinationConfig, +) from datahub.ingestion.source.fivetran.fivetran_query import FivetranLogQuery +from datahub.ingestion.source_config.usage.bigquery_usage import BigQueryCredential from tests.test_helpers import mce_helpers FROZEN_TIME = "2022-06-07 17:00:00" def default_query_results(query): - if query == FivetranLogQuery.use_schema("TEST_DATABASE", "TEST_SCHEMA"): + fivetran_log_query = FivetranLogQuery() + fivetran_log_query.set_db("test") + if query == fivetran_log_query.use_database("test_database"): return [] - elif query == FivetranLogQuery.get_connectors_query(): + elif query 
== fivetran_log_query.get_connectors_query(): return [ { "connector_id": "calendar_elected", @@ -28,7 +36,7 @@ def default_query_results(query): "destination_id": "interval_unconstitutional", }, ] - elif query == FivetranLogQuery.get_table_lineage_query("calendar_elected"): + elif query == fivetran_log_query.get_table_lineage_query("calendar_elected"): return [ { "source_table_id": "10040", @@ -47,9 +55,9 @@ def default_query_results(query): "destination_schema_name": "postgres_public", }, ] - elif query == FivetranLogQuery.get_column_lineage_query( + elif query == fivetran_log_query.get_column_lineage_query( "10040", "7779" - ) or query == FivetranLogQuery.get_column_lineage_query("10041", "7780"): + ) or query == fivetran_log_query.get_column_lineage_query("10041", "7780"): return [ { "source_column_name": "id", @@ -60,7 +68,7 @@ def default_query_results(query): "destination_column_name": "name", }, ] - elif query == FivetranLogQuery.get_user_query("reapply_phone"): + elif query == fivetran_log_query.get_user_query("reapply_phone"): return [ { "user_id": "reapply_phone", @@ -68,7 +76,7 @@ def default_query_results(query): "family_name": "Jagtap", } ] - elif query == FivetranLogQuery.get_sync_start_logs_query("calendar_elected"): + elif query == fivetran_log_query.get_sync_start_logs_query("calendar_elected"): return [ { "time_stamp": datetime.datetime(2023, 9, 20, 6, 37, 32, 606000), @@ -83,7 +91,7 @@ def default_query_results(query): "sync_id": "63c2fc85-600b-455f-9ba0-f576522465be", }, ] - elif query == FivetranLogQuery.get_sync_end_logs_query("calendar_elected"): + elif query == fivetran_log_query.get_sync_end_logs_query("calendar_elected"): return [ { "time_stamp": datetime.datetime(2023, 9, 20, 6, 38, 5, 56000), @@ -107,12 +115,12 @@ def default_query_results(query): @freeze_time(FROZEN_TIME) @pytest.mark.integration -def test_fivetran_basic(pytestconfig, tmp_path): +def test_fivetran_with_snowflake_dest(pytestconfig, tmp_path): test_resources_dir = pytestconfig.rootpath / "tests/integration/fivetran" # Run the metadata ingestion pipeline. 
output_file = tmp_path / "fivetran_test_events.json" - golden_file = test_resources_dir / "fivetran_golden.json" + golden_file = test_resources_dir / "fivetran_snowflake_golden.json" with mock.patch( "datahub.ingestion.source.fivetran.fivetran_log_api.create_engine" @@ -130,14 +138,14 @@ def test_fivetran_basic(pytestconfig, tmp_path): "config": { "fivetran_log_config": { "destination_platform": "snowflake", - "destination_config": { - "account_id": "TESTID", - "warehouse": "TEST_WH", + "snowflake_destination_config": { + "account_id": "testid", + "warehouse": "test_wh", "username": "test", "password": "test@123", - "database": "TEST_DATABASE", - "role": "TESTROLE", - "log_schema": "TEST_SCHEMA", + "database": "test_database", + "role": "testrole", + "log_schema": "test", }, }, "connector_patterns": { @@ -166,18 +174,87 @@ def test_fivetran_basic(pytestconfig, tmp_path): pipeline.run() pipeline.raise_from_status() - golden_file = "fivetran_golden.json" mce_helpers.check_golden_file( pytestconfig, output_path=f"{output_file}", - golden_path=f"{test_resources_dir}/{golden_file}", + golden_path=f"{golden_file}", ) @freeze_time(FROZEN_TIME) -def test_fivetran_snowflake_destination_config(pytestconfig, tmp_path): - snowflake_dest = DestinationConfig( +@pytest.mark.integration +def test_fivetran_with_bigquery_dest(pytestconfig, tmp_path): + test_resources_dir = pytestconfig.rootpath / "tests/integration/fivetran" + + # Run the metadata ingestion pipeline. + output_file = tmp_path / "fivetran_test_events.json" + golden_file = test_resources_dir / "fivetran_bigquery_golden.json" + + with mock.patch( + "datahub.ingestion.source.fivetran.fivetran_log_api.create_engine" + ) as mock_create_engine: + connection_magic_mock = MagicMock() + connection_magic_mock.execute.side_effect = default_query_results + + mock_create_engine.return_value = connection_magic_mock + + pipeline = Pipeline.create( + { + "run_id": "powerbi-test", + "source": { + "type": "fivetran", + "config": { + "fivetran_log_config": { + "destination_platform": "bigquery", + "bigquery_destination_config": { + "credential": { + "private_key_id": "testprivatekey", + "project_id": "test-project", + "client_email": "fivetran-connector@test-project.iam.gserviceaccount.com", + "client_id": "1234567", + "private_key": "private-key", + }, + "dataset": "test", + }, + }, + "connector_patterns": { + "allow": [ + "postgres", + ] + }, + "sources_to_database": { + "calendar_elected": "postgres_db", + }, + "sources_to_platform_instance": { + "calendar_elected": { + "env": "DEV", + } + }, + }, + }, + "sink": { + "type": "file", + "config": { + "filename": f"{output_file}", + }, + }, + } + ) + + pipeline.run() + pipeline.raise_from_status() + + mce_helpers.check_golden_file( + pytestconfig, + output_path=f"{output_file}", + golden_path=f"{golden_file}", + ) + + +@freeze_time(FROZEN_TIME) +def test_fivetran_snowflake_destination_config(): + snowflake_dest = SnowflakeDestinationConfig( account_id="TESTID", warehouse="TEST_WH", username="test", @@ -190,3 +267,37 @@ def test_fivetran_snowflake_destination_config(pytestconfig, tmp_path): snowflake_dest.get_sql_alchemy_url() == "snowflake://test:test%40123@TESTID?application=acryl_datahub&authenticator=SNOWFLAKE&role=TESTROLE&warehouse=TEST_WH" ) + + +@freeze_time(FROZEN_TIME) +def test_fivetran_bigquery_destination_config(): + bigquery_dest = BigQueryDestinationConfig( + credential=BigQueryCredential( + private_key_id="testprivatekey", + project_id="test-project", + 
client_email="fivetran-connector@test-project.iam.gserviceaccount.com", + client_id="1234567", + private_key="private-key", + ), + dataset="test_dataset", + ) + assert bigquery_dest.get_sql_alchemy_url() == "bigquery://" + + +@freeze_time(FROZEN_TIME) +def test_rename_destination_config(): + config_dict = { + "fivetran_log_config": { + "destination_platform": "snowflake", + "destination_config": { + "account_id": "testid", + "database": "test_database", + "log_schema": "test", + }, + }, + } + with pytest.warns( + ConfigurationWarning, + match="destination_config is deprecated, please use snowflake_destination_config instead.", + ): + FivetranSourceConfig.parse_obj(config_dict) From 47e6a04fb9696d0376b3abc7909d46b2fe6d1a06 Mon Sep 17 00:00:00 2001 From: Salman-Apptware <101426513+Salman-Apptware@users.noreply.github.com> Date: Thu, 11 Jan 2024 04:07:33 +0530 Subject: [PATCH 14/16] feat(ui): Auto-focus on entity profile action modals (#9444) --- .../shared/EntityDropdown/MoveGlossaryEntityModal.tsx | 3 +++ .../app/entity/shared/EntityDropdown/NodeParentSelect.tsx | 2 ++ .../app/entity/shared/components/styled/AddLinkModal.tsx | 2 ++ .../profile/sidebar/DataProduct/SetDataProductModal.tsx | 2 ++ .../containers/profile/sidebar/Domain/SetDomainModal.tsx | 2 ++ .../profile/sidebar/Ownership/EditOwnersModal.tsx | 2 ++ .../src/app/entity/view/builder/ViewBuilderModal.tsx | 2 ++ datahub-web-react/src/app/shared/tags/AddTagsTermsModal.tsx | 2 ++ datahub-web-react/src/utils/focus/index.ts | 6 ++++++ .../cypress/cypress/e2e/mutations/dataset_ownership.js | 2 +- 10 files changed, 24 insertions(+), 1 deletion(-) create mode 100644 datahub-web-react/src/utils/focus/index.ts diff --git a/datahub-web-react/src/app/entity/shared/EntityDropdown/MoveGlossaryEntityModal.tsx b/datahub-web-react/src/app/entity/shared/EntityDropdown/MoveGlossaryEntityModal.tsx index 37a625f58100b..51b39be4e20ea 100644 --- a/datahub-web-react/src/app/entity/shared/EntityDropdown/MoveGlossaryEntityModal.tsx +++ b/datahub-web-react/src/app/entity/shared/EntityDropdown/MoveGlossaryEntityModal.tsx @@ -7,6 +7,7 @@ import { useUpdateParentNodeMutation } from '../../../../graphql/glossary.genera import NodeParentSelect from './NodeParentSelect'; import { useGlossaryEntityData } from '../GlossaryEntityContext'; import { getGlossaryRootToUpdate, getParentNodeToUpdate, updateGlossarySidebar } from '../../../glossary/utils'; +import { getModalDomContainer } from '../../../../utils/focus'; const StyledItem = styled(Form.Item)` margin-bottom: 0; @@ -78,6 +79,7 @@ function MoveGlossaryEntityModal(props: Props) { } + getContainer={getModalDomContainer} >

diff --git a/datahub-web-react/src/app/entity/shared/EntityDropdown/NodeParentSelect.tsx b/datahub-web-react/src/app/entity/shared/EntityDropdown/NodeParentSelect.tsx index c3bfac35c2ca6..e7f5827e33dcc 100644 --- a/datahub-web-react/src/app/entity/shared/EntityDropdown/NodeParentSelect.tsx +++ b/datahub-web-react/src/app/entity/shared/EntityDropdown/NodeParentSelect.tsx @@ -21,6 +21,7 @@ interface Props { selectedParentUrn: string; setSelectedParentUrn: (parent: string) => void; isMoving?: boolean; + autofocus?: boolean; } function NodeParentSelect(props: Props) { @@ -65,6 +66,7 @@ function NodeParentSelect(props: Props) { onClear={clearSelectedParent} onFocus={() => setIsFocusedOnInput(true)} dropdownStyle={isShowingGlossaryBrowser || !searchQuery ? { display: 'none' } : {}} + autoFocus={props.autofocus} > {nodeSearchResults?.map((result) => ( diff --git a/datahub-web-react/src/app/entity/shared/components/styled/AddLinkModal.tsx b/datahub-web-react/src/app/entity/shared/components/styled/AddLinkModal.tsx index 68a8cf4094362..9e18de3b294bf 100644 --- a/datahub-web-react/src/app/entity/shared/components/styled/AddLinkModal.tsx +++ b/datahub-web-react/src/app/entity/shared/components/styled/AddLinkModal.tsx @@ -5,6 +5,7 @@ import { useEntityData, useMutationUrn } from '../../EntityContext'; import { useAddLinkMutation } from '../../../../../graphql/mutations.generated'; import analytics, { EventType, EntityActionType } from '../../../../analytics'; import { useUserContext } from '../../../../context/useUserContext'; +import { getModalDomContainer } from '../../../../../utils/focus'; type AddLinkProps = { buttonProps?: Record; @@ -73,6 +74,7 @@ export const AddLinkModal = ({ buttonProps, refetch }: AddLinkProps) => { Add , ]} + getContainer={getModalDomContainer} > } + getContainer={getModalDomContainer} >