diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java index bb4c26d89f5ce..d38c1030b61be 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java @@ -293,6 +293,7 @@ import com.linkedin.datahub.graphql.resolvers.step.BatchGetStepStatesResolver; import com.linkedin.datahub.graphql.resolvers.step.BatchUpdateStepStatesResolver; import com.linkedin.datahub.graphql.resolvers.structuredproperties.CreateStructuredPropertyResolver; +import com.linkedin.datahub.graphql.resolvers.structuredproperties.DeleteStructuredPropertyResolver; import com.linkedin.datahub.graphql.resolvers.structuredproperties.RemoveStructuredPropertiesResolver; import com.linkedin.datahub.graphql.resolvers.structuredproperties.UpdateStructuredPropertyResolver; import com.linkedin.datahub.graphql.resolvers.structuredproperties.UpsertStructuredPropertiesResolver; @@ -1344,6 +1345,9 @@ private void configureMutationResolvers(final RuntimeWiring.Builder builder) { .dataFetcher( "updateStructuredProperty", new UpdateStructuredPropertyResolver(this.entityClient)) + .dataFetcher( + "deleteStructuredProperty", + new DeleteStructuredPropertyResolver(this.entityClient)) .dataFetcher("raiseIncident", new RaiseIncidentResolver(this.entityClient)) .dataFetcher( "updateIncidentStatus", @@ -2117,6 +2121,9 @@ private void configureStructuredPropertyResolvers(final RuntimeWiring.Builder bu .getAllowedTypes().stream() .map(entityTypeType.getKeyProvider()) .collect(Collectors.toList())))); + builder.type( + "StructuredPropertyEntity", + typeWiring -> typeWiring.dataFetcher("exists", new EntityExistsResolver(entityService))); } /** diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/structuredproperties/DeleteStructuredPropertyResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/structuredproperties/DeleteStructuredPropertyResolver.java new file mode 100644 index 0000000000000..e7d59494654fd --- /dev/null +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/structuredproperties/DeleteStructuredPropertyResolver.java @@ -0,0 +1,52 @@ +package com.linkedin.datahub.graphql.resolvers.structuredproperties; + +import static com.linkedin.datahub.graphql.resolvers.ResolverUtils.bindArgument; + +import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.UrnUtils; +import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.authorization.AuthorizationUtils; +import com.linkedin.datahub.graphql.exception.AuthorizationException; +import com.linkedin.datahub.graphql.generated.DeleteStructuredPropertyInput; +import com.linkedin.entity.client.EntityClient; +import graphql.schema.DataFetcher; +import graphql.schema.DataFetchingEnvironment; +import java.util.Objects; +import java.util.concurrent.CompletableFuture; +import javax.annotation.Nonnull; +import lombok.extern.slf4j.Slf4j; + +@Slf4j +public class DeleteStructuredPropertyResolver implements DataFetcher> { + + private final EntityClient _entityClient; + + public DeleteStructuredPropertyResolver(@Nonnull final EntityClient entityClient) { + _entityClient = Objects.requireNonNull(entityClient, "entityClient must not be null"); + } + + @Override + public CompletableFuture get(final DataFetchingEnvironment environment) + throws Exception { + final QueryContext context = environment.getContext(); + + final DeleteStructuredPropertyInput input = + bindArgument(environment.getArgument("input"), DeleteStructuredPropertyInput.class); + final Urn propertyUrn = UrnUtils.getUrn(input.getUrn()); + + return CompletableFuture.supplyAsync( + () -> { + try { + if (!AuthorizationUtils.canManageStructuredProperties(context)) { + throw new AuthorizationException( + "Unable to delete structured property. Please contact your admin."); + } + _entityClient.deleteEntity(context.getOperationContext(), propertyUrn); + return true; + } catch (Exception e) { + throw new RuntimeException( + String.format("Failed to perform update against input %s", input), e); + } + }); + } +} diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/structuredproperty/StructuredPropertyMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/structuredproperty/StructuredPropertyMapper.java index cacb6958dc202..c539c65118ac6 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/structuredproperty/StructuredPropertyMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/structuredproperty/StructuredPropertyMapper.java @@ -2,7 +2,9 @@ import static com.linkedin.metadata.Constants.*; +import com.google.common.collect.ImmutableList; import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.UrnUtils; import com.linkedin.data.DataMap; import com.linkedin.data.template.StringArrayMap; import com.linkedin.datahub.graphql.QueryContext; @@ -46,6 +48,9 @@ public StructuredPropertyEntity apply( final StructuredPropertyEntity result = new StructuredPropertyEntity(); result.setUrn(entityResponse.getUrn().toString()); result.setType(EntityType.STRUCTURED_PROPERTY); + // set the default required values for a structured property in case references are still being + // cleaned up + setDefaultProperty(result); EnvelopedAspectMap aspectMap = entityResponse.getAspects(); MappingHelper mappingHelper = new MappingHelper<>(aspectMap, result); mappingHelper.mapToResult( @@ -134,4 +139,22 @@ private EntityTypeEntity createEntityTypeEntity(final String entityTypeUrnStr) { entityType.setType(EntityType.ENTITY_TYPE); return entityType; } + + /* + * In the case that a property is deleted and the references haven't been cleaned up yet (this process is async) + * set a default property to prevent APIs breaking. The UI queries for whether the entity exists and it will + * be filtered out. + */ + private void setDefaultProperty(final StructuredPropertyEntity result) { + StructuredPropertyDefinition definition = new StructuredPropertyDefinition(); + definition.setQualifiedName(""); + definition.setCardinality(PropertyCardinality.SINGLE); + definition.setImmutable(true); + definition.setValueType( + createDataTypeEntity(UrnUtils.getUrn("urn:li:dataType:datahub.string"))); + definition.setEntityTypes( + ImmutableList.of( + createEntityTypeEntity(UrnUtils.getUrn("urn:li:entityType:datahub.dataset")))); + result.setDefinition(definition); + } } diff --git a/datahub-graphql-core/src/main/resources/properties.graphql b/datahub-graphql-core/src/main/resources/properties.graphql index 6c1f910e02b0e..292381d064f36 100644 --- a/datahub-graphql-core/src/main/resources/properties.graphql +++ b/datahub-graphql-core/src/main/resources/properties.graphql @@ -18,6 +18,11 @@ extend type Mutation { Update an existing structured property """ updateStructuredProperty(input: UpdateStructuredPropertyInput!): StructuredPropertyEntity! + + """ + Delete an existing structured property + """ + deleteStructuredProperty(input: DeleteStructuredPropertyInput!): Boolean! } """ @@ -34,6 +39,11 @@ type StructuredPropertyEntity implements Entity { """ type: EntityType! + """ + Whether or not this entity exists on DataHub + """ + exists: Boolean + """ Definition of this structured property including its name """ @@ -457,3 +467,13 @@ input UpdateTypeQualifierInput { """ newAllowedTypes: [String!] } + +""" +Input for deleting a form +""" +input DeleteStructuredPropertyInput { + """ + The urn of the structured properties that is being deleted + """ + urn: String! +} diff --git a/datahub-web-react/src/app/entity/shared/tabs/Dataset/Schema/SchemaTab.tsx b/datahub-web-react/src/app/entity/shared/tabs/Dataset/Schema/SchemaTab.tsx index f9d7c8db02569..52c141282c345 100644 --- a/datahub-web-react/src/app/entity/shared/tabs/Dataset/Schema/SchemaTab.tsx +++ b/datahub-web-react/src/app/entity/shared/tabs/Dataset/Schema/SchemaTab.tsx @@ -79,7 +79,10 @@ export const SchemaTab = ({ properties }: { properties?: any }) => { const hasProperties = useMemo( () => entityWithSchema?.schemaMetadata?.fields?.some( - (schemaField) => !!schemaField.schemaFieldEntity?.structuredProperties?.properties?.length, + (schemaField) => + !!schemaField.schemaFieldEntity?.structuredProperties?.properties?.filter( + (prop) => prop.structuredProperty.exists, + )?.length, ), [entityWithSchema], ); diff --git a/datahub-web-react/src/app/entity/shared/tabs/Dataset/Schema/components/SchemaFieldDrawer/FieldProperties.tsx b/datahub-web-react/src/app/entity/shared/tabs/Dataset/Schema/components/SchemaFieldDrawer/FieldProperties.tsx index 689a191f469f5..9a0da20f22dfd 100644 --- a/datahub-web-react/src/app/entity/shared/tabs/Dataset/Schema/components/SchemaFieldDrawer/FieldProperties.tsx +++ b/datahub-web-react/src/app/entity/shared/tabs/Dataset/Schema/components/SchemaFieldDrawer/FieldProperties.tsx @@ -34,14 +34,16 @@ interface Props { export default function FieldProperties({ expandedField }: Props) { const { schemaFieldEntity } = expandedField; const { refetch } = useGetEntityWithSchema(true); + const properties = + schemaFieldEntity?.structuredProperties?.properties?.filter((prop) => prop.structuredProperty.exists) || []; - if (!schemaFieldEntity?.structuredProperties?.properties?.length) return null; + if (!schemaFieldEntity || !properties.length) return null; return ( <> Properties - {schemaFieldEntity.structuredProperties.properties.map((structuredProp) => { + {properties.map((structuredProp) => { const isRichText = structuredProp.structuredProperty.definition.valueType?.info.type === StdDataType.RichText; const valuesData = mapStructuredPropertyValues(structuredProp); diff --git a/datahub-web-react/src/app/entity/shared/tabs/Properties/useStructuredProperties.tsx b/datahub-web-react/src/app/entity/shared/tabs/Properties/useStructuredProperties.tsx index 5600d7c3e8498..86365b8232905 100644 --- a/datahub-web-react/src/app/entity/shared/tabs/Properties/useStructuredProperties.tsx +++ b/datahub-web-react/src/app/entity/shared/tabs/Properties/useStructuredProperties.tsx @@ -27,23 +27,26 @@ export function mapStructuredPropertyValues(structuredPropertiesEntry: Structure function getStructuredPropertyRows(entityData?: GenericEntityProperties | null) { const structuredPropertyRows: PropertyRow[] = []; - entityData?.structuredProperties?.properties?.forEach((structuredPropertiesEntry) => { - const { displayName, qualifiedName } = structuredPropertiesEntry.structuredProperty.definition; - structuredPropertyRows.push({ - displayName: displayName || qualifiedName, - qualifiedName, - values: mapStructuredPropertyValues(structuredPropertiesEntry), - dataType: structuredPropertiesEntry.structuredProperty.definition.valueType, - structuredProperty: structuredPropertiesEntry.structuredProperty, - type: - structuredPropertiesEntry.values[0] && structuredPropertiesEntry.values[0].__typename - ? { - type: typeNameToType[structuredPropertiesEntry.values[0].__typename].type, - nativeDataType: typeNameToType[structuredPropertiesEntry.values[0].__typename].nativeDataType, - } - : undefined, + entityData?.structuredProperties?.properties + ?.filter((prop) => prop.structuredProperty.exists) + .forEach((structuredPropertiesEntry) => { + const { displayName, qualifiedName } = structuredPropertiesEntry.structuredProperty.definition; + structuredPropertyRows.push({ + displayName: displayName || qualifiedName, + qualifiedName, + values: mapStructuredPropertyValues(structuredPropertiesEntry), + dataType: structuredPropertiesEntry.structuredProperty.definition.valueType, + structuredProperty: structuredPropertiesEntry.structuredProperty, + type: + structuredPropertiesEntry.values[0] && structuredPropertiesEntry.values[0].__typename + ? { + type: typeNameToType[structuredPropertiesEntry.values[0].__typename].type, + nativeDataType: + typeNameToType[structuredPropertiesEntry.values[0].__typename].nativeDataType, + } + : undefined, + }); }); - }); return structuredPropertyRows; } diff --git a/datahub-web-react/src/graphql/fragments.graphql b/datahub-web-react/src/graphql/fragments.graphql index e5bbb5f0dc29d..1874239751723 100644 --- a/datahub-web-react/src/graphql/fragments.graphql +++ b/datahub-web-react/src/graphql/fragments.graphql @@ -1314,6 +1314,7 @@ fragment structuredPropertyFields on StructuredPropertyEntity { fragment structuredPropertiesFields on StructuredPropertiesEntry { structuredProperty { + exists ...structuredPropertyFields } values { diff --git a/docs-website/src/pages/_components/QuickstartContent/quickstartcontent.module.scss b/docs-website/src/pages/_components/QuickstartContent/quickstartcontent.module.scss index a5df90f89ddf3..e31f0ec0e1bbb 100644 --- a/docs-website/src/pages/_components/QuickstartContent/quickstartcontent.module.scss +++ b/docs-website/src/pages/_components/QuickstartContent/quickstartcontent.module.scss @@ -235,6 +235,7 @@ min-width: 0; width: 100%; padding-left: 20px; + padding-right: 0; div { padding-left: 0; diff --git a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_datahub_emitter_operator_jinja_template_dag.json b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_datahub_emitter_operator_jinja_template_dag.json index 7c1bc3131fada..531ed6e0e1c63 100644 --- a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_datahub_emitter_operator_jinja_template_dag.json +++ b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_datahub_emitter_operator_jinja_template_dag.json @@ -19,7 +19,8 @@ }, "externalUrl": "http://airflow.example.com/tree?dag_id=datahub_emitter_operator_jinja_template_dag", "name": "datahub_emitter_operator_jinja_template_dag", - "description": "An example dag with jinja template" + "description": "An example dag with jinja template", + "env": "PROD" } } }, @@ -140,13 +141,14 @@ "downstream_task_ids": "[]", "inlets": "[]", "outlets": "[]", - "openlineage_run_facet_unknownSourceAttribute": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.18.0/integration/airflow\", \"_schemaURL\": \"https://openlineage.io/spec/2-0-2/OpenLineage.json#/$defs/BaseFacet\", \"unknownItems\": [{\"name\": \"DatahubEmitterOperator\", \"properties\": {\"depends_on_past\": false, \"downstream_task_ids\": \"[]\", \"execution_timeout\": \"<>\", \"executor_config\": {}, \"ignore_first_depends_on_past\": true, \"is_setup\": false, \"is_teardown\": false, \"mapped\": false, \"operator_class\": \"datahub_airflow_plugin.operators.datahub.DatahubEmitterOperator\", \"owner\": \"airflow\", \"priority_weight\": 1, \"queue\": \"default\", \"retries\": 0, \"retry_exponential_backoff\": false, \"task_id\": \"datahub_emitter_operator_jinja_template_dag_task\", \"trigger_rule\": \"all_success\", \"upstream_task_ids\": \"[]\", \"wait_for_downstream\": false, \"wait_for_past_depends_before_skipping\": false, \"weight_rule\": \"downstream\"}, \"type\": \"operator\"}]}" + "openlineage_run_facet_unknownSourceAttribute": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.18.0/integration/airflow\", \"_schemaURL\": \"https://openlineage.io/spec/2-0-2/OpenLineage.json#/$defs/BaseFacet\", \"unknownItems\": [{\"name\": \"DatahubEmitterOperator\", \"properties\": {\"depends_on_past\": false, \"downstream_task_ids\": \"[]\", \"execution_timeout\": \"<>\", \"executor_config\": {}, \"ignore_first_depends_on_past\": true, \"is_setup\": false, \"is_teardown\": false, \"mapped\": false, \"multiple_outputs\": false, \"operator_class\": \"datahub_airflow_plugin.operators.datahub.DatahubEmitterOperator\", \"owner\": \"airflow\", \"priority_weight\": 1, \"queue\": \"default\", \"retries\": 0, \"retry_exponential_backoff\": false, \"task_id\": \"datahub_emitter_operator_jinja_template_dag_task\", \"trigger_rule\": \"all_success\", \"upstream_task_ids\": \"[]\", \"wait_for_downstream\": false, \"wait_for_past_depends_before_skipping\": false, \"weight_rule\": \"<>\"}, \"type\": \"operator\"}]}" }, "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=datahub_emitter_operator_jinja_template_dag&_flt_3_task_id=datahub_emitter_operator_jinja_template_dag_task", "name": "datahub_emitter_operator_jinja_template_dag_task", "type": { "string": "COMMAND" - } + }, + "env": "PROD" } } }, @@ -233,16 +235,16 @@ "state": "running", "operator": "DatahubEmitterOperator", "priority_weight": "1", - "log_url": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=datahub_emitter_operator_jinja_template_dag_task&dag_id=datahub_emitter_operator_jinja_template_dag&map_index=-1", + "log_url": "http://airflow.example.com/dags/datahub_emitter_operator_jinja_template_dag/grid?dag_run_id=manual_run_test&task_id=datahub_emitter_operator_jinja_template_dag_task&map_index=-1&tab=logs", "orchestrator": "airflow", "dag_id": "datahub_emitter_operator_jinja_template_dag", "task_id": "datahub_emitter_operator_jinja_template_dag_task" }, - "externalUrl": "http://airflow.example.com/log?execution_date=2023-09-27T21%3A34%3A38%2B00%3A00&task_id=datahub_emitter_operator_jinja_template_dag_task&dag_id=datahub_emitter_operator_jinja_template_dag&map_index=-1", + "externalUrl": "http://airflow.example.com/dags/datahub_emitter_operator_jinja_template_dag/grid?dag_run_id=manual_run_test&task_id=datahub_emitter_operator_jinja_template_dag_task&map_index=-1&tab=logs", "name": "datahub_emitter_operator_jinja_template_dag_datahub_emitter_operator_jinja_template_dag_task_manual_run_test", "type": "BATCH_AD_HOC", "created": { - "time": 1726570937652, + "time": 1726699852909, "actor": "urn:li:corpuser:datahub" } } @@ -267,7 +269,7 @@ "aspectName": "dataProcessInstanceRunEvent", "aspect": { "json": { - "timestampMillis": 1726570937652, + "timestampMillis": 1726699852909, "partitionSpec": { "partition": "FULL_TABLE_SNAPSHOT", "type": "FULL_TABLE" @@ -360,13 +362,14 @@ "downstream_task_ids": "[]", "inlets": "[]", "outlets": "[]", - "openlineage_run_facet_unknownSourceAttribute": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.18.0/integration/airflow\", \"_schemaURL\": \"https://openlineage.io/spec/2-0-2/OpenLineage.json#/$defs/BaseFacet\", \"unknownItems\": [{\"name\": \"DatahubEmitterOperator\", \"properties\": {\"depends_on_past\": false, \"downstream_task_ids\": \"[]\", \"execution_timeout\": \"<>\", \"executor_config\": {}, \"ignore_first_depends_on_past\": true, \"is_setup\": false, \"is_teardown\": false, \"mapped\": false, \"operator_class\": \"datahub_airflow_plugin.operators.datahub.DatahubEmitterOperator\", \"owner\": \"airflow\", \"priority_weight\": 1, \"queue\": \"default\", \"retries\": 0, \"retry_exponential_backoff\": false, \"task_id\": \"datahub_emitter_operator_jinja_template_dag_task\", \"trigger_rule\": \"all_success\", \"upstream_task_ids\": \"[]\", \"wait_for_downstream\": false, \"wait_for_past_depends_before_skipping\": false, \"weight_rule\": \"downstream\"}, \"type\": \"operator\"}]}" + "openlineage_run_facet_unknownSourceAttribute": "{\"_producer\": \"https://github.com/OpenLineage/OpenLineage/tree/1.18.0/integration/airflow\", \"_schemaURL\": \"https://openlineage.io/spec/2-0-2/OpenLineage.json#/$defs/BaseFacet\", \"unknownItems\": [{\"name\": \"DatahubEmitterOperator\", \"properties\": {\"depends_on_past\": false, \"downstream_task_ids\": \"[]\", \"execution_timeout\": \"<>\", \"executor_config\": {}, \"ignore_first_depends_on_past\": true, \"is_setup\": false, \"is_teardown\": false, \"mapped\": false, \"multiple_outputs\": false, \"operator_class\": \"datahub_airflow_plugin.operators.datahub.DatahubEmitterOperator\", \"owner\": \"airflow\", \"priority_weight\": 1, \"queue\": \"default\", \"retries\": 0, \"retry_exponential_backoff\": false, \"task_id\": \"datahub_emitter_operator_jinja_template_dag_task\", \"trigger_rule\": \"all_success\", \"upstream_task_ids\": \"[]\", \"wait_for_downstream\": false, \"wait_for_past_depends_before_skipping\": false, \"weight_rule\": \"<>\"}, \"type\": \"operator\"}]}" }, "externalUrl": "http://airflow.example.com/taskinstance/list/?flt1_dag_id_equals=datahub_emitter_operator_jinja_template_dag&_flt_3_task_id=datahub_emitter_operator_jinja_template_dag_task", "name": "datahub_emitter_operator_jinja_template_dag_task", "type": { "string": "COMMAND" - } + }, + "env": "PROD" } } }, @@ -441,7 +444,7 @@ "aspectName": "dataProcessInstanceRunEvent", "aspect": { "json": { - "timestampMillis": 1726570937811, + "timestampMillis": 1726699853392, "partitionSpec": { "partition": "FULL_TABLE_SNAPSHOT", "type": "FULL_TABLE" diff --git a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_datahub_emitter_operator_jinja_template_dag_no_dag_listener.json b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_datahub_emitter_operator_jinja_template_dag_no_dag_listener.json index c7b4630b5466a..17fa89985adff 100644 --- a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_datahub_emitter_operator_jinja_template_dag_no_dag_listener.json +++ b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_datahub_emitter_operator_jinja_template_dag_no_dag_listener.json @@ -19,7 +19,8 @@ }, "externalUrl": "http://airflow.example.com/tree?dag_id=datahub_emitter_operator_jinja_template_dag", "name": "datahub_emitter_operator_jinja_template_dag", - "description": "An example dag with jinja template" + "description": "An example dag with jinja template", + "env": "PROD" } } }, @@ -146,7 +147,8 @@ "name": "datahub_emitter_operator_jinja_template_dag_task", "type": { "string": "COMMAND" - } + }, + "env": "PROD" } } }, @@ -242,7 +244,7 @@ "name": "datahub_emitter_operator_jinja_template_dag_datahub_emitter_operator_jinja_template_dag_task_manual_run_test", "type": "BATCH_AD_HOC", "created": { - "time": 1726570903946, + "time": 1726621020280, "actor": "urn:li:corpuser:datahub" } } @@ -267,7 +269,7 @@ "aspectName": "dataProcessInstanceRunEvent", "aspect": { "json": { - "timestampMillis": 1726570903946, + "timestampMillis": 1726621020280, "partitionSpec": { "partition": "FULL_TABLE_SNAPSHOT", "type": "FULL_TABLE" @@ -366,7 +368,8 @@ "name": "datahub_emitter_operator_jinja_template_dag_task", "type": { "string": "COMMAND" - } + }, + "env": "PROD" } } }, @@ -441,7 +444,7 @@ "aspectName": "dataProcessInstanceRunEvent", "aspect": { "json": { - "timestampMillis": 1726570904070, + "timestampMillis": 1726621020427, "partitionSpec": { "partition": "FULL_TABLE_SNAPSHOT", "type": "FULL_TABLE" diff --git a/metadata-ingestion/src/datahub/testing/compare_metadata_json.py b/metadata-ingestion/src/datahub/testing/compare_metadata_json.py index 90d80dbeec8b2..61b222f8d2dd5 100644 --- a/metadata-ingestion/src/datahub/testing/compare_metadata_json.py +++ b/metadata-ingestion/src/datahub/testing/compare_metadata_json.py @@ -84,10 +84,11 @@ def assert_metadata_files_equal( diff = diff_metadata_json(output, golden, ignore_paths, ignore_order=ignore_order) if diff and update_golden: - if isinstance(diff, MCPDiff): + if isinstance(diff, MCPDiff) and diff.is_delta_valid: diff.apply_delta(golden) write_metadata_file(pathlib.Path(golden_path), golden) else: + # Fallback: just overwrite the golden file shutil.copyfile(str(output_path), str(golden_path)) return diff --git a/metadata-ingestion/src/datahub/testing/mcp_diff.py b/metadata-ingestion/src/datahub/testing/mcp_diff.py index eb1d9371cc3c4..95b8e83c7a64a 100644 --- a/metadata-ingestion/src/datahub/testing/mcp_diff.py +++ b/metadata-ingestion/src/datahub/testing/mcp_diff.py @@ -149,6 +149,7 @@ class MCPDiff: aspect_changes: Dict[str, Dict[str, MCPAspectDiff]] # urn -> aspect -> diff urns_added: Set[str] urns_removed: Set[str] + is_delta_valid: bool def __bool__(self) -> bool: return bool(self.aspect_changes) @@ -162,25 +163,31 @@ def create( ) -> "MCPDiff": ignore_paths = [cls.convert_path(path) for path in ignore_paths] + is_delta_valid = True aspect_changes: Dict[str, Dict[str, MCPAspectDiff]] = defaultdict(dict) for urn in golden.keys() | output.keys(): golden_map = golden.get(urn, {}) output_map = output.get(urn, {}) for aspect_name in golden_map.keys() | output_map.keys(): + t1 = golden_map.get(aspect_name, []) + t2 = output_map.get(aspect_name, []) diff = DeepDiff( - t1=golden_map.get(aspect_name, []), - t2=output_map.get(aspect_name, []), + t1=t1, + t2=t2, exclude_regex_paths=ignore_paths, ignore_order=True, custom_operators=[DeltaInfoOperator()], ) if diff: aspect_changes[urn][aspect_name] = MCPAspectDiff.create(diff) + if len(t1) > 1 or len(t2) > 1: + is_delta_valid = False return cls( urns_added=output.keys() - golden.keys(), urns_removed=golden.keys() - output.keys(), aspect_changes=aspect_changes, + is_delta_valid=is_delta_valid, ) @staticmethod @@ -193,9 +200,13 @@ def convert_path(path: str) -> str: ) def apply_delta(self, golden: List[Dict[str, Any]]) -> None: + """Update a golden file to match an output file based on the diff. + + :param golden: Golden file represented as a list of MCPs, altered in-place. + """ aspect_diffs = [v for d in self.aspect_changes.values() for v in d.values()] for aspect_diff in aspect_diffs: - for (_, old, new), diffs in aspect_diff.aspects_changed.items(): + for (_, old, new) in aspect_diff.aspects_changed.keys(): golden[old.delta_info.idx] = new.delta_info.original indices_to_remove = set() @@ -268,7 +279,7 @@ def report_aspect(ga: AspectForDiff, idx: int, msg: str = "") -> str: suffix = "rd" else: suffix = "th" - ordinal = f"{(idx+1)}{suffix} " if idx else "" + ordinal = f"{(idx + 1)}{suffix} " if idx else "" return f"{ordinal}<{ga.aspect_name}> {msg}" @staticmethod