From 20e2cc7eca92cc61b7761adc7793f2137f8c1994 Mon Sep 17 00:00:00 2001 From: dushayntAW <158567391+dushayntAW@users.noreply.github.com> Date: Mon, 15 Apr 2024 20:15:08 +0530 Subject: [PATCH] fix(ingest/csv): add support multiple ownership type for the same dataset (#10287) --- .../datahub/ingestion/source/csv_enricher.py | 19 ++++++++---- .../csv-enricher/csv_enricher_golden.json | 30 +++++++++++++++++++ .../csv-enricher/csv_enricher_test_data.csv | 3 +- 3 files changed, 45 insertions(+), 7 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/csv_enricher.py b/metadata-ingestion/src/datahub/ingestion/source/csv_enricher.py index 7a2dfa7ae0705..ec3d1715aaece 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/csv_enricher.py +++ b/metadata-ingestion/src/datahub/ingestion/source/csv_enricher.py @@ -228,12 +228,19 @@ def get_resource_owners_work_unit( # If we want to overwrite or there are no existing tags, create a new GlobalTags object current_ownership = OwnershipClass(owners, lastModified=get_audit_stamp()) else: - current_owner_urns: Set[str] = set( - [owner.owner for owner in current_ownership.owners] - ) - owners_filtered: List[OwnerClass] = [ - owner for owner in owners if owner.owner not in current_owner_urns - ] + owners_filtered: List[OwnerClass] = [] + for owner in owners: + owner_exists = False + for current_owner in current_ownership.owners: + if ( + owner.owner == current_owner.owner + and owner.type == current_owner.type + ): + owner_exists = True + break + if not owner_exists: + owners_filtered.append(owner) + # If there are no new owners to add, we don't need to emit a work unit. if len(owners_filtered) <= 0: return None diff --git a/metadata-ingestion/tests/integration/csv-enricher/csv_enricher_golden.json b/metadata-ingestion/tests/integration/csv-enricher/csv_enricher_golden.json index d56eb50843dd0..9dfd597615c5a 100644 --- a/metadata-ingestion/tests/integration/csv-enricher/csv_enricher_golden.json +++ b/metadata-ingestion/tests/integration/csv-enricher/csv_enricher_golden.json @@ -306,6 +306,36 @@ "runId": "test-csv-enricher" } }, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(looker,baz)", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:datahub", + "type": "BUSINESS_OWNER" + }, + { + "owner": "urn:li:corpuser:jdoe", + "type": "BUSINESS_OWNER" + } + ], + "ownerTypes": {}, + "lastModified": { + "time": 1643871600000, + "actor": "urn:li:corpuser:ingestion" + } + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "test-csv-enricher", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dashboard", "entityUrn": "urn:li:dashboard:(looker,baz)", diff --git a/metadata-ingestion/tests/integration/csv-enricher/csv_enricher_test_data.csv b/metadata-ingestion/tests/integration/csv-enricher/csv_enricher_test_data.csv index 1589d1ea47652..fdbb4dabaf402 100644 --- a/metadata-ingestion/tests/integration/csv-enricher/csv_enricher_test_data.csv +++ b/metadata-ingestion/tests/integration/csv-enricher/csv_enricher_test_data.csv @@ -4,7 +4,7 @@ resource,subresource,glossary_terms,tags,owners,ownership_type,description,domai "urn:li:dataset:(urn:li:dataPlatform:hive,SampleHiveDataset,PROD)",field_bar,,[urn:li:tag:Legacy],,,field_bar? "urn:li:container:DATABASE",,[urn:li:glossaryTerm:CustomerAccount],[urn:li:tag:Legacy],[urn:li:corpuser:datahub|urn:li:corpuser:jdoe],TECHNICAL_OWNER,container description,urn:li:domain:Engineering "urn:li:chart:(looker,baz1)",,[urn:li:glossaryTerm:CustomerAccount],[urn:li:tag:Legacy],[urn:li:corpuser:datahub|urn:li:corpuser:jdoe],TECHNICAL_OWNER,new description,urn:li:domain:Engineering -"urn:li:dashboard:(looker,baz)",,[urn:li:glossaryTerm:CustomerAccount],[urn:li:tag:Legacy],[urn:li:corpuser:datahub|urn:li:corpuser:jdoe],TECHNICAL_OWNER,new description,urn:li:domain:Engineering +"urn:li:dashboard:(looker,baz)",,[urn:li:glossaryTerm:CustomerAccount],[urn:li:tag:Legacy],[urn:li:corpuser:datahub|urn:li:corpuser:jdoe],BUSINESS_OWNER,new description,urn:li:domain:Engineering "urn:li:mlFeature:(test_feature_table_all_feature_dtypes,test_BOOL_LIST_feature)",,[urn:li:glossaryTerm:CustomerAccount],[urn:li:tag:Legacy],[urn:li:corpuser:datahub|urn:li:corpuser:jdoe],TECHNICAL_OWNER,new description,urn:li:domain:Engineering "urn:li:mlFeatureTable:(urn:li:dataPlatform:feast,user_features)",,[urn:li:glossaryTerm:CustomerAccount],[urn:li:tag:Legacy],[urn:li:corpuser:datahub|urn:li:corpuser:jdoe],TECHNICAL_OWNER,new description,urn:li:domain:Engineering "urn:li:mlPrimaryKey:(test_feature_table_all_feature_dtypes,dummy_entity_1)",,[urn:li:glossaryTerm:CustomerAccount],[urn:li:tag:Legacy],[urn:li:corpuser:datahub|urn:li:corpuser:jdoe],TECHNICAL_OWNER,new description,urn:li:domain:Engineering @@ -13,3 +13,4 @@ resource,subresource,glossary_terms,tags,owners,ownership_type,description,domai "urn:li:dataJob:(urn:li:dataFlow:(airflow,dag_abc,PROD),task_123)",,[urn:li:glossaryTerm:CustomerAccount],[urn:li:tag:Legacy],[urn:li:corpuser:datahub|urn:li:corpuser:jdoe],TECHNICAL_OWNER,new description,urn:li:domain:Engineering "urn:li:dataFlow:(airflow,dag_abc,PROD)",,[urn:li:glossaryTerm:CustomerAccount],[urn:li:tag:Legacy],[urn:li:corpuser:datahub|urn:li:corpuser:jdoe],TECHNICAL_OWNER,new description,urn:li:domain:Engineering "urn:li:notebook:(querybook,1234)",,[urn:li:glossaryTerm:CustomerAccount],[urn:li:tag:Legacy],[urn:li:corpuser:datahub|urn:li:corpuser:jdoe],TECHNICAL_OWNER,new description,urn:li:domain:Engineering +"urn:li:dashboard:(looker,baz)",,[urn:li:glossaryTerm:CustomerAccount],[urn:li:tag:Legacy],[urn:li:corpuser:datahub|urn:li:corpuser:jdoe],TECHNICAL_OWNER,new description,urn:li:domain:Engineering