From 298c299cf1ec4713031c5295f3da4af06e023836 Mon Sep 17 00:00:00 2001
From: pie1nthesky <39328908+pie1nthesky@users.noreply.github.com>
Date: Wed, 17 Jul 2024 02:06:42 +0300
Subject: [PATCH] feat(ingest/postgres): fetch table sizes for profile (#10864)

---
Review note: add_profile_metadata was revised after this patch was generated,
so the post-image blob id on the postgres.py "index" line below is stale.

 .../src/datahub/ingestion/source/sql/postgres.py | 40 ++++++++++++++++
 .../postgres_all_db_mces_with_db_golden.json     |  3 ++-
 .../postgres/postgres_mces_with_db_golden.json   |  3 ++-
 3 files changed, 44 insertions(+), 2 deletions(-)

diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/postgres.py b/metadata-ingestion/src/datahub/ingestion/source/sql/postgres.py
index 0589a5e39d68e..12c98ef11a654 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/sql/postgres.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/sql/postgres.py
@@ -276,3 +276,43 @@ def get_identifier(
             return f"{self.config.database}.{regular}"
         current_database = self.get_db_name(inspector)
         return f"{current_database}.{regular}"
+
+    def add_profile_metadata(self, inspector: Inspector) -> None:
+        """Record each table's on-disk size for use in dataset profiles.
+
+        Runs one query against ``information_schema.tables`` and stores the
+        ``pg_table_size`` of every row in
+        ``self.profile_metadata_info.dataset_name_to_storage_bytes``, keyed by
+        the name ``get_identifier`` returns. Best effort: any failure is
+        logged and swallowed so that it cannot abort ingestion.
+        """
+        # Imported locally; text() is mandatory for raw SQL on SQLAlchemy 2.x
+        # and behaves the same on 1.x.
+        from sqlalchemy import text
+
+        # quote_ident() also escapes double quotes embedded in a name, which
+        # wrapping the name in literal '"' characters does not.
+        query = text(
+            """
+            SELECT table_catalog, table_schema, table_name,
+                   pg_table_size(
+                       quote_ident(table_catalog) || '.' ||
+                       quote_ident(table_schema) || '.' ||
+                       quote_ident(table_name)
+                   ) AS table_size
+            FROM information_schema.tables
+            """
+        )
+        try:
+            storage_bytes = self.profile_metadata_info.dataset_name_to_storage_bytes
+            with inspector.engine.connect() as conn:
+                for row in conn.execute(query):
+                    dataset_name = self.get_identifier(
+                        schema=row.table_schema,
+                        entity=row.table_name,
+                        inspector=inspector,
+                    )
+                    storage_bytes[dataset_name] = row.table_size
+        except Exception as e:
+            # exc_info keeps the traceback, which the message alone drops.
+            logger.error(f"failed to fetch profile metadata: {e}", exc_info=True)
diff --git a/metadata-ingestion/tests/integration/postgres/postgres_all_db_mces_with_db_golden.json b/metadata-ingestion/tests/integration/postgres/postgres_all_db_mces_with_db_golden.json
index b9b2a3b2141a8..f35ff9fdb9d15 100644
--- a/metadata-ingestion/tests/integration/postgres/postgres_all_db_mces_with_db_golden.json
+++ b/metadata-ingestion/tests/integration/postgres/postgres_all_db_mces_with_db_golden.json
@@ -832,7 +832,8 @@
                 {
                     "fieldPath": "metadata_json"
                 }
-            ]
+            ],
+            "sizeInBytes": 16384
         }
     },
     "systemMetadata": {
diff --git a/metadata-ingestion/tests/integration/postgres/postgres_mces_with_db_golden.json b/metadata-ingestion/tests/integration/postgres/postgres_mces_with_db_golden.json
index 832b46e096ae0..f47789fc470cd 100644
--- a/metadata-ingestion/tests/integration/postgres/postgres_mces_with_db_golden.json
+++ b/metadata-ingestion/tests/integration/postgres/postgres_mces_with_db_golden.json
@@ -600,7 +600,8 @@
             },
             "rowCount": 2,
             "columnCount": 9,
-            "fieldProfiles": []
+            "fieldProfiles": [],
+            "sizeInBytes": 16384
         }
     },
     "systemMetadata": {