Skip to content

Commit

Permalink
fix(ingest/bigquery): increase logging in bigquery-queries extractor
Browse files Browse the repository at this point in the history
  • Loading branch information
hsheth2 committed Nov 1, 2024
1 parent 00a2751 commit 7fff89c
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 5 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ def __init__(self, ctx: PipelineContext, config: BigQueryQueriesSourceConfig):
schema_api=BigQuerySchemaApi(
self.report.schema_api_perf,
self.connection,
projects_client=self.config.connection.get_projects_client(),
projects_client=self.connection,
),
config=self.config,
structured_report=self.report,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -276,18 +276,23 @@ def get_workunits_internal(
logger.info(f"Found {self.report.num_unique_queries} unique queries")

with self.report.audit_log_load_timer, queries_deduped:
i = 0
last_log_time = datetime.now()
last_report_time = datetime.now()
for _, query_instances in queries_deduped.items():
for query in query_instances.values():
if i > 0 and i % 10000 == 0:
now = datetime.now()
if (now - last_log_time).total_seconds() >= 60:
logger.info(
f"Added {i} query log equeries_dedupedntries to SQL aggregator"
f"Added {i} deduplicated query log entries to SQL aggregator"
)
last_log_time = now

if (now - last_report_time).total_seconds() >= 300:
if self.report.sql_aggregator:
logger.info(self.report.sql_aggregator.as_string())
last_report_time = now

self.aggregator.add(query)
i += 1

yield from auto_workunit(self.aggregator.gen_metadata())

Expand Down

0 comments on commit 7fff89c

Please sign in to comment.