Skip to content

Commit

Permalink
Added script to search for missing batches in billing.
Browse files Browse the repository at this point in the history
  • Loading branch information
milo-hyben committed Jul 12, 2024
1 parent e7d5cb9 commit bf9c7ae
Showing 1 changed file with 36 additions and 30 deletions.
66 changes: 36 additions & 30 deletions scripts/billing_missing_batches.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,13 @@
import json
import logging
import os
import re
import sys

import google.cloud.bigquery as bq
import numpy as np
import pandas as pd

# name of the BQ view queried to detect missing batch ids
SM_GCP_BQ_BATCHES_VIEW = os.getenv('SM_GCP_BQ_BATCHES_VIEW')
GCP_PROJECT = os.getenv('GCP_PROJECT')

logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
Expand All @@ -38,7 +36,7 @@ def get_min_max_batch_id(
bq_client = bq.Client()
job_config = bq.QueryJobConfig(
query_parameters=[
bq.ScalarQueryParameter("min_day", "STRING", from_day),
bq.ScalarQueryParameter('min_day', 'STRING', from_day),
],
)
query_job_result = bq_client.query(query, job_config=job_config)
Expand All @@ -65,7 +63,7 @@ def get_missing_batches(
),
t as (
SELECT b.batch_id
FROM b LEFT JOIN
FROM b LEFT JOIN
`{SM_GCP_BQ_BATCHES_VIEW}` d on d.batch_id = b.batch_id
WHERE d.batch_id IS NULL
)
Expand All @@ -76,8 +74,8 @@ def get_missing_batches(
bq_client = bq.Client()
job_config = bq.QueryJobConfig(
query_parameters=[
bq.ScalarQueryParameter("min_batch_id", "INT64", min_batch_id),
bq.ScalarQueryParameter("max_batch_id", "INT64", max_batch_id),
bq.ScalarQueryParameter('min_batch_id', 'INT64', min_batch_id),
bq.ScalarQueryParameter('max_batch_id', 'INT64', max_batch_id),
],
)
query_job_result = bq_client.query(query, job_config=job_config)
Expand All @@ -89,6 +87,28 @@ def get_missing_batches(
return results


def get_hail_token() -> str:
    """
    Return the Hail token needed to talk to the Hail Batch API.

    TODO: not implemented yet, an empty string is returned as a placeholder.

    The planned lookup is kept below as a commented-out sketch:
    - in DEV mode (env var DEV set to '1', 'true' or 'yes') read the
      'default' entry from the local ~/.hail/tokens.json file
    - otherwise read the 'aggregate-billing-hail-token' secret from
      GCP_PROJECT and raise ValueError when no value is found
    """
    # if os.getenv('DEV') in ('1', 'true', 'yes'):
    #     with open(os.path.expanduser('~/.hail/tokens.json'), encoding='utf-8') as f:
    #         config = json.load(f)
    #         return config['default']
    #
    # assert GCP_PROJECT
    # secret_value = read_secret(
    #     GCP_PROJECT,
    #     'aggregate-billing-hail-token',
    #     fail_gracefully=False,
    # )
    # if not secret_value:
    #     raise ValueError('Could not find Hail token')
    #
    # return secret_value

    # NOTE(review): read_secret is not imported in the visible part of this
    # file - confirm where it comes from before enabling the sketch above.
    placeholder_token = ''
    return placeholder_token


def main():
"""
Expect year and optional output path as command line argument
Expand All @@ -98,6 +118,10 @@ def main():
print('SM_GCP_BQ_BATCHES_VIEW is not set')
sys.exit(1)

if not GCP_PROJECT:
print('GCP_PROJECT is not set')
sys.exit(1)

parser = argparse.ArgumentParser()
parser.add_argument(
'-st',
Expand All @@ -116,12 +140,13 @@ def main():
if not missing_batches:
print('No missing batches found')
return

# we need to cross check with Hail Batch API to see if the batch information is available
# otherwise we would get alerts for missing batches when reloading
batches_to_be_loaded = []
for b in missing_batches:

# for b in missing_batches:
# TODO: check if batch is available in Hail
batches_to_be_loaded = missing_batches

# group the batches to be loaded
# if the difference between two consecutive batches is more than 50, then start a new group
Expand All @@ -137,7 +162,7 @@ def main():
batches_group[-1].append(b)

prev_batch_id = b

print('Batches to be loaded, here are relevent URL calls to be made manually:')
for i, group in enumerate(batches_group):
print(f'Group {i+1}: {group}')
Expand All @@ -153,25 +178,6 @@ def main():
)


# if message := request_data.get('message'):
# if attributes := message.get('attributes'):
# if 'batch_ids' in attributes:
# request_data = attributes
# elif 'data' in message:
# # data field can be rubbish, esp. when passed from pubsub
# # if it fails, then just return None
# try:
# request_data = json.loads(b64decode(message['data']))
# except ValueError:
# logger.warning(f'Data is invalid JSON: {message["data"]}')
# return None


# https://billing-aggregator-hail-billing-function-e174484-hrc4bbiayq-ts.a.run.app/

# https://billing-aggregator-seqr-billing-function-5d2e4d2-hrc4bbiayq-ts.a.run.app/


if __name__ == '__main__':
# execute main function
main()

0 comments on commit bf9c7ae

Please sign in to comment.