
Merge branch 'main' into br_ibge_criacao_municipio
laura-l-amaral authored Sep 17, 2024
2 parents cbf82d6 + d13de34 commit ea6d1aa
Showing 456 changed files with 104,122 additions and 7,264 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/cd.yaml
@@ -98,4 +98,4 @@ jobs:
       run: poetry install --only=dev
     - name: Run script for changing metadata status
       run: |-
-        python .github/workflows/scripts/change_metadata_status.py --modified-files ${{ steps.changed-files.outputs.all_modified_files }} --graphql-url ${{ secrets.BACKEND_GRAPHQL_URL }} --status published --email ${{ secrets.BACKEND_EMAIL }} --password ${{ secrets.BACKEND_PASSWORD }}
+        poetry run python .github/workflows/scripts/change_metadata_status.py --modified-files ${{ steps.changed-files.outputs.all_modified_files }} --graphql-url ${{ secrets.BACKEND_GRAPHQL_URL }} --status published --email ${{ secrets.BACKEND_EMAIL }} --password ${{ secrets.BACKEND_PASSWORD }}
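The one-line fix above matters because the preceding step installs dependencies with `poetry install --only=dev`, which puts them in Poetry's virtualenv; a bare `python` on the runner would not see them. For orientation only, a hedged sketch of the CLI surface the step invokes — the flags are taken from the run line above, but the real `.github/workflows/scripts/change_metadata_status.py` may be implemented differently:

```python
# Illustrative sketch only: argparse flags inferred from the workflow step;
# not the repo's actual change_metadata_status.py.
import argparse

parser = argparse.ArgumentParser(description="Set metadata status on the backend via GraphQL")
parser.add_argument("--modified-files", nargs="+", help="files touched by the merge")
parser.add_argument("--graphql-url", required=True, help="backend GraphQL endpoint")
parser.add_argument("--status", default="published", help="status to apply")
parser.add_argument("--email", required=True, help="backend login email")
parser.add_argument("--password", required=True, help="backend login password")
args = parser.parse_args()
```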
34 changes: 34 additions & 0 deletions .github/workflows/check-bq-project-name.yml
@@ -0,0 +1,34 @@
---
name: Check BQ project name
on:
  workflow_dispatch:
  pull_request:
    paths: ['**/*.sql']
jobs:
  check_bucket_name:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v2
      - name: Get changed files
        id: get_files
        uses: dorny/paths-filter@v2
        with:
          list-files: shell
          filters: |
            pr:
              - added|deleted|modified: '**'
      - name: Install Python
        uses: actions/setup-python@v2
        with:
          python-version: 3.x
      - name: Run Python script
        run: |-
          for file in ${{ steps.get_files.outputs.pr_files }}; do
            if [[ $file == *.sql ]]; then
              echo "SQL file detected: $file"
              python .github/workflows/scripts/check_sql_files.py $file
            else
              echo "Not a SQL file: $file"
            fi
          done
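Because GitHub Actions runs this `run` block under bash with `-e` by default, the job fails as soon as the checker exits non-zero for the first offending file. A hypothetical alternative (not part of this commit) that checks every changed file and fails once at the end, so a PR author sees all offenders in one run:

```python
# Hypothetical aggregate runner: reports every .sql file that fails the
# check before exiting, instead of stopping at the first offender.
import subprocess
import sys

def main(files: list[str]) -> int:
    failed = False
    for file in files:
        if file.endswith(".sql"):
            print(f"SQL file detected: {file}")
            result = subprocess.run(
                [sys.executable, ".github/workflows/scripts/check_sql_files.py", file]
            )
            failed = failed or result.returncode != 0
        else:
            print(f"Not a SQL file: {file}")
    return 1 if failed else 0

if __name__ == "__main__":
    sys.exit(main(sys.argv[1:]))
```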
4 changes: 2 additions & 2 deletions .github/workflows/ci-dbt.yaml
@@ -2,7 +2,7 @@
 name: CI dbt
 on:
   pull_request:
-    paths: ['**.sql', '**.yaml', '**.yml']
+    branches: [main]
 jobs:
   lint:
     name: Lint dbt
@@ -22,4 +22,4 @@ jobs:
       - name: Lint sql
         run: poetry run sqlfmt --diff .
       - name: Lint yaml
-        run: poetry run yamlfix --exclude ".kubernetes/**/*" .
+        run: poetry run yamlfix --exclude ".kubernetes/**/*" --check .
36 changes: 36 additions & 0 deletions .github/workflows/elementary.yaml
@@ -0,0 +1,36 @@
---
name: Deploy Elementary Report
on:
  push:
    branches: [main, master]
  schedule:
    - cron: 00 22 * * 1-5
  workflow_dispatch:
jobs:
  elementary:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout dbt project
        uses: actions/checkout@v3
      - name: Run Elementary
        uses: elementary-data/run-elementary-action@v1.12
        with:
          warehouse-type: bigquery
          adapter-version: 1.5.9
          profiles-yml: ${{ secrets.ELEMENTARY_PROFILES_YML }}
          edr-command: edr report --file-path "report.html" --days-back 90 && edr
            send-report --google-service-account-path "/tmp/gcs_keyfile.json" --gcs-bucket-name
            "basedosdados" --update-bucket-website "true" --days-back 90
          bigquery-keyfile: ${{ secrets.BIGQUERY_KEYFILE }}
          gcs-keyfile: ${{ secrets.GCS_KEYFILE }}
      - name: Upload report
        uses: actions/upload-artifact@v3
        with:
          name: report.html
          path: report.html
      - name: Upload log
        if: always()
        uses: actions/upload-artifact@v3
        with:
          name: edr.log
          path: edr.log
24 changes: 24 additions & 0 deletions .github/workflows/scripts/check_sql_files.py
@@ -0,0 +1,24 @@
import argparse
import os

def check_sql_files(file):
    found_staging = False
    if os.path.exists(file) and file.endswith(".sql"):
        with open(file, "r") as f:
            lines = f.readlines()
        for line in lines:
            if "basedosdados-dev" in line:
                found_staging = True
                print(f"Found 'basedosdados-dev' in {file}")
                break
    return found_staging

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Check for 'basedosdados-dev' occurrences in SQL files.")
    parser.add_argument("file", help="Path to the SQL file to check")
    args = parser.parse_args()

    if check_sql_files(args.file):
        exit(1)
    else:
        print("No occurrences of 'basedosdados-dev' found in SQL files.")
20 changes: 10 additions & 10 deletions .github/workflows/scripts/table_approve.py
@@ -111,7 +111,7 @@ def push_table_to_bq(
     Dataset(dataset_id).update(mode="prod")
     delete_storage_path = file_path.replace("./downloaded_data/", "")
     print(
-        f"DELETE HEADER FILE FROM basedosdados/staing/{dataset_id}_staging/{table_id}/{delete_storage_path}"
+        f"DELETE HEADER FILE FROM basedosdados/staging/{dataset_id}_staging/{table_id}/{delete_storage_path}"
     )
     st = Storage(dataset_id=dataset_id, table_id=table_id)
     st.delete_file(filename=delete_storage_path, mode="staging")
@@ -146,27 +146,27 @@ def save_header_files(dataset_id, table_id):
             print("Found blob: ", str(blob.name))
             print("Renamed blob: ", blob_path)
             break
-    ### save table header in storage
-
-    print(f"DOWNLOAD HEADER FILE FROM basedosdados-dev.{dataset_id}_staging.{table_id}")
-    query = f"""
-    SELECT * FROM `basedosdados-dev.{dataset_id}_staging.{table_id}` LIMIT 1
-    """
-    df = bd.read_sql(query, billing_project_id="basedosdados", from_file=True)
-    df = df.drop(columns=partitions)
-
     file_name = blob_path.split("/")[-1]
     file_type = file_name.split(".")[-1]

     path = Path(blob_path.replace(f"/{file_name}", ""))
     path.mkdir(parents=True, exist_ok=True)

+    ### save table header in storage
     if file_type == "csv":
+        print(f"DOWNLOAD HEADER FILE FROM basedosdados-dev.{dataset_id}_staging.{table_id}")
+        query = f"""
+        SELECT * FROM `basedosdados-dev.{dataset_id}_staging.{table_id}` LIMIT 1
+        """
+        df = bd.read_sql(query, billing_project_id="basedosdados", from_file=True)
+        df = df.drop(columns=partitions)
+
         file_path = f"./{path}/table_approve_temp_file_271828.csv"
         df.to_csv(file_path, index=False)
     elif file_type == "parquet":
         file_path = f"./{path}/table_approve_temp_file_271828.parquet"
         df.to_parquet(file_path)
+        blob.download_to_filename(file_path)
     print("SAVE HEADER FILE: ", file_path)
     return file_path
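The hunk above moves the one-row header query inside the CSV branch, so parquet tables skip the BigQuery round-trip and copy the staging blob directly. Note that after the move, `df` is no longer defined when the parquet branch runs, so the surviving `df.to_parquet(file_path)` call looks like a leftover. A minimal sketch of the intended flow with that call dropped (names simplified; `bd`, `blob`, `path`, and `partitions` come from the surrounding function — this is a restatement, not the repo's code verbatim):

```python
# Simplified sketch of the new save-header flow in save_header_files.
def save_header_file(file_type, path, blob, dataset_id, table_id, partitions):
    if file_type == "csv":
        # CSV: pull a single row from staging to materialize just the header.
        query = f"SELECT * FROM `basedosdados-dev.{dataset_id}_staging.{table_id}` LIMIT 1"
        df = bd.read_sql(query, billing_project_id="basedosdados", from_file=True)
        df = df.drop(columns=partitions)
        file_path = f"./{path}/table_approve_temp_file_271828.csv"
        df.to_csv(file_path, index=False)
    elif file_type == "parquet":
        # Parquet: copy the staging blob as-is; no header query needed.
        file_path = f"./{path}/table_approve_temp_file_271828.parquet"
        blob.download_to_filename(file_path)
    print("SAVE HEADER FILE: ", file_path)
    return file_path
```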
