Skip to content

Commit

Permalink
Merge branch 'main' into update-br-inep-sinopse-educacao-basica
Browse files Browse the repository at this point in the history
  • Loading branch information
aspeddro authored Mar 21, 2024
2 parents 4026b84 + f109c10 commit cd21dbd
Show file tree
Hide file tree
Showing 25 changed files with 636 additions and 39 deletions.
20 changes: 10 additions & 10 deletions .github/workflows/scripts/table_approve.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ def push_table_to_bq(
Dataset(dataset_id).update(mode="prod")
delete_storage_path = file_path.replace("./downloaded_data/", "")
print(
f"DELETE HEADER FILE FROM basedosdados/staing/{dataset_id}_staging/{table_id}/{delete_storage_path}"
f"DELETE HEADER FILE FROM basedosdados/staging/{dataset_id}_staging/{table_id}/{delete_storage_path}"
)
st = Storage(dataset_id=dataset_id, table_id=table_id)
st.delete_file(filename=delete_storage_path, mode="staging")
Expand Down Expand Up @@ -146,27 +146,27 @@ def save_header_files(dataset_id, table_id):
print("Found blob: ", str(blob.name))
print("Renamed blob: ", blob_path)
break
### save table header in storage

print(f"DOWNLOAD HEADER FILE FROM basedosdados-dev.{dataset_id}_staging.{table_id}")
query = f"""
SELECT * FROM `basedosdados-dev.{dataset_id}_staging.{table_id}` LIMIT 1
"""
df = bd.read_sql(query, billing_project_id="basedosdados", from_file=True)
df = df.drop(columns=partitions)

file_name = blob_path.split("/")[-1]
file_type = file_name.split(".")[-1]

path = Path(blob_path.replace(f"/{file_name}", ""))
path.mkdir(parents=True, exist_ok=True)

### save table header in storage
if file_type == "csv":
print(f"DOWNLOAD HEADER FILE FROM basedosdados-dev.{dataset_id}_staging.{table_id}")
query = f"""
SELECT * FROM `basedosdados-dev.{dataset_id}_staging.{table_id}` LIMIT 1
"""
df = bd.read_sql(query, billing_project_id="basedosdados", from_file=True)
df = df.drop(columns=partitions)

file_path = f"./{path}/table_approve_temp_file_271828.csv"
df.to_csv(file_path, index=False)
elif file_type == "parquet":
file_path = f"./{path}/table_approve_temp_file_271828.parquet"
df.to_parquet(file_path)
blob.download_to_filename(file_path)
print("SAVE HEADER FILE: ", file_path)
return file_path

Expand Down
4 changes: 2 additions & 2 deletions dbt_project.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@ profile: default
vars:
disable_run_results: false
disable_tests_results: false
disable_dbt_artifacts_autoupload: false
disable_dbt_invocation_autoupload: false
disable_dbt_artifacts_autoupload: true
disable_dbt_invocation_autoupload: true
# These configurations specify where dbt should look for different types of files.
# The `source-paths` config, for example, states that models in this project can be
# found in the "models/" directory. You probably won't need to change these!
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
{#
  dbt model that builds the auxilio_brasil table in the
  br_cgu_beneficios_cidadao schema from the staging table of the same name.

  The table is fully rebuilt on each run (materialized="table"), partitioned
  by the integer column ano_competencia and clustered by sigla_uf and
  id_municipio.

  NOTE(review): BigQuery integer-range partitioning treats "end" as exclusive,
  so with start=2020 / end=2023 any row whose ano_competencia is 2023 or later
  (or before 2020) would land in the unpartitioned bucket. Confirm this range
  matches the years actually present in the data.
#}
{{
config(
alias="auxilio_brasil",
schema="br_cgu_beneficios_cidadao",
materialized="table",
partition_by={
"field": "ano_competencia",
"data_type": "int64",
"range": {
"start": 2020,
"end": 2023,
"interval": 1,
},
},
cluster_by=["sigla_uf", "id_municipio"],
)
}}
-- distinct drops rows that are identical across every selected column
select distinct
-- competence year and month: first and second pieces of mes_ref split on "-"
-- (presumably mes_ref is formatted as YYYY-MM; verify against staging)
safe_cast(split(mes_ref, '-')[offset(0)] as int64) as ano_competencia,
safe_cast(split(mes_ref, '-')[offset(1)] as int64) as mes_competencia,
-- reference year and month: first 4 and last 2 characters of mes
-- (presumably mes is formatted as YYYYMM; verify against staging)
safe_cast(left(mes, 4) as int64) ano_referencia,
safe_cast(right(mes, 2) as int64) mes_referencia,
safe_cast(id_municipio as string) id_municipio,
safe_cast(sigla_uf as string) sigla_uf,
-- an empty-string cpf is stored as null instead
case when cpf = '' then null else cpf end as cpf_favorecido,
safe_cast(nis as string) nis_favorecido,
safe_cast(nome as string) nome_favorecido,
-- safe_cast yields null rather than an error when the value is not numeric
safe_cast(valor_beneficio as float64) valor_parcela,
from `basedosdados-staging.br_cgu_beneficios_cidadao_staging.auxilio_brasil` as t
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
{#
  dbt model that builds the bolsa_familia_pagamento table in the
  br_cgu_beneficios_cidadao schema from the staging table of the same name.

  The table is fully rebuilt on each run (materialized="table"), partitioned
  by the integer column ano_competencia and clustered by sigla_uf and
  id_municipio.

  NOTE(review): BigQuery integer-range partitioning treats "end" as exclusive,
  so with start=2020 / end=2023 only the years 2020, 2021 and 2022 get their
  own partition; every other year would land in the unpartitioned bucket. The
  schema.yml description for this model mentions payments from 2013 to 2021,
  so this range looks too narrow. Confirm and widen if needed.
#}
{{
config(
alias="bolsa_familia_pagamento",
schema="br_cgu_beneficios_cidadao",
materialized="table",
partition_by={
"field": "ano_competencia",
"data_type": "int64",
"range": {
"start": 2020,
"end": 2023,
"interval": 1,
},
},
cluster_by=["sigla_uf", "id_municipio"],
)
}}

-- distinct drops rows that are identical across every selected column
select distinct
-- competence year and month: first and second pieces of mes_ref split on "-"
-- (presumably mes_ref is formatted as YYYY-MM; verify against staging)
safe_cast(split(mes_ref, '-')[offset(0)] as int64) as ano_competencia,
safe_cast(split(mes_ref, '-')[offset(1)] as int64) as mes_competencia,
-- reference year and month: first 4 and last 2 characters of mes
-- (presumably mes is formatted as YYYYMM; verify against staging)
safe_cast(left(mes, 4) as int64) ano_referencia,
safe_cast(right(mes, 2) as int64) mes_referencia,
safe_cast(id_municipio as string) id_municipio,
safe_cast(sigla_uf as string) sigla_uf,
-- an empty-string cpf is stored as null instead
case when cpf = '' then null else cpf end as cpf_favorecido,
safe_cast(nis as string) nis_favorecido,
safe_cast(nome as string) nome_favorecido,
-- safe_cast yields null rather than an error when the value is not numeric
safe_cast(valor_beneficio as float64) valor_parcela,
from
`basedosdados-staging.br_cgu_beneficios_cidadao_staging.bolsa_familia_pagamento`
as t
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
config(
alias="bpc",
schema="br_cgu_beneficios_cidadao",
materialized="table",
materialized="incremental",
partition_by={
"field": "ano_competencia",
"data_type": "int64",
Expand Down Expand Up @@ -43,3 +43,4 @@ with
)
select * except (data)
from bpc
{% if is_incremental() %} where data > (select max(data) from {{ this }}) {% endif %}
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
config(
alias="garantia_safra",
schema="br_cgu_beneficios_cidadao",
materialized="table",
materialized="incremental",
partition_by={
"field": "ano_referencia",
"data_type": "int64",
Expand Down Expand Up @@ -39,3 +39,4 @@ with
)
select * except (data)
from garantia_safra
{% if is_incremental() %} where data > (select max(data) from {{ this }}) {% endif %}
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
config(
alias="novo_bolsa_familia",
schema="br_cgu_beneficios_cidadao",
materialized="table",
materialized="incremental",
partition_by={
"field": "ano_competencia",
"data_type": "int64",
Expand Down Expand Up @@ -39,3 +39,4 @@ with
)
select * except (data)
from novo_bolsa_familia
{% if is_incremental() %} where data > (select max(data) from {{ this }}) {% endif %}
145 changes: 142 additions & 3 deletions models/br_cgu_beneficios_cidadao/schema.yml
Original file line number Diff line number Diff line change
@@ -1,14 +1,21 @@
---
version: 2
models:
- name: novo_bolsa_familia
- name: br_cgu_beneficios_cidadao__novo_bolsa_familia
description: Números do Novo Bolsa Família
tests:
- dbt_utils.unique_combination_of_columns:
combination_of_columns:
- nome_favorecido
- ano_competencia
- mes_competencia
- ano_referencia
- mes_referencia
- id_municipio
- nis_favorecido
- valor_parcela
- not_null_proportion_multiple_columns:
at_least: 0.05
columns:
- name: ano_competencia
description: Ano a que se refere a parcela
Expand Down Expand Up @@ -37,14 +44,16 @@ models:
- name: valor_parcela
description: Valor da parcela do benefício
tests: [not_null]
- name: garantia_safra
- name: br_cgu_beneficios_cidadao__garantia_safra
description: Números do Garantia Safra
tests:
- dbt_utils.unique_combination_of_columns:
combination_of_columns:
- ano_referencia
- mes_referencia
- nis_favorecido
- not_null_proportion_multiple_columns:
at_least: 0.05
columns:
- name: ano_referencia
description: Ano da folha de pagamento
Expand All @@ -65,14 +74,16 @@ models:
- name: valor_parcela
description: Valor da parcela do benefício
tests: [not_null]
- name: bpc
- name: br_cgu_beneficios_cidadao__bpc
description: Números do Benefício de Prestação Continuada
tests:
- dbt_utils.unique_combination_of_columns:
combination_of_columns:
- ano_competencia
- mes_competencia
- nis_favorecido
- not_null_proportion_multiple_columns:
at_least: 0.05
columns:
- name: ano_competencia
description: Ano a que se refere a parcela
Expand Down Expand Up @@ -109,3 +120,131 @@ models:
- name: valor_parcela
description: Valor da parcela do benefício
tests: [not_null]
- name: br_cgu_beneficios_cidadao__auxilio_brasil
description: Dados sobre o Auxílio Brasil
tests:
- dbt_utils.unique_combination_of_columns:
combination_of_columns:
- ano_competencia
- mes_competencia
- ano_referencia
- mes_referencia
- nome_favorecido
- id_municipio
- nis_favorecido
- valor_parcela
- not_null_proportion_multiple_columns:
at_least: 0.05
columns:
- name: ano_competencia
description: Ano a que se refere a parcela
tests:
- relationships:
to: ref('br_bd_diretorios_data_tempo__ano')
field: ano.ano
- name: mes_competencia
description: Mês a que se refere a parcela
tests:
- relationships:
to: ref('br_bd_diretorios_data_tempo__mes')
field: mes.mes
- name: ano_referencia
description: Ano da folha de pagamento
tests:
- relationships:
to: ref('br_bd_diretorios_data_tempo__ano')
field: ano.ano
- name: mes_referencia
description: Mês da folha de pagamento
tests:
- relationships:
to: ref('br_bd_diretorios_data_tempo__mes')
field: mes.mes
- name: sigla_uf
description: Sigla da Unidade Federativa do beneficiário do Auxílio Brasil
tests:
- relationships:
to: ref('br_bd_diretorios_brasil__uf')
field: sigla
- name: id_municipio
description: Código IBGE do município do beneficiário do Auxílio Brasil
tests:
- relationships:
to: ref('br_bd_diretorios_brasil__municipio')
field: id_municipio
- name: cpf_favorecido
description: Número no Cadastro de Pessoas Físicas (CPF) do beneficiário do
Auxílio Brasil, caso possua
- name: nis_favorecido
description: Número de Identificação Social (NIS) do beneficiário do Auxílio
Brasil, caso possua
- name: nome_favorecido
description: Nome do beneficiário do Auxílio Brasil
- name: valor_parcela
description: Valor da parcela do benefício
tests: [not_null]
- name: br_cgu_beneficios_cidadao__bolsa_familia_pagamento
description: Dados sobre o pagamento do Bolsa Família entre os anos de 2013 e
2021.
tests:
- dbt_utils.unique_combination_of_columns:
combination_of_columns:
- ano_competencia
- mes_competencia
- ano_referencia
- mes_referencia
- nome_favorecido
- id_municipio
- cpf_favorecido
- nis_favorecido
- valor_parcela
- not_null_proportion_multiple_columns:
at_least: 0.05
columns:
- name: ano_competencia
description: Ano a que se refere a parcela
tests:
- relationships:
to: ref('br_bd_diretorios_data_tempo__ano')
field: ano.ano
- name: mes_competencia
description: Mês a que se refere a parcela
tests:
- relationships:
to: ref('br_bd_diretorios_data_tempo__mes')
field: mes.mes
- name: ano_referencia
description: Ano da folha de pagamento
tests:
- relationships:
to: ref('br_bd_diretorios_data_tempo__ano')
field: ano.ano
- name: mes_referencia
description: Mês da folha de pagamento
tests:
- relationships:
to: ref('br_bd_diretorios_data_tempo__mes')
field: mes.mes
- name: sigla_uf
description: Sigla da Unidade Federativa do beneficiário do Bolsa Família
tests:
- relationships:
to: ref('br_bd_diretorios_brasil__uf')
field: sigla
- name: id_municipio
description: Código IBGE do município do beneficiário do Bolsa Família
tests:
- relationships:
to: ref('br_bd_diretorios_brasil__municipio')
field: id_municipio
- name: cpf_favorecido
description: Número no Cadastro de Pessoas Físicas (CPF) do beneficiário do
Bolsa Família, caso possua
- name: nis_favorecido
description: Número de Identificação Social (NIS) do beneficiário do Bolsa
Família, caso possua
- name: nome_favorecido
description: Nome do beneficiário do Bolsa Família
- name: valor_parcela
description: Valor da parcela do benefício
tests: [not_null]
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
{{ config(alias="conjunto", schema="br_cgu_dados_abertos") }}
--
select
safe_cast(nullif(id, "") as string) id,
safe_cast(nullif(titulo, "") as string) nome,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
{{ config(alias="organizacao", schema="br_cgu_dados_abertos") }}
--
select
safe_cast(nullif(o.id, "") as string) id,
safe_cast(nullif(o.titulo, "") as string) nome,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
{{ config(alias="recurso", schema="br_cgu_dados_abertos") }}
--
select
safe_cast(nullif(id, "") as string) id,
safe_cast(nullif(id_conjunto, "") as string) id_conjunto,
Expand Down
4 changes: 4 additions & 0 deletions models/br_cvm_fi/schema.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@ models:
- plano_contabil_balancete
- codigo_conta
- cnpj
- cnpj_basico
- ano
- mes
- saldo_conta
description: O balancete é um documento gerado mensalmente pela contabilidade
do fundo.
columns:
Expand Down
6 changes: 3 additions & 3 deletions models/br_geobr_mapas/br_geobr_mapas__municipio.sql
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
}}

select
safe_cast(id_municipio as string) id_municipio,
safe_cast(sigla_uf as string) sigla_uf,
safe.st_geogfromtext(geometria) geometria
safe_cast(replace(code_muni, '.0', '') as string) id_municipio,
safe_cast(abbrev_state as string) sigla_uf,
safe.st_geogfromtext(geometry) geometria
from `basedosdados-staging.br_geobr_mapas_staging.municipio` as t
Loading

0 comments on commit cd21dbd

Please sign in to comment.