Skip to content

Commit

Permalink
Merge pull request #550 from basedosdados/staging/add-incremental-tests
Browse files Browse the repository at this point in the history
[dbt] add macro and modify tests
  • Loading branch information
folhesgabriel authored Apr 18, 2024
2 parents 78fea7d + dd78818 commit 2458dd9
Show file tree
Hide file tree
Showing 8 changed files with 217 additions and 75 deletions.
47 changes: 47 additions & 0 deletions macros/custom_get_where_subquery.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
-- This macro is used to get a subquery with a where clause that can be used in a test
-- to filter the data to be tested. The macro looks for a where clause in the model's
-- config (schema.yml) and replaces the placeholder "__most_recent_year_month__" with
-- the maximum
-- year and month found in the relation. The macro returns a subquery with the where
-- thats used
-- to filter the data to be tested
{% macro get_where_subquery(relation) -%}
{% set where = config.get("where", "") %}

{% if where %}
{% if "__most_recent_year_month__" in where %}
{# Construct a query to find the maximum date using ano and mes columns #}
{% set max_date_query = (
"select format_date('%Y-%m', max(date(cast(ano as int64), cast(mes as int64), 1))) as max_date from "
~ relation
) %}
{% set max_date_result = run_query(max_date_query) %}

{% if execute %}
{# % do log(max_date_query, info=True) %#}
{# % do log(max_date_result, info=True) %#}
{# Extract the maximum year and month from the max_date #}
{% set max_date = max_date_result.rows[0][0] %}
{% set max_year = max_date[:4] %}
{% set max_month = max_date[5:7] %}

{# Replace placeholder in the where config with actual maximum year and month #}
{% set where = where | replace(
"__most_recent_year_month__",
"ano = " ~ max_year ~ " and mes = " ~ max_month,
) %}
{% do log(
"----- The test will be performed for: " ~ where, info=True
) %}

{% endif %}
{% endif %}

{%- set filtered -%}
(select * from {{ relation }} where {{ where }}) dbt_subquery
{%- endset -%}

{% do return(filtered) %}
{% else %} {% do return(relation) %}
{% endif %}
{%- endmacro %}
52 changes: 48 additions & 4 deletions models/br_bcb_estban/schema.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,28 +14,45 @@ models:
- id_municipio
- cnpj_basico
- id_verbete
- not_null_proportion_multiple_columns:
at_least: 0.05
config:
where: __most_recent_year_month__
columns:
- name: ano
description: Ano
- name: mes
description: Mês
- name: sigla_uf
description: Sigla da Unidade da Federação
tests:
- dbt_utils.not_null_proportion:
at_least: 0.01
config:
where: __most_recent_year_month__
- name: id_municipio
description: ID Município - IBGE 7 Dígitos
tests:
- relationships:
to: ref('br_bd_diretorios_brasil__municipio')
field: id_municipio
config:
where: __most_recent_year_month__
- dbt_utils.not_null_proportion:
at_least: 0.01
config:
where: __most_recent_year_month__
- name: cnpj_basico
description: Cadastro Nacional de Pessoa Jurídica (CNPJ) básico (8 primeiros
dígitos)
tests:
- dbt_utils.not_null_proportion:
at_least: 0.01
config:
where: __most_recent_year_month__
- relationships:
to: ref('br_bd_diretorios_brasil__empresa')
field: cnpj_basico
config:
where: __most_recent_year_month__
- name: instituicao
description: Nome da Instituição Financeira (IF)
- name: agencias_esperadas
Expand All @@ -44,6 +61,11 @@ models:
description: Quantidade de agências processadas da IF no município
- name: id_verbete
description: Código do verbete
tests:
- dbt_utils.not_null_proportion:
at_least: 0.01
config:
where: __most_recent_year_month__
- name: valor
description: Valores
- name: br_bcb_estban__agencia
Expand All @@ -59,21 +81,32 @@ models:
- id_municipio
- cnpj_agencia
- id_verbete
- not_null_proportion_multiple_columns:
at_least: 0.05
config:
where: __most_recent_year_month__
columns:
- name: ano
description: Ano
- name: mes
description: Mês
- name: sigla_uf
description: Sigla da Unidade da Federação
tests:
- dbt_utils.not_null_proportion:
at_least: 0.01
config:
where: __most_recent_year_month__
- name: id_municipio
description: ID Município - IBGE 7 Dígitos
tests:
- relationships:
to: ref('br_bd_diretorios_brasil__municipio')
field: id_municipio
config:
where: __most_recent_year_month__
- dbt_utils.not_null_proportion:
at_least: 0.01
config:
where: __most_recent_year_month__
- name: cnpj_basico
description: Cadastro Nacional de Pessoa Jurídica (CNPJ) básico (8 primeiros
dígitos)
Expand All @@ -82,10 +115,21 @@ models:
- name: cnpj_agencia
description: CNPJ da agência bancária
tests:
- dbt_utils.not_null_proportion:
at_least: 0.01
config:
where: __most_recent_year_month__
- relationships:
to: ref('br_bd_diretorios_brasil__empresa')
field: cnpj
config:
where: __most_recent_year_month__
- name: id_verbete
description: Código do verbete
tests:
- dbt_utils.not_null_proportion:
at_least: 0.01
config:
where: __most_recent_year_month__
- name: valor
description: Valores
12 changes: 2 additions & 10 deletions models/br_ibge_pam/schema.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,7 @@ models:
e valor da produção das lavouras temporárias
tests:
- dbt_utils.unique_combination_of_columns:
combination_of_columns:
- ano
- sigla_uf
- id_municipio
- produto
combination_of_columns: [ano, sigla_uf, id_municipio, produto]
- not_null_proportion_multiple_columns:
at_least: 0.05
columns:
Expand Down Expand Up @@ -49,11 +45,7 @@ models:
médio e valor da produção das lavouras permanentes
tests:
- dbt_utils.unique_combination_of_columns:
combination_of_columns:
- ano
- sigla_uf
- id_municipio
- produto
combination_of_columns: [ano, sigla_uf, id_municipio, produto]
- not_null_proportion_multiple_columns:
at_least: 0.05
columns:
Expand Down
7 changes: 7 additions & 0 deletions models/br_ms_sia/br_ms_sia__producao_ambulatorial.sql
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,13 @@ with
from `basedosdados.br_bd_diretorios_brasil.municipio`
) as mun
on producao_ambulatorial.pa_ufmun = mun.id_municipio_6
{% if is_incremental() %}
where
date(cast(ano as int64), cast(mes as int64), 1) not in (
select distinct (date(cast(ano as int64), cast(mes as int64), 1))
from {{ this }}
)
{% endif %}
),

sia as (
Expand Down
16 changes: 11 additions & 5 deletions models/br_ms_sia/br_ms_sia__psicossocial.sql
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
config(
alias="psicossocial",
schema="br_ms_sia",
materialized="incremental",
materialized="table",
partition_by={
"field": "ano",
"data_type": "int64",
Expand Down Expand Up @@ -33,7 +33,13 @@ with
from `basedosdados.br_bd_diretorios_brasil.municipio`
) as mun_res
on psicossocial.munpac = mun_res.id_municipio_6

{% if is_incremental() %}
where
date(cast(ano as int64), cast(mes as int64), 1) > (
select max(date(cast(ano as int64), cast(mes as int64), 1))
from {{ this }}
)
{% endif %}
)

select
Expand All @@ -51,7 +57,7 @@ select
-- safe_cast(tippre__ as string) tipo_prestador,
-- safe_cast(mn_ind as string) tipo_mantenedor_estabelecimento,
-- safe_cast(cnpjcpf as string) cnpj_estabelecimento_executante,
-- safe_cast(cnpjmnt as string) cnpj_mantenedora_estabalecimento,
-- safe_cast(cnpjmnt as string) cnpj_mantenedora_estabelecimento,
-- safe_cast(nat_jur as string) natureza_juridica_estabelecimento,
safe_cast(pa_proc_id as string) id_procedimento_ambulatorial,
safe_cast(pa_srv as string) id_servico_especializado,
Expand Down Expand Up @@ -99,7 +105,7 @@ select
length(trim(cidpri)) = 3
and cidpri in (
select subcategoria
from `basedosdados-dev.br_bd_diretorios_brasil.cid_10`
from `basedosdados.br_bd_diretorios_brasil.cid_10`
where length(subcategoria) = 3
)
then cidpri
Expand All @@ -119,7 +125,7 @@ select
length(trim(cidassoc)) = 3
and cidassoc in (
select subcategoria
from `basedosdados-dev.br_bd_diretorios_brasil.cid_10`
from `basedosdados.br_bd_diretorios_brasil.cid_10`
where length(subcategoria) = 3
)
then cidassoc
Expand Down
Loading

0 comments on commit 2458dd9

Please sign in to comment.