Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[dbt] br_me_rais #795

Merged
merged 3 commits into from
Oct 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion models/br_me_rais/br_me_rais__dicionario.sql
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{{ config(alias="dicionario", schema="br_me_rais") }}
-- Dicionário da Rais

select
safe_cast(id_tabela as string) id_tabela,
safe_cast(nome_coluna as string) nome_coluna,
Expand Down
27 changes: 15 additions & 12 deletions models/br_me_rais/br_me_rais__microdados_estabelecimentos.sql
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,11 @@
cluster_by=["sigla_uf"],
)
}}

select
safe_cast(ano as int64) ano,
safe_cast(sigla_uf as string) sigla_uf,
safe_cast(id_municipio as string) id_municipio,
safe_cast(regexp_replace(id_municipio, r'\.0$', '') as string) id_municipio,
safe_cast(quantidade_vinculos_ativos as int64) quantidade_vinculos_ativos,
safe_cast(quantidade_vinculos_clt as int64) quantidade_vinculos_clt,
safe_cast(
Expand All @@ -23,7 +24,7 @@ select
safe_cast(natureza as string) natureza_estabelecimento,
safe_cast(natureza_juridica as string) natureza_juridica,
safe_cast(tamanho as string) tamanho_estabelecimento,
safe_cast(tipo as string) tipo_estabelecimento,
safe_cast(regexp_replace(tipo, r'^0+', '') as string) as tipo_estabelecimento,
safe_cast(indicador_cei_vinculado as int64) indicador_cei_vinculado,
safe_cast(indicador_pat as int64) indicador_pat,
safe_cast(indicador_simples as string) indicador_simples,
Expand All @@ -32,22 +33,24 @@ select
safe_cast(cnae_1 as string) cnae_1,
safe_cast(cnae_2 as string) cnae_2,
safe_cast(cnae_2_subclasse as string) cnae_2_subclasse,
cast(
cast(regexp_replace(subsetor_ibge, r'^0+', '') as string) as string
) as subsetor_ibge,
safe_cast(regexp_replace(subsetor_ibge, r'^0+', '') as string) as subsetor_ibge,
safe_cast(subatividade_ibge as string) subatividade_ibge,
case
when length(cep) = 7 then lpad(cep, 8, '0') else cast(cep as string)
when length(cep) = 7 then lpad(cep, 8, '0') else safe_cast(cep as string)
end as cep,
case
when bairros_sp = '????????????'
then null
else cast(regexp_replace(bairros_sp, r'^0+', '') as string)
else trim(safe_cast(regexp_replace(bairros_sp, r'^0+', '') as string))
end as bairros_sp,
cast(regexp_replace(distritos_sp, r'^0+', '') as string) as distritos_sp,
cast(regexp_replace(bairros_fortaleza, r'^0+', '') as string) as bairros_fortaleza,
nullif(cast(regexp_replace(bairros_rj, r'^0+', '') as string), '') as bairros_rj,
cast(
regexp_replace(regioes_administrativas_df, r'^0+', '') as string
trim(safe_cast(regexp_replace(distritos_sp, r'^0+', '') as string)) as distritos_sp,
trim(
safe_cast(regexp_replace(bairros_fortaleza, r'^0+', '') as string)
) as bairros_fortaleza,
trim(
nullif(safe_cast(regexp_replace(bairros_rj, r'^0+', '') as string), '')
) as bairros_rj,
trim(
safe_cast(regexp_replace(regioes_administrativas_df, r'^0+', '') as string)
) as regioes_administrativas_df
from `basedosdados-staging.br_me_rais_staging.microdados_estabelecimentos` as t
34 changes: 19 additions & 15 deletions models/br_me_rais/br_me_rais__microdados_vinculos.sql
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
select
safe_cast(ano as int64) ano,
safe_cast(sigla_uf as string) sigla_uf,
safe_cast(id_municipio as string) id_municipio,
safe_cast(regexp_replace(id_municipio, r'\.0$', '') as string) id_municipio,
safe_cast(tipo_vinculo as string) tipo_vinculo,
safe_cast(vinculo_ativo_3112 as string) vinculo_ativo_3112,
safe_cast(tipo_admissao as string) tipo_admissao,
Expand All @@ -27,7 +27,7 @@ select
safe_cast(causa_desligamento_3 as string) causa_desligamento_3,
safe_cast(faixa_tempo_emprego as string) faixa_tempo_emprego,
safe_cast(faixa_horas_contratadas as string) faixa_horas_contratadas,
safe_cast(tempo_emprego as float64) tempo_emprego,
round(safe_cast(tempo_emprego as float64), 2) tempo_emprego,
tricktx marked this conversation as resolved.
Show resolved Hide resolved
safe_cast(quantidade_horas_contratadas as int64) quantidade_horas_contratadas,
safe_cast(id_municipio_trabalho as string) id_municipio_trabalho,
safe_cast(quantidade_dias_afastamento as int64) quantidade_dias_afastamento,
Expand All @@ -37,10 +37,14 @@ select
indicador_trabalho_intermitente as string
) indicador_trabalho_intermitente,
safe_cast(faixa_remuneracao_media_sm as string) faixa_remuneracao_media_sm,
safe_cast(valor_remuneracao_media_sm as float64) valor_remuneracao_media_sm,
round(
safe_cast(valor_remuneracao_media_sm as float64), 2
) valor_remuneracao_media_sm,
safe_cast(valor_remuneracao_media as float64) valor_remuneracao_media,
safe_cast(faixa_remuneracao_dezembro_sm as string) faixa_remuneracao_dezembro_sm,
safe_cast(valor_remuneracao_dezembro_sm as float64) valor_remuneracao_dezembro_sm,
round(
safe_cast(valor_remuneracao_dezembro_sm as float64), 2
) valor_remuneracao_dezembro_sm,
safe_cast(valor_remuneracao_janeiro as float64) valor_remuneracao_janeiro,
safe_cast(valor_remuneracao_fevereiro as float64) valor_remuneracao_fevereiro,
safe_cast(valor_remuneracao_marco as float64) valor_remuneracao_marco,
Expand Down Expand Up @@ -82,19 +86,19 @@ select
then 'Não identificado'
when tipo_estabelecimento = 'CEI/CNO'
then 'CEI'
else tipo_estabelecimento
else safe_cast(regexp_replace(tipo_estabelecimento, r'^0+', '') as string)
end as tipo_estabelecimento,
safe_cast(natureza_juridica as string) natureza_juridica,
safe_cast(indicador_simples as string) indicador_simples,
cast(cast(regexp_replace(bairros_sp, r'^0+', '') as int64) as string) as bairros_sp,
cast(
cast(regexp_replace(distritos_sp, r'^0+', '') as int64) as string
) as distritos_sp,
cast(
cast(regexp_replace(bairros_fortaleza, r'^0+', '') as int64) as string
trim(safe_cast(regexp_replace(bairros_sp, r'^0+', '') as string)) as bairros_sp,
trim(safe_cast(regexp_replace(distritos_sp, r'^0+', '') as string)) as distritos_sp,
trim(
safe_cast(regexp_replace(bairros_fortaleza, r'^0+', '') as string)
) as bairros_fortaleza,
cast(cast(regexp_replace(bairros_rj, r'^0+', '') as int64) as string) as bairros_rj,
cast(
cast(regexp_replace(regioes_administrativas_df, r'^0+', '') as int64) as string
) as regioes_administrativas_df,
trim(
nullif(safe_cast(regexp_replace(bairros_rj, r'^0+', '') as string), '')
) as bairros_rj,
trim(
safe_cast(regexp_replace(regioes_administrativas_df, r'^0+', '') as string)
) as regioes_administrativas_df
from `basedosdados-staging.br_me_rais_staging.microdados_vinculos`
Loading
Loading