Skip to content

Commit

Permalink
Add new RAIS code and update RAIS to 2023
Browse files Browse the repository at this point in the history
  • Loading branch information
tricktx committed Oct 18, 2024
1 parent f944726 commit 416e065
Show file tree
Hide file tree
Showing 6 changed files with 798 additions and 49 deletions.
2 changes: 1 addition & 1 deletion models/br_me_rais/br_me_rais__dicionario.sql
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{{ config(alias="dicionario", schema="br_me_rais") }}
-- Dicionário da Rais

select
safe_cast(id_tabela as string) id_tabela,
safe_cast(nome_coluna as string) nome_coluna,
Expand Down
25 changes: 16 additions & 9 deletions models/br_me_rais/br_me_rais__microdados_estabelecimentos.sql
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,14 @@
"data_type": "int64",
"range": {"start": 1985, "end": 2023, "interval": 1},
},
cluster_by=["sigla_uf"],
cluster_by=["sigla_uf", "id_municipio"],
)
}}

select
safe_cast(ano as int64) ano,
safe_cast(sigla_uf as string) sigla_uf,
safe_cast(id_municipio as string) id_municipio,
cast(regexp_replace(id_municipio, r'\.0$', '') as string) id_municipio,
safe_cast(quantidade_vinculos_ativos as int64) quantidade_vinculos_ativos,
safe_cast(quantidade_vinculos_clt as int64) quantidade_vinculos_clt,
safe_cast(
Expand All @@ -23,7 +24,9 @@ select
safe_cast(natureza as string) natureza_estabelecimento,
safe_cast(natureza_juridica as string) natureza_juridica,
safe_cast(tamanho as string) tamanho_estabelecimento,
safe_cast(tipo as string) tipo_estabelecimento,
cast(
cast(regexp_replace(tipo, r'^0+', '') as string) as string
) as tipo_estabelecimento,
safe_cast(indicador_cei_vinculado as int64) indicador_cei_vinculado,
safe_cast(indicador_pat as int64) indicador_pat,
safe_cast(indicador_simples as string) indicador_simples,
Expand All @@ -42,12 +45,16 @@ select
case
when bairros_sp = '????????????'
then null
else cast(regexp_replace(bairros_sp, r'^0+', '') as string)
else trim(cast(regexp_replace(bairros_sp, r'^0+', '') as string))
end as bairros_sp,
cast(regexp_replace(distritos_sp, r'^0+', '') as string) as distritos_sp,
cast(regexp_replace(bairros_fortaleza, r'^0+', '') as string) as bairros_fortaleza,
nullif(cast(regexp_replace(bairros_rj, r'^0+', '') as string), '') as bairros_rj,
cast(
regexp_replace(regioes_administrativas_df, r'^0+', '') as string
trim(cast(regexp_replace(distritos_sp, r'^0+', '') as string)) as distritos_sp,
trim(
cast(regexp_replace(bairros_fortaleza, r'^0+', '') as string)
) as bairros_fortaleza,
trim(
nullif(cast(regexp_replace(bairros_rj, r'^0+', '') as string), '')
) as bairros_rj,
trim(
cast(regexp_replace(regioes_administrativas_df, r'^0+', '') as string)
) as regioes_administrativas_df
from `basedosdados-staging.br_me_rais_staging.microdados_estabelecimentos` as t
45 changes: 31 additions & 14 deletions models/br_me_rais/br_me_rais__microdados_vinculos.sql
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,14 @@
"data_type": "int64",
"range": {"start": 1985, "end": 2023, "interval": 1},
},
cluster_by=["sigla_uf"],
cluster_by=["sigla_uf", "id_municipio"],
)
}}

select
safe_cast(ano as int64) ano,
safe_cast(sigla_uf as string) sigla_uf,
safe_cast(id_municipio as string) id_municipio,
cast(regexp_replace(id_municipio, r'\.0$', '') as string) id_municipio,
safe_cast(tipo_vinculo as string) tipo_vinculo,
safe_cast(vinculo_ativo_3112 as string) vinculo_ativo_3112,
safe_cast(tipo_admissao as string) tipo_admissao,
Expand All @@ -27,7 +27,7 @@ select
safe_cast(causa_desligamento_3 as string) causa_desligamento_3,
safe_cast(faixa_tempo_emprego as string) faixa_tempo_emprego,
safe_cast(faixa_horas_contratadas as string) faixa_horas_contratadas,
safe_cast(tempo_emprego as float64) tempo_emprego,
round(safe_cast(tempo_emprego as float64), 2) tempo_emprego,
safe_cast(quantidade_horas_contratadas as int64) quantidade_horas_contratadas,
safe_cast(id_municipio_trabalho as string) id_municipio_trabalho,
safe_cast(quantidade_dias_afastamento as int64) quantidade_dias_afastamento,
Expand All @@ -37,10 +37,14 @@ select
indicador_trabalho_intermitente as string
) indicador_trabalho_intermitente,
safe_cast(faixa_remuneracao_media_sm as string) faixa_remuneracao_media_sm,
safe_cast(valor_remuneracao_media_sm as float64) valor_remuneracao_media_sm,
round(
safe_cast(valor_remuneracao_media_sm as float64), 2
) valor_remuneracao_media_sm,
safe_cast(valor_remuneracao_media as float64) valor_remuneracao_media,
safe_cast(faixa_remuneracao_dezembro_sm as string) faixa_remuneracao_dezembro_sm,
safe_cast(valor_remuneracao_dezembro_sm as float64) valor_remuneracao_dezembro_sm,
round(
safe_cast(valor_remuneracao_dezembro_sm as float64), 2
) valor_remuneracao_dezembro_sm,
safe_cast(valor_remuneracao_janeiro as float64) valor_remuneracao_janeiro,
safe_cast(valor_remuneracao_fevereiro as float64) valor_remuneracao_fevereiro,
safe_cast(valor_remuneracao_marco as float64) valor_remuneracao_marco,
Expand Down Expand Up @@ -82,19 +86,32 @@ select
then 'Não identificado'
when tipo_estabelecimento = 'CEI/CNO'
then 'CEI'
else tipo_estabelecimento
else
cast(
cast(
regexp_replace(tipo_estabelecimento, r'^0+', '') as string
) as string
)
end as tipo_estabelecimento,
safe_cast(natureza_juridica as string) natureza_juridica,
safe_cast(indicador_simples as string) indicador_simples,
cast(cast(regexp_replace(bairros_sp, r'^0+', '') as int64) as string) as bairros_sp,
cast(
cast(regexp_replace(distritos_sp, r'^0+', '') as int64) as string
trim(
cast(cast(regexp_replace(bairros_sp, r'^0+', '') as int64) as string)
) as bairros_sp,
trim(
cast(cast(regexp_replace(distritos_sp, r'^0+', '') as int64) as string)
) as distritos_sp,
cast(
cast(regexp_replace(bairros_fortaleza, r'^0+', '') as int64) as string
trim(
cast(cast(regexp_replace(bairros_fortaleza, r'^0+', '') as int64) as string)
) as bairros_fortaleza,
cast(cast(regexp_replace(bairros_rj, r'^0+', '') as int64) as string) as bairros_rj,
cast(
cast(regexp_replace(regioes_administrativas_df, r'^0+', '') as int64) as string
trim(
cast(cast(regexp_replace(bairros_rj, r'^0+', '') as int64) as string)
) as bairros_rj,
trim(
cast(
cast(
regexp_replace(regioes_administrativas_df, r'^0+', '') as int64
) as string
)
) as regioes_administrativas_df,
from `basedosdados-staging.br_me_rais_staging.microdados_vinculos`
Loading

0 comments on commit 416e065

Please sign in to comment.