Skip to content

Commit

Permalink
Merge pull request #786 from basedosdados/br_rf_arrecadacao_staging
Browse files Browse the repository at this point in the history
[dados] br_rf_arrecadacao
  • Loading branch information
Winzen authored Nov 7, 2024
2 parents 5c8a4ef + a56c15a commit ab6d702
Show file tree
Hide file tree
Showing 13 changed files with 928 additions and 124 deletions.
36 changes: 36 additions & 0 deletions models/br_rf_arrecadacao/br_rf_arrecadacao__cnae.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
{{
    config(
        schema="br_rf_arrecadacao",
        alias="cnae",
        materialized="table",
        partition_by={
            "field": "ano",
            "data_type": "int64",
            "range": {"start": 2016, "end": 2024, "interval": 1},
        },
        cluster_by=["mes"],
    )
}}

-- Builds the `cnae` table from its staging counterpart, casting every column
-- to its final type. safe_cast returns NULL instead of raising when a staging
-- value cannot be converted.
-- NOTE(review): BigQuery integer-range partitioning treats "end" as exclusive,
-- so rows with ano = 2024 (or below 2016) land in the __UNPARTITIONED__
-- partition. Confirm this is intended for the years present in staging.
select
    -- Period and CNAE section identifier.
    safe_cast(ano as int64) as ano,
    safe_cast(mes as int64) as mes,
    safe_cast(secao_sigla as string) as secao_sigla,
    -- One float64 column per revenue item.
    safe_cast(imposto_importacao as float64) as imposto_importacao,
    safe_cast(imposto_exportacao as float64) as imposto_exportacao,
    safe_cast(ipi as float64) as ipi,
    safe_cast(irpf as float64) as irpf,
    safe_cast(irpj as float64) as irpj,
    safe_cast(irrf as float64) as irrf,
    safe_cast(iof as float64) as iof,
    safe_cast(itr as float64) as itr,
    safe_cast(cofins as float64) as cofins,
    safe_cast(pis_pasep as float64) as pis_pasep,
    safe_cast(csll as float64) as csll,
    safe_cast(cide_combustiveis as float64) as cide_combustiveis,
    safe_cast(contribuicao_previdenciaria as float64) as contribuicao_previdenciaria,
    safe_cast(cpsss as float64) as cpsss,
    safe_cast(pagamento_unificado as float64) as pagamento_unificado,
    safe_cast(outras_receitas_rfb as float64) as outras_receitas_rfb,
    safe_cast(demais_receitas as float64) as demais_receitas
from `basedosdados-staging.br_rf_arrecadacao_staging.cnae` as t
26 changes: 26 additions & 0 deletions models/br_rf_arrecadacao/br_rf_arrecadacao__ir_ipi.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
{{
    config(
        schema="br_rf_arrecadacao",
        alias="ir_ipi",
        materialized="table",
        partition_by={
            "field": "ano",
            "data_type": "int64",
            "range": {"start": 2019, "end": 2024, "interval": 1},
        },
        cluster_by=["mes"],
    )
}}

-- Builds the `ir_ipi` table from its staging counterpart, casting every column
-- to its final type. safe_cast returns NULL instead of raising when a staging
-- value cannot be converted.
-- NOTE(review): BigQuery integer-range partitioning treats "end" as exclusive,
-- so rows with ano = 2024 (or below 2019) land in the __UNPARTITIONED__
-- partition. Confirm this is intended for the years present in staging.
select
    -- Period and categorical dimensions.
    safe_cast(ano as int64) as ano,
    safe_cast(mes as int64) as mes,
    safe_cast(tributo as string) as tributo,
    safe_cast(decendio as string) as decendio,
    -- Numeric measures.
    safe_cast(arrecadacao_bruta as float64) as arrecadacao_bruta,
    safe_cast(retificacao as float64) as retificacao,
    safe_cast(compensacao as float64) as compensacao,
    safe_cast(restituicao as float64) as restituicao,
    safe_cast(outros as float64) as outros,
    safe_cast(arrecadacao_liquida as float64) as arrecadacao_liquida
from `basedosdados-staging.br_rf_arrecadacao_staging.ir_ipi` as t
22 changes: 22 additions & 0 deletions models/br_rf_arrecadacao/br_rf_arrecadacao__itr.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
{{
    config(
        schema="br_rf_arrecadacao",
        alias="itr",
        materialized="table",
        partition_by={
            "field": "ano",
            "data_type": "int64",
            "range": {"start": 2017, "end": 2024, "interval": 1},
        },
        cluster_by=["mes"],
    )
}}

-- Builds the `itr` table from its staging counterpart, casting every column
-- to its final type. safe_cast returns NULL instead of raising when a staging
-- value cannot be converted.
-- NOTE(review): BigQuery integer-range partitioning treats "end" as exclusive,
-- so rows with ano = 2024 (or below 2017) land in the __UNPARTITIONED__
-- partition. Confirm this is intended for the years present in staging.
select
    -- Period.
    safe_cast(ano as int64) as ano,
    safe_cast(mes as int64) as mes,
    -- Location columns, kept as strings.
    safe_cast(sigla_uf as string) as sigla_uf,
    safe_cast(sigla_regiao as string) as sigla_regiao,
    safe_cast(cidade as string) as cidade,
    -- Collected amount.
    safe_cast(valor_arrecadado as float64) as valor_arrecadado
from `basedosdados-staging.br_rf_arrecadacao_staging.itr` as t
47 changes: 47 additions & 0 deletions models/br_rf_arrecadacao/br_rf_arrecadacao__natureza_juridica.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
{{
    config(
        schema="br_rf_arrecadacao",
        alias="natureza_juridica",
        materialized="table",
        partition_by={
            "field": "ano",
            "data_type": "int64",
            "range": {"start": 2016, "end": 2024, "interval": 1},
        },
        cluster_by=["mes"],
    )
}}

-- Builds the `natureza_juridica` table: staging rows are matched to the
-- natureza_juridica directory through the first three characters of the
-- directory id, and the full directory id is published as
-- natureza_juridica_codigo. Every output column is safe_cast to its final
-- type (NULL instead of an error when a value cannot be converted).
-- NOTE(review): BigQuery integer-range partitioning treats "end" as exclusive,
-- so rows with ano = 2024 (or below 2016) land in the __UNPARTITIONED__
-- partition. Confirm this is intended for the years present in staging.
with
    -- Directory ids paired with their three-character prefix, used as join key.
    referencia_codigo as (
        select
            id_natureza_juridica,
            -- BigQuery string positions are 1-based; a position of 0 is
            -- coerced to 1, so this is the same prefix written explicitly.
            substr(cast(id_natureza_juridica as string), 1, 3) as inicio_codigo
        -- The project id contains a dash, so the whole path is backtick-quoted
        -- (same form as the staging reference below).
        from `basedosdados-staging.br_bd_diretorios_brasil.natureza_juridica`
    )

select
    safe_cast(t.ano as int64) as ano,
    safe_cast(t.mes as int64) as mes,
    safe_cast(
        referencia_codigo.id_natureza_juridica as string
    ) as natureza_juridica_codigo,
    safe_cast(t.imposto_importacao as float64) as imposto_importacao,
    safe_cast(t.imposto_exportacao as float64) as imposto_exportacao,
    safe_cast(t.ipi as float64) as ipi,
    safe_cast(t.irpf as float64) as irpf,
    safe_cast(t.irpj as float64) as irpj,
    safe_cast(t.irrf as float64) as irrf,
    safe_cast(t.iof as float64) as iof,
    safe_cast(t.itr as float64) as itr,
    safe_cast(t.cofins as float64) as cofins,
    safe_cast(t.pis_pasep as float64) as pis_pasep,
    safe_cast(t.csll as float64) as csll,
    safe_cast(t.cide_combustiveis as float64) as cide_combustiveis,
    safe_cast(t.contribuicao_previdenciaria as float64) as contribuicao_previdenciaria,
    safe_cast(t.cpsss as float64) as cpsss,
    safe_cast(t.pagamento_unificado as float64) as pagamento_unificado,
    safe_cast(t.outras_receitas_rfb as float64) as outras_receitas_rfb,
    safe_cast(t.demais_receitas as float64) as demais_receitas
from `basedosdados-staging.br_rf_arrecadacao_staging.natureza_juridica` as t
-- Inner join: staging rows whose code matches no directory prefix are dropped,
-- and a staging row is repeated once per directory id sharing its prefix.
-- NOTE(review): presumably each prefix maps to a single directory id; verify.
inner join
    referencia_codigo on t.natureza_juridica_codigo = referencia_codigo.inicio_codigo
50 changes: 50 additions & 0 deletions models/br_rf_arrecadacao/code/clean_cnae.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
import os
import numpy as np
import pandas as pd
from clean_functions import *

def rename_columns(df):
    """Return a copy of ``df`` whose source headers are renamed to snake_case.

    Only the headers listed below are renamed; any other column keeps its
    current name. The input frame itself is not modified.
    """
    # (source header, output column name) pairs.
    header_pairs = [
        ('Ano', 'ano'),
        ('Mês', 'mes'),
        ('Seção - Sigla', 'secao_sigla'),
        ('Seção - Nome', 'secao_nome'),
        ('II', 'imposto_importacao'),
        ('IE', 'imposto_exportacao'),
        ('IPI', 'ipi'),
        ('IRPF', 'irpf'),
        ('IRPJ', 'irpj'),
        ('IRRF', 'irrf'),
        ('IOF', 'iof'),
        ('ITR', 'itr'),
        ('Cofins', 'cofins'),
        ('Pis/Pasep', 'pis_pasep'),
        ('CSLL', 'csll'),
        ('Cide', 'cide_combustiveis'),
        ('Contribuição Previdenciária', 'contribuicao_previdenciaria'),
        ('CPSSS', 'cpsss'),
        ('Pagamento Unificado', 'pagamento_unificado'),
        ('Outras Receitas Administradas', 'outras_receitas_rfb'),
        ('Receitas Não Administradas', 'demais_receitas'),
    ]

    return df.rename(columns=dict(header_pairs))

def change_types(df):
    """Convert the renamed columns of ``df`` to their final types.

    ``df`` is modified in place and also returned. ``ano`` becomes an
    integer, ``mes`` is replaced by the result of ``get_month_number`` and
    ``secao_nome`` is title-cased. Every column from the fifth onward is
    passed through ``replace_commas`` and then ``remove_dots`` before being
    cast to float.
    """
    df['ano'] = df['ano'].astype('int')
    # NOTE(review): presumably maps month names to numbers; confirm in clean_functions.
    df['mes'] = get_month_number(df['mes'])
    df['secao_nome'] = df['secao_nome'].str.title()

    # The slice is positional: it relies on the four leading columns being the
    # non-monetary ones, so everything after them is treated as a money value.
    monetary_columns = df.columns[4:]
    for column in monetary_columns:
        # NOTE(review): helper order is kept as-is; verify both helpers in
        # clean_functions produce a float-parsable string in this sequence.
        cleaned = df[column].apply(replace_commas)
        cleaned = cleaned.apply(remove_dots)
        df[column] = cleaned.astype('float')

    return df

if __name__ == '__main__':
    # Script entry point: read the CNAE input file, run the cleaning steps in
    # order, then write the result partitioned by ano and mes.
    df = read_data(file_dir='../input/arrecadacao-cnae.csv')

    # Each step takes the frame and returns the frame for the next step.
    for step in (remove_empty_columns, remove_empty_rows, rename_columns, change_types):
        df = step(df)

    save_data(df=df, file_dir='../output/br_rf_arrecadacao_cnae', partition_cols=['ano', 'mes'])
Loading

0 comments on commit ab6d702

Please sign in to comment.