-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #786 from basedosdados/br_rf_arrecadacao_staging
[dados] br_rf_arrecadacao
- Loading branch information
Showing
13 changed files
with
928 additions
and
124 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
{{
    config(
        schema="br_rf_arrecadacao",
        alias="cnae",
        materialized="table",
        partition_by={
            "field": "ano",
            "data_type": "int64",
            "range": {"start": 2016, "end": 2024, "interval": 1},
        },
        cluster_by=["mes"],
    )
}}

-- Builds br_rf_arrecadacao.cnae from the staging table of the same name,
-- casting every selected column to its final type. safe_cast yields NULL
-- instead of failing when a value cannot be converted.
-- NOTE(review): BigQuery integer-range partitioning treats the range `end`
-- as exclusive, so rows with ano = 2024 would fall outside the declared
-- range. Confirm whether the range should extend to 2025.
select
    safe_cast(ano as int64) ano,
    safe_cast(mes as int64) mes,
    safe_cast(secao_sigla as string) secao_sigla,
    safe_cast(imposto_importacao as float64) imposto_importacao,
    safe_cast(imposto_exportacao as float64) imposto_exportacao,
    safe_cast(ipi as float64) ipi,
    safe_cast(irpf as float64) irpf,
    safe_cast(irpj as float64) irpj,
    safe_cast(irrf as float64) irrf,
    safe_cast(iof as float64) iof,
    safe_cast(itr as float64) itr,
    safe_cast(cofins as float64) cofins,
    safe_cast(pis_pasep as float64) pis_pasep,
    safe_cast(csll as float64) csll,
    safe_cast(cide_combustiveis as float64) cide_combustiveis,
    safe_cast(contribuicao_previdenciaria as float64) contribuicao_previdenciaria,
    safe_cast(cpsss as float64) cpsss,
    safe_cast(pagamento_unificado as float64) pagamento_unificado,
    safe_cast(outras_receitas_rfb as float64) outras_receitas_rfb,
    safe_cast(demais_receitas as float64) demais_receitas,
from `basedosdados-staging.br_rf_arrecadacao_staging.cnae` as t
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
{{
    config(
        schema="br_rf_arrecadacao",
        alias="ir_ipi",
        materialized="table",
        partition_by={
            "field": "ano",
            "data_type": "int64",
            "range": {"start": 2019, "end": 2024, "interval": 1},
        },
        cluster_by=["mes"],
    )
}}

-- Builds br_rf_arrecadacao.ir_ipi from the staging table of the same name,
-- casting every selected column to its final type. safe_cast yields NULL
-- instead of failing when a value cannot be converted.
-- NOTE(review): BigQuery integer-range partitioning treats the range `end`
-- as exclusive, so rows with ano = 2024 would fall outside the declared
-- range. Confirm whether the range should extend to 2025.
select
    safe_cast(ano as int64) ano,
    safe_cast(mes as int64) mes,
    safe_cast(tributo as string) tributo,
    safe_cast(decendio as string) decendio,
    safe_cast(arrecadacao_bruta as float64) arrecadacao_bruta,
    safe_cast(retificacao as float64) retificacao,
    safe_cast(compensacao as float64) compensacao,
    safe_cast(restituicao as float64) restituicao,
    safe_cast(outros as float64) outros,
    safe_cast(arrecadacao_liquida as float64) arrecadacao_liquida,
from `basedosdados-staging.br_rf_arrecadacao_staging.ir_ipi` as t
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
{{
    config(
        schema="br_rf_arrecadacao",
        alias="itr",
        materialized="table",
        partition_by={
            "field": "ano",
            "data_type": "int64",
            "range": {"start": 2017, "end": 2024, "interval": 1},
        },
        cluster_by=["mes"],
    )
}}

-- Builds br_rf_arrecadacao.itr from the staging table of the same name,
-- casting every selected column to its final type. safe_cast yields NULL
-- instead of failing when a value cannot be converted.
-- NOTE(review): BigQuery integer-range partitioning treats the range `end`
-- as exclusive, so rows with ano = 2024 would fall outside the declared
-- range. Confirm whether the range should extend to 2025.
select
    safe_cast(ano as int64) ano,
    safe_cast(mes as int64) mes,
    safe_cast(sigla_uf as string) sigla_uf,
    safe_cast(sigla_regiao as string) sigla_regiao,
    safe_cast(cidade as string) cidade,
    safe_cast(valor_arrecadado as float64) valor_arrecadado,
from `basedosdados-staging.br_rf_arrecadacao_staging.itr` as t
47 changes: 47 additions & 0 deletions
47
models/br_rf_arrecadacao/br_rf_arrecadacao__natureza_juridica.sql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
{{
    config(
        schema="br_rf_arrecadacao",
        alias="natureza_juridica",
        materialized="table",
        partition_by={
            "field": "ano",
            "data_type": "int64",
            "range": {"start": 2016, "end": 2024, "interval": 1},
        },
        cluster_by=["mes"],
    )
}}

-- Builds br_rf_arrecadacao.natureza_juridica. The staging rows are matched to
-- the natureza_juridica directory table so that the output carries the full
-- directory id (as natureza_juridica_codigo) instead of the staging code.
-- NOTE(review): BigQuery integer-range partitioning treats the range `end`
-- as exclusive, so rows with ano = 2024 would fall outside the declared
-- range. Confirm whether the range should extend to 2025.
with
    -- One row per directory id, paired with the prefix used as the join key.
    -- The directory table is backtick-quoted: the project id contains a dash,
    -- and an unquoted "basedosdados - staging" can be read as a subtraction.
    -- NOTE(review): BigQuery string positions are 1-based; confirm that
    -- position 0 yields the first three characters as intended here.
    referencia_codigo as (
        select
            id_natureza_juridica,
            substr(cast(id_natureza_juridica as string), 0, 3) as inicio_codigo
        from `basedosdados-staging.br_bd_diretorios_brasil.natureza_juridica`
    )
select
    safe_cast(t.ano as int64) ano,
    safe_cast(t.mes as int64) mes,
    safe_cast(
        referencia_codigo.id_natureza_juridica as string
    ) natureza_juridica_codigo,
    safe_cast(t.imposto_importacao as float64) imposto_importacao,
    safe_cast(t.imposto_exportacao as float64) imposto_exportacao,
    safe_cast(t.ipi as float64) ipi,
    safe_cast(t.irpf as float64) irpf,
    safe_cast(t.irpj as float64) irpj,
    safe_cast(t.irrf as float64) irrf,
    safe_cast(t.iof as float64) iof,
    safe_cast(t.itr as float64) itr,
    safe_cast(t.cofins as float64) cofins,
    safe_cast(t.pis_pasep as float64) pis_pasep,
    safe_cast(t.csll as float64) csll,
    safe_cast(t.cide_combustiveis as float64) cide_combustiveis,
    safe_cast(t.contribuicao_previdenciaria as float64) contribuicao_previdenciaria,
    safe_cast(t.cpsss as float64) cpsss,
    safe_cast(t.pagamento_unificado as float64) pagamento_unificado,
    safe_cast(t.outras_receitas_rfb as float64) outras_receitas_rfb,
    safe_cast(t.demais_receitas as float64) demais_receitas,
from `basedosdados-staging.br_rf_arrecadacao_staging.natureza_juridica` as t
-- Inner join: staging rows whose code matches no directory prefix are dropped.
inner join
    referencia_codigo on t.natureza_juridica_codigo = referencia_codigo.inicio_codigo
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
import os | ||
import numpy as np | ||
import pandas as pd | ||
from clean_functions import * | ||
|
||
def rename_columns(df):
    """Return a copy of *df* with the source CSV headers renamed.

    The headers of the raw file are mapped to the snake_case column names
    used by the output table. Columns whose header is not listed in the
    mapping keep their original name.
    """
    name_dict = {
        'Ano': 'ano',
        'Mês': 'mes',
        'Seção - Sigla': 'secao_sigla',
        'Seção - Nome': 'secao_nome',
        'II': 'imposto_importacao',
        'IE': 'imposto_exportacao',
        'IPI': 'ipi',
        'IRPF': 'irpf',
        'IRPJ': 'irpj',
        'IRRF': 'irrf',
        'IOF': 'iof',
        'ITR': 'itr',
        'Cofins': 'cofins',
        'Pis/Pasep': 'pis_pasep',
        'CSLL': 'csll',
        'Cide': 'cide_combustiveis',
        'Contribuição Previdenciária': 'contribuicao_previdenciaria',
        'CPSSS': 'cpsss',
        'Pagamento Unificado': 'pagamento_unificado',
        'Outras Receitas Administradas': 'outras_receitas_rfb',
        'Receitas Não Administradas': 'demais_receitas',
    }

    return df.rename(columns=name_dict)
|
||
def change_types(df):
    """Convert the columns of the renamed frame to their final types.

    The frame is modified in place and also returned. It must already carry
    the names produced by ``rename_columns``: ``ano``, ``mes`` and
    ``secao_nome`` are required, and every column other than the four
    identifier columns is treated as a monetary value and cast to float.
    """
    df['ano'] = df['ano'].astype('int')
    # NOTE(review): get_month_number comes from clean_functions; presumably it
    # maps the month labels of the source file to month numbers. Confirm there.
    df['mes'] = get_month_number(df['mes'])
    df['secao_nome'] = df['secao_nome'].str.title()

    # All remaining columns are monetary values. They are selected by name
    # rather than by position, so the conversion stays correct even if the
    # column order of the input file changes.
    identifier_columns = {'ano', 'mes', 'secao_sigla', 'secao_nome'}
    for col in df.columns:
        if col in identifier_columns:
            continue
        # NOTE(review): the helper order (replace_commas, then remove_dots) is
        # kept exactly as it was; confirm against clean_functions that the
        # decimal separator survives both steps.
        df[col] = df[col].apply(replace_commas).apply(remove_dots).astype('float')

    return df
|
||
if __name__ == '__main__':
    # Cleaning pipeline for the CNAE file: read the raw CSV, drop empty
    # columns and rows, rename the headers, convert the column types and
    # write the result partitioned by year and month.
    # The input and output paths are relative to the current working directory.
    df = read_data(file_dir='../input/arrecadacao-cnae.csv')
    df = remove_empty_columns(df)
    df = remove_empty_rows(df)
    df = rename_columns(df)
    df = change_types(df)
    save_data(
        df=df,
        file_dir='../output/br_rf_arrecadacao_cnae',
        partition_cols=['ano', 'mes'],
    )
Oops, something went wrong.