Merge pull request #30 from ThiagoPanini/develop
Completion of the unit tests for the transformation methods of the GlueTransformationManager class
ThiagoPanini authored Jan 7, 2023
2 parents 76b4041 + b6b8bcd commit 555ca18
Showing 11 changed files with 668 additions and 9 deletions.
9 changes: 8 additions & 1 deletion app/pytest.ini
@@ -7,4 +7,11 @@ markers =
terraform: tests related to user declarations in Terraform files used to create resources in the cloud
date_attributes_extraction: tests related to the date_attributes_extraction method of the GlueETLManager class
add_partition: tests related to the add_partition method of the GlueETLManager class
repartition_dataframe: tests related to the repartition_dataframe method of the GlueETLManager class
main: tests related to the main script of the deployed Spark application
orders: tests related to the transformations tied to the df_orders DataFrame
order_items: tests related to the transformations tied to the df_order_items DataFrame
customers: tests related to the transformations tied to the df_customers DataFrame
payments: tests related to the transformations tied to the df_payments DataFrame
reviews: tests related to the transformations tied to the df_reviews DataFrame
sot: tests related to the transformations tied to the df_sot DataFrame
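
Registering these marks in pytest.ini prevents unknown-mark warnings and lets subsets of the suite be selected by mark. A minimal, hypothetical usage sketch (the test name and assertion are illustrative and not part of this commit; the df_orders_prep fixture is the one added to conftest.py later in this diff):

import pytest


@pytest.mark.orders
def test_orders_transformation_returns_rows(df_orders_prep):
    # Run only this group with: pytest -m orders
    # Assumes the sample rows survive the transform_orders transformation
    assert df_orders_prep.count() > 0

Equivalent selections work for the other marks, e.g. pytest -m "orders or order_items".
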
3 changes: 0 additions & 3 deletions app/requirements.txt

This file was deleted.

3 changes: 3 additions & 0 deletions app/requirements_test_container.txt
@@ -0,0 +1,3 @@
pytest>=7.2.0
Faker
flake8
5 changes: 2 additions & 3 deletions app/src/main.py
@@ -401,8 +401,6 @@ def transform_sot(self, **kwargs) -> DataFrame:
"max_price_order_item",
"avg_freight_value_order",
"max_order_shipping_limit_date",
"customer_unique_id",
"customer_zip_code_prefix",
"customer_city",
"customer_state",
"installments",
@@ -456,14 +454,15 @@ def run(self) -> None:
# Transforming data
df_orders_prep = self.transform_orders(df=df_orders)
df_order_items_prep = self.transform_order_items(df=df_order_items)
df_customers_prep = self.transform_customers(df=df_customers)
df_payments_prep = self.transform_payments(df=df_payments)
df_reviews_prep = self.transform_reviews(df=df_reviews)

# Generating the final dataset with enriched attributes
df_sot_prep = self.transform_sot(
df_orders_prep=df_orders_prep,
df_order_items_prep=df_order_items_prep,
df_customers_prep=df_customers,
df_customers_prep=df_customers_prep,
df_payments_prep=df_payments_prep,
df_reviews_prep=df_reviews_prep
)
164 changes: 162 additions & 2 deletions app/tests/conftest.py
@@ -20,8 +20,9 @@

# Importing the required modules
import sys
import os
from pytest import fixture
from src.main import ARGV_LIST, DATA_DICT
from src.main import ARGV_LIST, DATA_DICT, GlueTransformationManager
from src.terraglue import GlueJobManager, GlueETLManager
from pyspark.sql import SparkSession
from pyspark.sql.types import StructType, StructField, StringType,\
@@ -30,7 +31,6 @@
from tests.utils.spark_helper import generate_fake_spark_dataframe
from faker import Faker


# Instantiating a Faker object
faker = Faker()
Faker.seed(42)
@@ -169,3 +169,163 @@ def fake_dataframe(spark):
spark=spark,
schema_input=schema
)


"""---------------------------------------------------
----------- 2. DEFINIÇÃO DE FIXTURES ÚTEIS -----------
2.4 Fixtures utilizadas em test_main
---------------------------------------------------"""


# Instantiated object of the GlueTransformationManager class
@fixture()
def glue_manager(job_args_for_testing):
# Appending job arguments to the argument vector (sys.argv)
for arg_name, arg_value in job_args_for_testing.items():
sys.argv.append(f"--{arg_name}={arg_value}")

glue_manager = GlueTransformationManager(
argv_list=ARGV_LIST,
data_dict=DATA_DICT
)

return glue_manager
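
The glue_manager fixture appends each job argument to sys.argv as a --key=value pair before instantiating GlueTransformationManager, presumably because the manager (through its GlueJobManager base) resolves its options from the argument vector just as an AWS Glue job would. A hypothetical consumption of the fixture, not part of the diff itself (the assertion is illustrative only):

import pytest

from src.main import GlueTransformationManager


@pytest.mark.main
def test_glue_manager_fixture_yields_manager_instance(glue_manager):
    # Illustrative check that the fixture builds a usable manager object
    assert isinstance(glue_manager, GlueTransformationManager)
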


# Sample of the df_orders DataFrame
@fixture()
def df_orders(spark):
# Defining the path variable for reading the DataFrame
filename = "sample_olist_orders_dataset.csv"
data_path = os.path.join(
os.getcwd(),
f"app/tests/samples/{filename}"
)

# Reading the DataFrame
df = spark.read.format("csv")\
.option("header", "true")\
.option("inferSchema", "false")\
.load(data_path)

return df


# Result of the df_orders_prep transformation method
@fixture()
def df_orders_prep(glue_manager, df_orders):
return glue_manager.transform_orders(df_orders)


# Sample of the df_order_items DataFrame
@fixture()
def df_order_items(spark):
# Defining the path variable for reading the DataFrame
filename = "sample_olist_order_items_dataset.csv"
data_path = os.path.join(
os.getcwd(),
f"app/tests/samples/{filename}"
)

# Reading the DataFrame
df = spark.read.format("csv")\
.option("header", "true")\
.option("inferSchema", "false")\
.load(data_path)

return df


# Result of the df_order_items_prep transformation method
@fixture()
def df_order_items_prep(glue_manager, df_order_items):
return glue_manager.transform_order_items(df_order_items)


# Sample of the df_customers DataFrame
@fixture()
def df_customers(spark):
# Defining the path variable for reading the DataFrame
filename = "sample_olist_customers_dataset.csv"
data_path = os.path.join(
os.getcwd(),
f"app/tests/samples/{filename}"
)

# Reading the DataFrame
df = spark.read.format("csv")\
.option("header", "true")\
.option("inferSchema", "false")\
.load(data_path)

return df


# Result of the df_customers_prep transformation method
@fixture()
def df_customers_prep(glue_manager, df_customers):
return glue_manager.transform_customers(df_customers)


# Sample of the df_payments DataFrame
@fixture()
def df_payments(spark):
# Defining the path variable for reading the DataFrame
filename = "sample_olist_order_payments_dataset.csv"
data_path = os.path.join(
os.getcwd(),
f"app/tests/samples/{filename}"
)

# Reading the DataFrame
df = spark.read.format("csv")\
.option("header", "true")\
.option("inferSchema", "false")\
.load(data_path)

return df


# Result of the df_payments_prep transformation method
@fixture()
def df_payments_prep(glue_manager, df_payments):
return glue_manager.transform_payments(df_payments)


# Sample of the df_reviews DataFrame
@fixture()
def df_reviews(spark):
# Defining the path variable for reading the DataFrame
filename = "sample_olist_order_reviews_dataset.csv"
data_path = os.path.join(
os.getcwd(),
f"app/tests/samples/{filename}"
)

# Reading the DataFrame
df = spark.read.format("csv")\
.option("header", "true")\
.option("inferSchema", "false")\
.load(data_path)

return df


# Result of the df_reviews_prep transformation method
@fixture()
def df_reviews_prep(glue_manager, df_reviews):
return glue_manager.transform_reviews(df_reviews)


# Result of the df_sot_prep transformation method
@fixture()
def df_sot_prep(glue_manager, df_orders_prep, df_order_items_prep,
df_customers_prep, df_payments_prep,
df_reviews_prep):
return glue_manager.transform_sot(
df_orders_prep=df_orders_prep,
df_order_items_prep=df_order_items_prep,
df_customers_prep=df_customers_prep,
df_payments_prep=df_payments_prep,
df_reviews_prep=df_reviews_prep
)
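
Taken together, the sot mark and the df_sot_prep fixture enable a regression test for the transform_sot change earlier in this diff, which dropped customer_unique_id and customer_zip_code_prefix from the select list. A hypothetical sketch, assuming the removed select entries translate directly into absent output columns:

import pytest


@pytest.mark.sot
def test_sot_prep_excludes_dropped_customer_columns(df_sot_prep):
    # Columns removed from transform_sot in app/src/main.py within this commit
    assert "customer_unique_id" not in df_sot_prep.columns
    assert "customer_zip_code_prefix" not in df_sot_prep.columns
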
11 changes: 11 additions & 0 deletions app/tests/samples/sample_olist_customers_dataset.csv
@@ -0,0 +1,11 @@
"customer_id","customer_unique_id","customer_zip_code_prefix","customer_city","customer_state"
"8bb3bef4e75a95524235cdc11a7331af",d1f24d1d504e27bee13b415e40daeab0,"40055",salvador,BA
d987da9fb4086ab7c2c0f83963cd6722,"870a0bdc769f9a7870309036740e79ea","02929",sao paulo,SP
"2430ad4b1b6efb56cf3050b5d3cf5e54",fa78b26c2fa23b2ebda98c2926305c1d,"61979",amanari,CE
"6031cd91d182925af3d38ae9590e5afa","34acad212e30cd0d511be1034e2f9821","14150",serrana,SP
e1f35a414cbae52d09c294b3e58c3e89,a6f9ff98ef3cedac9d8a2b88afc89972,"75144",anapolis,GO
"0825646a316d8b2bdddea079a5e01fda",be2cff6c84f1683300337ecd499992e0,"04001",sao paulo,SP
afc13494642f88d253be56a1e353e261,"2b22fac410c77b08b937809c68e7481e","12955",bom jesus dos perdoes,SP
b626b511cecb256e0d1514d883084a38,"1373e04979cfa0fb2092909abbd57f25","45400",valenca,BA
ca9a6ae226341827c9614ce7568db46c,"15b521471c36ed411359347ff8257b79","03080",sao paulo,SP
"4930dfe106be258618f6907e8ce8795d",e26c256b09efa85577c1c600cf1e9bea,"05269",sao paulo,SP
14 changes: 14 additions & 0 deletions app/tests/samples/sample_olist_order_items_dataset.csv
@@ -0,0 +1,14 @@
"order_id","order_item_id","product_id","seller_id","shipping_limit_date","price","freight_value"
"001ab0a7578dd66cd4b0a71f5b6e1e41",1,"0b0172eb0fd18479d29c3bc122c058c2","5656537e588803a555b8eb41f07a944b",2018-01-04 02:33:42,24.89,17.63
"001ab0a7578dd66cd4b0a71f5b6e1e41",2,"0b0172eb0fd18479d29c3bc122c058c2","5656537e588803a555b8eb41f07a944b",2018-01-04 02:33:42,24.89,17.63
"001ab0a7578dd66cd4b0a71f5b6e1e41",3,"0b0172eb0fd18479d29c3bc122c058c2","5656537e588803a555b8eb41f07a944b",2018-01-04 02:33:42,24.89,17.63
"001d8f0e34a38c37f7dba2a37d4eba8b",1,e67307ff0f15ade43fcb6e670be7a74c,f4aba7c0bca51484c30ab7bdc34bcdd1,2017-05-18 17:35:11,18.99,7.78
"001d8f0e34a38c37f7dba2a37d4eba8b",2,e67307ff0f15ade43fcb6e670be7a74c,f4aba7c0bca51484c30ab7bdc34bcdd1,2017-05-18 17:35:11,18.99,7.78
"0025c5d1a8ca53a240ec2634bb4492ea",1,"35537536ed2b4c561b4018bf3abf54e0","955fee9216a65b617aa5c0531780ce60",2018-07-10 09:30:09,390.00,29.39
"006f7dfffe2d90809598e8f1972b829b",1,aacfae7cd4bac4849766f640abf2db8a,"729b2d09b2a0bdab221076327f13d050",2018-03-28 23:07:23,39.85,12.79
"0078a358a14592b887eb140ef515f5ab",1,"722f84416177a451c3be217ef8ffa082",cca3071e3e9bb7d12640c9fbe2301306,2017-11-10 15:55:43,253.52,82.86
"00921e4911895b93c7b4fc0d80c0815e",1,cbecf0dca7a42c56c9ad9e20c74af1fd,"688756f717c462a206ad854c5027a64a",2018-06-19 14:17:26,85.00,11.62
"00b2d2f2b5f7b98e6b1828764660134e",1,e0d64dcfaa3b6db5c54ca298ae101d05,"7d13fca15225358621be4086e1eb0964",2018-08-20 08:50:19,146.01,13.68
"00b30bb163474583c14db1689259cf4d",1,"3552627a68384dc559f0fd4cce173269","3c487ae8f8d7542beff5788e2e0aea83",2018-02-06 14:13:31,189.90,26.61
"00b676b01c289cc661c6f7732492771a",1,"121b9686b9929855d823981fc655a6fe",c4fb51fb1c5b7c07bc5e67be6e7e8f6e,2017-08-08 15:45:15,50.00,21.19
"00c2335723b9b74668062e946dc66621",1,b944aabf1fc45c01599ee96c7f4d533e,"128639473a139ac0f3e5f5ade55873a5",2018-01-09 04:28:51,18.90,12.48
11 changes: 11 additions & 0 deletions app/tests/samples/sample_olist_order_payments_dataset.csv
@@ -0,0 +1,11 @@
"order_id","payment_sequential","payment_type","payment_installments","payment_value"
"001ab0a7578dd66cd4b0a71f5b6e1e41",1,boleto,1,127.56
"001d8f0e34a38c37f7dba2a37d4eba8b",1,credit_card,2,53.54
"0025c5d1a8ca53a240ec2634bb4492ea",1,credit_card,7,419.39
"006f7dfffe2d90809598e8f1972b829b",1,credit_card,3,52.64
"0078a358a14592b887eb140ef515f5ab",1,credit_card,3,336.38
"00921e4911895b93c7b4fc0d80c0815e",1,credit_card,1,96.62
"00b2d2f2b5f7b98e6b1828764660134e",1,credit_card,4,159.69
"00b30bb163474583c14db1689259cf4d",1,credit_card,10,216.51
"00b676b01c289cc661c6f7732492771a",1,credit_card,1,71.19
"00c2335723b9b74668062e946dc66621",1,boleto,1,31.38
11 changes: 11 additions & 0 deletions app/tests/samples/sample_olist_order_reviews_dataset.csv
@@ -0,0 +1,11 @@
"review_id","order_id","review_score","review_comment_title","review_comment_message","review_creation_date","review_answer_timestamp"
"68b49cfcd9420c6ad09af97ea8268e7c","001ab0a7578dd66cd4b0a71f5b6e1e41",4,,Loja rápida na entrega. Só houve um incidente pelo fato de não vim o produto escolhido e sim um similar.,2018-01-18 00:00:00,2018-01-25 03:07:10
b8fede4fbe6126f9f85ebdd23166","001d8f0e34a38c37f7dba2a37d4eba8b",1,,Entrega prometida 24/05/17. Dia 26/05/17 não havia recebido ainda.,2017-05-26 00:00:00,2017-05-26 20:19:13
"eba375fa5fe6f3dc7ca2aa6682b46170","0025c5d1a8ca53a240ec2634bb4492ea",5,sensacional,"ótimo produto, bom acabamento. Para quem está começando nesse estudo sobre o universo, principalmente estudantes é uma ótima ferramenta. Tenho feito vários estudos com o auxilio do telescópio.",2018-08-01 00:00:00,2018-08-03 15:21:12
"6d61a5411b9e9cd49b469a1ca07834c3","006f7dfffe2d90809598e8f1972b829b",2,,Demorou para entregar e quando chegou veio errado. ,2018-04-06 00:00:00,2018-04-17 23:03:40
"2820a27cb81757fc08f3241188130070","0078a358a14592b887eb140ef515f5ab",5,,"Muito lindo, ficou perfeito na minha cama.
"8928004690de207823c49726ee9d53df","00921e4911895b93c7b4fc0d80c0815e",5,,,2018-06-21 00:00:00,2018-06-25 14:09:28
"7ecb82e62756c3a6b40c9db6a00b9b9d","00b2d2f2b5f7b98e6b1828764660134e",4,RECOMENDO ,"RELOGIO LINDO ,POREM ELE É PEQUENO ,ESTOU ACOSTUMADA COM RELOGIOS TIPO FAUSTAO RS ,MAS ESSE BEM LEVE ,SÓ QUE VEIO CAIXA DE PAPEL BEM INFERIOR POR SER CASIO ",2018-08-16 00:00:00,2018-08-17 11:00:30
"4be0fcb2fc18e0da288ad4f45c38059e","00b30bb163474583c14db1689259cf4d",5,,Recomendo!!,2018-02-18 00:00:00,2018-02-21 01:43:34
"16561345af89c9e66c13a051f4f15d65","00b676b01c289cc661c6f7732492771a",3,,O kit comprado tinha 8 módulos e só vieram 7. Mandei um e mail para o fornecedor e até agora não obtive resposta.,2017-08-16 00:00:00,2017-08-16 17:05:07
"a5f8ba2a5bba6457fc5edfd182710120","00c2335723b9b74668062e946dc66621",5,,Entregue antes do prazo e material de boa qualidade... recomendo,2018-01-10 00:00:00,2018-01-10 18:29:23
11 changes: 11 additions & 0 deletions app/tests/samples/sample_olist_orders_dataset.csv
@@ -0,0 +1,11 @@
"order_id","customer_id","order_status","order_purchase_timestamp","order_approved_at","order_delivered_carrier_date","order_delivered_customer_date","order_estimated_delivery_date"
"001ab0a7578dd66cd4b0a71f5b6e1e41","8bb3bef4e75a95524235cdc11a7331af",delivered,2017-12-27 00:38:47,2017-12-28 02:33:42,2017-12-28 18:09:35,2018-01-17 20:43:29,2018-01-29 00:00:00
"001d8f0e34a38c37f7dba2a37d4eba8b",d987da9fb4086ab7c2c0f83963cd6722,delivered,2017-05-14 17:19:44,2017-05-14 17:35:11,2017-05-24 15:45:01,2017-05-26 13:14:50,2017-05-24 00:00:00
"0025c5d1a8ca53a240ec2634bb4492ea","2430ad4b1b6efb56cf3050b5d3cf5e54",delivered,2018-07-08 09:17:59,2018-07-08 09:30:09,2018-07-10 13:36:00,2018-07-31 10:51:58,2018-08-03 00:00:00
"006f7dfffe2d90809598e8f1972b829b","6031cd91d182925af3d38ae9590e5afa",delivered,2018-03-22 22:52:46,2018-03-22 23:07:23,2018-03-31 14:18:55,2018-04-05 19:18:35,2018-04-11 00:00:00
"0078a358a14592b887eb140ef515f5ab",e1f35a414cbae52d09c294b3e58c3e89,delivered,2017-11-06 15:42:42,2017-11-06 22:36:39,2017-11-08 12:24:40,2017-11-21 19:04:54,2017-11-29 00:00:00
"00921e4911895b93c7b4fc0d80c0815e","0825646a316d8b2bdddea079a5e01fda",delivered,2018-06-13 13:55:55,2018-06-13 14:17:26,2018-06-19 14:02:00,2018-06-20 15:21:57,2018-06-25 00:00:00
"00b2d2f2b5f7b98e6b1828764660134e",afc13494642f88d253be56a1e353e261,delivered,2018-08-09 08:27:54,2018-08-09 08:50:19,2018-08-10 17:15:00,2018-08-15 20:20:42,2018-08-30 00:00:00
"00b30bb163474583c14db1689259cf4d",b626b511cecb256e0d1514d883084a38,delivered,2018-01-31 13:57:27,2018-01-31 14:13:31,2018-02-06 17:47:50,2018-02-17 18:36:59,2018-03-05 00:00:00
"00b676b01c289cc661c6f7732492771a",ca9a6ae226341827c9614ce7568db46c,delivered,2017-08-02 15:30:42,2017-08-02 15:45:15,2017-08-08 15:37:43,2017-08-15 11:22:26,2017-08-24 00:00:00
"00c2335723b9b74668062e946dc66621","4930dfe106be258618f6907e8ce8795d",delivered,2017-12-31 01:31:16,2018-01-03 04:28:51,2018-01-03 18:05:49,2018-01-09 20:19:21,2018-01-29 00:00:00
