Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[dbt] br_mercadolivre_oferta #114

Merged
merged 4 commits into from
Jul 24, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
111 changes: 60 additions & 51 deletions models/br_mercadolivre_ofertas/item.sql
Original file line number Diff line number Diff line change
Expand Up @@ -7,65 +7,65 @@
}
)}}

WITH tabela AS (
SELECT
PARSE_DATE('%Y-%m-%d', FORMAT_TIMESTAMP('%Y-%m-%d', data_hora)) AS data_consulta,
PARSE_TIME('%H:%M:%S', FORMAT_TIMESTAMP('%H:%M:%S', data_hora)) AS hora_consulta,
secao_site,
item_id as id_item,
titulo,
vendedor_id as id_vendedor,
CASE
WHEN vendedor = 'None' THEN NULL
ELSE vendedor
END AS vendedor,
REGEXP_EXTRACT(categoria, r'^([^,]+)') AS categoria_principal,
REGEXP_EXTRACT(categoria, r'^([^\n]+)') AS categorias,
envio_pais as envio_nacional,
caracteristicas,
quantidade_avaliacoes as quantidade_avaliacao,
estrelas as avaliacao,
CASE
WHEN IS_NAN(preco_original) THEN null
WHEN preco > preco_original THEN preco
ELSE preco_original
END preco_original,
desconto,
CASE
WHEN preco > preco_original THEN preco_original
WHEN preco = preco_original THEN null
ELSE preco
END AS preco_final

FROM
(
SELECT
ARRAY_TO_STRING(categorias, ', ') AS categoria,
*
FROM
`basedosdados.br_mercadolivre_ofertas.item`
WHERE NOT ARRAY_TO_STRING(categorias, ', ') LIKE '...%'
) a
LEFT JOIN
(SELECT
DISTINCT
dia,
vendedor_id,
nome
WITH tabela_ordenada as (
SELECT
dia AS data_consulta,
TIME(
EXTRACT(HOUR FROM PARSE_DATETIME('%Y-%m-%d %H:%M:%S', data_hora)),
EXTRACT(MINUTE FROM PARSE_DATETIME('%Y-%m-%d %H:%M:%S', data_hora)),
EXTRACT(SECOND FROM PARSE_DATETIME('%Y-%m-%d %H:%M:%S', data_hora))
) AS hora_consulta,
secao_site,
LPAD(item_id, 12, '0') AS id_item,
CASE
WHEN vendedor='None' THEN NULL
ELSE vendedor
END vendedor,
titulo,
CASE
WHEN categorias = '[]' THEN null
ELSE TRIM(JSON_EXTRACT_ARRAY(categorias)[OFFSET(0)], '"')
END as categoria_principal,
CASE
WHEN categorias = '[]' THEN null
ELSE ARRAY_TO_STRING(ARRAY(SELECT x FROM UNNEST(JSON_EXTRACT_ARRAY(categorias)) AS x WITH OFFSET
WHERE OFFSET > 0), ', ')
END as outras_categorias,
CASE
WHEN caracteristicas = '{}' THEN NULL
ELSE caracteristicas
END caracteristicas,
SAFE_CAST(envio_pais AS BOOL) envio_nacional,
SAFE_CAST(quantidade_avaliacoes AS INT64) quantidade_avaliacao,
SAFE_CAST(estrelas AS FLOAT64) avaliacao,
SAFE_CAST(CASE
WHEN preco_original = 'nan' THEN null
WHEN preco > preco_original THEN preco
ELSE preco_original
END AS FLOAT64) AS preco_original,
SAFE_CAST(desconto AS INT64) desconto,
SAFE_CAST (CASE
WHEN preco > preco_original THEN preco_original
WHEN preco = preco_original THEN null
ELSE preco
END AS FLOAT64) AS preco_final,
FROM
`basedosdados.br_mercadolivre_ofertas.vendedor`) b
ON a.vendedor = b.nome and FORMAT_TIMESTAMP('%Y-%m-%d', data_hora) = FORMAT_TIMESTAMP('%Y-%m-%d', dia)
)
`basedosdados-staging.br_mercadolivre_ofertas_staging.item`)

SELECT
data_consulta,
hora_consulta,
secao_site,
id_item,
titulo,
id_vendedor,
id_vendor as id_vendedor,
vendedor,
categoria_principal,
categorias,
REGEXP_REPLACE(
TRIM(outras_categorias, '"'),
r'("([^"]+)")',
r'\2'
) as outras_categorias,
caracteristicas,
envio_nacional,
quantidade_avaliacao,
Expand All @@ -88,7 +88,16 @@ SELECT
ELSE preco_final
END, 2
) AS preco_final
FROM tabela
FROM tabela_ordenada a
LEFT JOIN
(SELECT
DISTINCT
dia,
id_vendor,
nome
FROM
`basedosdados-staging.br_mercadolivre_ofertas_staging.vendedor`) b
ON a.vendedor = b.nome and data_consulta = dia
WHERE NOT (preco_original IS NULL AND preco_final IS NULL)
AND NOT (preco_final IS NULL AND desconto IS NULL)
AND NOT (preco_original IS NULL AND desconto IS NULL)
2 changes: 1 addition & 1 deletion models/br_mercadolivre_ofertas/schema.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ models:
description: Nome do vendedor
- name: categoria_principal
description: Primeira categoria do item classificada no site
- name: categorias
- name: outras_categorias
description: Todas as categorias descritas no site em relação ao item
- name: caracteristicas
description: Características adicionais do item
Expand Down
110 changes: 37 additions & 73 deletions models/br_mercadolivre_ofertas/vendedor.sql
Original file line number Diff line number Diff line change
Expand Up @@ -7,78 +7,42 @@
}
)}}

WITH tabela_deduplicada AS (
SELECT
PARSE_DATE('%Y-%m-%d', FORMAT_TIMESTAMP('%Y-%m-%d', dia)) AS data_consulta,
id_municipio,
vendedor_id as id_vendedor,
nome as vendedor,
classificacao,
reputacao,
experiencia as anos_experiencia,
ARRAY_AGG(opinioes.Bom)[OFFSET(0)] AS avaliacao_bom,
ARRAY_AGG(opinioes.Regular)[OFFSET(0)] AS avaliacao_regular,
ARRAY_AGG(opinioes.Ruim)[OFFSET(0)] AS avaliacao_ruim
FROM
`basedosdados.br_mercadolivre_ofertas.vendedor`
GROUP BY
data_consulta,
vendedor_id,
vendedor,
experiencia,
reputacao,
classificacao,
id_municipio
HAVING
COUNT(*) > 1
), tabela_unicos AS (
SELECT
PARSE_DATE('%Y-%m-%d', FORMAT_TIMESTAMP('%Y-%m-%d', dia)) AS data_consulta,
id_municipio,
vendedor_id as id_vendedor,
nome as vendedor,
classificacao,
reputacao,
experiencia as anos_experiencia,
ARRAY_AGG(opinioes.Bom)[OFFSET(0)] AS avaliacao_bom,
ARRAY_AGG(opinioes.Regular)[OFFSET(0)] AS avaliacao_regular,
ARRAY_AGG(opinioes.Ruim)[OFFSET(0)] AS avaliacao_ruim
FROM
`basedosdados.br_mercadolivre_ofertas.vendedor`
GROUP BY
data_consulta,
vendedor_id,
vendedor,
experiencia,
reputacao,
classificacao,
id_municipio
HAVING
COUNT(*) = 1
)
WITH main AS (
SELECT LPAD(id_vendor, 12, '0') as id_vendedor,
dia,
nome,
SAFE_CAST(experiencia AS INT64) experiencia,
reputacao,
CASE
WHEN classificacao='None' THEN NULL
ELSE classificacao
END AS classificacao,
id_municipio,
from `basedosdados-staging.br_mercadolivre_ofertas_staging.vendedor`
), predata AS (
SELECT
LPAD(id_vendor, 12, '0') as id_vendedor,
STRUCT(
json_extract_scalar(opinioes, '$.Bom') as Bom,
json_extract_scalar(opinioes, '$.Regular') as Regular,
json_extract_scalar(opinioes, '$.Ruim') as Ruim
) as opinioes
from `basedosdados-staging.br_mercadolivre_ofertas_staging.vendedor`
), tabela_ordenada AS (
SELECT
data_consulta,
id_municipio,
id_vendedor,
vendedor,
classificacao,
reputacao,
anos_experiencia,
SAFE_CAST(avaliacao_bom AS INT64) AS avaliacao_bom,
SAFE_CAST(avaliacao_regular AS INT64) AS avaliacao_regular,
SAFE_CAST(avaliacao_ruim AS INT64) AS avaliacao_ruim
FROM tabela_unicos
UNION ALL
SELECT
data_consulta,
id_municipio,
id_vendedor,
vendedor,
classificacao,
reputacao,
anos_experiencia,
SAFE_CAST(avaliacao_bom AS INT64) AS avaliacao_bom,
SAFE_CAST(avaliacao_regular AS INT64) AS avaliacao_regular,
SAFE_CAST(avaliacao_ruim AS INT64) AS avaliacao_ruim
FROM tabela_deduplicada
PARSE_DATE('%Y-%m-%d', dia) AS data_consulta,
id_municipio,
main.id_vendedor,
nome AS vendedor,
classificacao,
reputacao,
experiencia AS anos_experiencia,
SAFE_CAST(predata.opinioes.bom AS INT64) as avaliacao_bom,
SAFE_CAST(predata.opinioes.regular AS INT64) as avaliacao_regular,
SAFE_CAST(predata.opinioes.regular AS INT64) AS avaliacao_ruim
FROM main
LEFT JOIN predata
ON main.id_vendedor = predata.id_vendedor)

SELECT * FROM tabela_ordenada

Loading