Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[dbt] br_mercadolivre_ofertas #120

Merged
merged 4 commits into from
Jul 24, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 17 additions & 3 deletions models/br_mercadolivre_ofertas/item.sql
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,23 @@ TIME(
ELSE vendedor
END vendedor,
titulo,
categorias,
CASE
WHEN categorias = '[]' THEN null
WHEN TRIM(JSON_EXTRACT_ARRAY(categorias)[OFFSET(1)], '"') = '...' THEN
TRIM(JSON_EXTRACT_ARRAY(categorias)[OFFSET(2)], '"')
WHEN TRIM(JSON_EXTRACT_ARRAY(categorias)[OFFSET(0)], '"') = '...' THEN
TRIM(JSON_EXTRACT_ARRAY(categorias)[OFFSET(1)], '"')
ELSE TRIM(JSON_EXTRACT_ARRAY(categorias)[OFFSET(0)], '"')
END as categoria_principal,
CASE
WHEN categorias = '[]' THEN null
when categorias = '[]' then null
WHEN TRIM(JSON_EXTRACT_ARRAY(categorias)[OFFSET(1)], '"') = '...' THEN
ARRAY_TO_STRING(ARRAY(SELECT x FROM UNNEST(JSON_EXTRACT_ARRAY(categorias)) AS x WITH OFFSET
WHERE OFFSET > 3), ', ')
WHEN TRIM(JSON_EXTRACT_ARRAY(categorias)[OFFSET(0)], '"') = '...' THEN
ARRAY_TO_STRING(ARRAY(SELECT x FROM UNNEST(JSON_EXTRACT_ARRAY(categorias)) AS x WITH OFFSET
WHERE OFFSET > 1), ', ')
ELSE ARRAY_TO_STRING(ARRAY(SELECT x FROM UNNEST(JSON_EXTRACT_ARRAY(categorias)) AS x WITH OFFSET
WHERE OFFSET > 0), ', ')
END as outras_categorias,
Expand All @@ -50,7 +61,10 @@ TIME(
ELSE preco
END AS FLOAT64) AS preco_final,
FROM
`basedosdados-staging.br_mercadolivre_ofertas_staging.item`)
`basedosdados-staging.br_mercadolivre_ofertas_staging.item`

)

SELECT
data_consulta,
hora_consulta,
Expand All @@ -59,7 +73,7 @@ SELECT
titulo,
id_vendor as id_vendedor,
vendedor,
categoria_principal,
a.categoria_principal,
REGEXP_REPLACE(
TRIM(outras_categorias, '"'),
r'("([^"]+)")',
Expand Down
2 changes: 1 addition & 1 deletion models/br_mercadolivre_ofertas/schema.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ models:
- name: categoria_principal
description: Primeira categoria do item classificada no site
- name: outras_categorias
description: Todas as categorias descritas no site em relação ao item
description: Demais categorias descritas no site em relação ao item
- name: caracteristicas
description: Características adicionais do item
- name: envio_nacional
Expand Down
55 changes: 53 additions & 2 deletions models/br_mercadolivre_ofertas/vendedor.sql
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,58 @@ SELECT
SAFE_CAST(predata.opinioes.regular AS INT64) AS avaliacao_ruim
FROM main
LEFT JOIN predata
ON main.id_vendedor = predata.id_vendedor)
ON main.id_vendedor = predata.id_vendedor),

SELECT * FROM tabela_ordenada
-- Collapse tabela_ordenada to one row per combination of the grouping keys
-- (data_consulta, id_vendedor, vendedor, anos_experiencia, reputacao,
-- classificacao, id_municipio).
--
-- A single GROUP BY is enough: every group contains at least one row, so
-- splitting the groups into COUNT(*) > 1 and COUNT(*) = 1 and gluing them back
-- together with UNION ALL yields exactly the same rows as not filtering at all,
-- while scanning tabela_ordenada twice and duplicating the query text.
tabela_deduplicada AS (
  SELECT
    -- data_consulta is parsed from a 'YYYY-MM-DD' string into a DATE.
    PARSE_DATE('%Y-%m-%d', data_consulta) AS data_consulta,
    id_municipio,
    id_vendedor,
    vendedor,
    classificacao,
    reputacao,
    anos_experiencia,
    -- The rating counters are not part of the grouping key. When a group has
    -- several rows, the first element of the aggregated array is kept; no
    -- ORDER BY is given inside ARRAY_AGG, so which row supplies the value is
    -- not specified.
    ARRAY_AGG(avaliacao_bom)[OFFSET(0)] AS avaliacao_bom,
    ARRAY_AGG(avaliacao_regular)[OFFSET(0)] AS avaliacao_regular,
    ARRAY_AGG(avaliacao_ruim)[OFFSET(0)] AS avaliacao_ruim
  FROM
    tabela_ordenada
  GROUP BY
    data_consulta,
    id_vendedor,
    vendedor,
    anos_experiencia,
    reputacao,
    classificacao,
    id_municipio
)

-- Explicit column list (instead of SELECT *) so the model's output schema is
-- visible here and does not change silently if the CTE gains a column.
SELECT
  data_consulta,
  id_municipio,
  id_vendedor,
  vendedor,
  classificacao,
  reputacao,
  anos_experiencia,
  avaliacao_bom,
  avaliacao_regular,
  avaliacao_ruim
FROM
  tabela_deduplicada

Loading