Move the resource files to meaningful directory names
Place the content of the Delta tables used for testing in
directories named after the medium with which the table content
was created:

- databricks73
- databricks104
- databricks122
- databricks131
- deltalake
findinpath authored and findepi committed Aug 30, 2023
1 parent 263ffcd commit 1aca030
Showing 656 changed files with 125 additions and 104 deletions.
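
The rename only changes the classpath prefix under which test fixtures are resolved. A minimal sketch of the lookup before and after the move, assuming the resource directories sit on the test classpath as in the diffs below:

    // Before: every fixture was resolved under a flat "databricks/" prefix,
    // regardless of which writer actually produced the table content.
    URL before = getClass().getClassLoader().getResource("databricks/person");
    // After: the prefix names the medium that created the content,
    // e.g. Databricks 7.3 for the person table.
    URL after = getClass().getClassLoader().getResource("databricks73/person");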
@@ -100,22 +100,22 @@ public abstract class BaseDeltaLakeConnectorSmokeTest
             .build()
             .asList();

-    private static final List<String> NON_TPCH_TABLES = ImmutableList.of(
-            "invariants",
-            "person",
-            "foo",
-            "bar",
-            "old_dates",
-            "old_timestamps",
-            "nested_timestamps",
-            "nested_timestamps_parquet_stats",
-            "json_stats_on_row_type",
-            "parquet_stats_missing",
-            "uppercase_columns",
-            "default_partitions",
-            "insert_nonlowercase_columns",
-            "insert_nested_nonlowercase_columns",
-            "insert_nonlowercase_columns_partitioned");
+    private static final List<ResourceTable> NON_TPCH_TABLES = ImmutableList.of(
+            new ResourceTable("invariants", "deltalake/invariants"),
+            new ResourceTable("person", "databricks73/person"),
+            new ResourceTable("foo", "databricks73/foo"),
+            new ResourceTable("bar", "databricks73/bar"),
+            new ResourceTable("old_dates", "databricks73/old_dates"),
+            new ResourceTable("old_timestamps", "databricks73/old_timestamps"),
+            new ResourceTable("nested_timestamps", "databricks73/nested_timestamps"),
+            new ResourceTable("nested_timestamps_parquet_stats", "databricks73/nested_timestamps_parquet_stats"),
+            new ResourceTable("json_stats_on_row_type", "databricks104/json_stats_on_row_type"),
+            new ResourceTable("parquet_stats_missing", "databricks73/parquet_stats_missing"),
+            new ResourceTable("uppercase_columns", "databricks73/uppercase_columns"),
+            new ResourceTable("default_partitions", "databricks73/default_partitions"),
+            new ResourceTable("insert_nonlowercase_columns", "databricks73/insert_nonlowercase_columns"),
+            new ResourceTable("insert_nested_nonlowercase_columns", "databricks73/insert_nested_nonlowercase_columns"),
+            new ResourceTable("insert_nonlowercase_columns_partitioned", "databricks73/insert_nonlowercase_columns_partitioned"));

     // Cannot be too small, as implicit (time-based) cache invalidation can mask issues. Cannot be too big as some tests need to wait for cache
     // to be outdated.
@@ -184,8 +184,7 @@ protected QueryRunner createQueryRunner()
          * (TIMESTAMP '0100-01-01 01:02:03', 1), (TIMESTAMP '1582-10-15 01:02:03', 2), (TIMESTAMP '1960-01-01 01:02:03', 3), (TIMESTAMP '2020-01-01 01:02:03', 4);
          */
         NON_TPCH_TABLES.forEach(table -> {
-            String resourcePath = "databricks/" + table;
-            registerTableFromResources(table, resourcePath, queryRunner);
+            registerTableFromResources(table.tableName(), table.resourcePath(), queryRunner);
         });

         queryRunner.installPlugin(new TestingHivePlugin());
@@ -317,7 +316,7 @@ public void testCreatePartitionedTable()
     public void testPathUriDecoding()
     {
         String tableName = "test_uri_table_" + randomNameSuffix();
-        registerTableFromResources(tableName, "databricks/uri", getQueryRunner());
+        registerTableFromResources(tableName, "deltalake/uri", getQueryRunner());

         assertQuery("SELECT * FROM " + tableName, "VALUES ('a=equal', 1), ('a:colon', 2), ('a+plus', 3), ('a space', 4), ('a%percent', 5)");
         String firstFilePath = (String) computeScalar("SELECT \"$path\" FROM " + tableName + " WHERE y = 1");
@@ -1541,7 +1540,7 @@ public void testStatsSplitPruningBasedOnSepCreatedCheckpoint()
     public void testStatsSplitPruningBasedOnSepCreatedCheckpointOnTopOfCheckpointWithJustStructStats()
     {
         String tableName = "test_sep_checkpoint_stats_pruning_struct_stats_" + randomNameSuffix();
-        registerTableFromResources(tableName, "databricks/pruning/parquet_struct_statistics", getQueryRunner());
+        registerTableFromResources(tableName, "databricks73/pruning/parquet_struct_statistics", getQueryRunner());
         String transactionLogDirectory = format("%s/_delta_log", tableName);

         // there should be one checkpoint already (created by DB)
@@ -0,0 +1,16 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.trino.plugin.deltalake;
+
+public record ResourceTable(String tableName, String resourcePath) {}
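
ResourceTable is a plain value pair: the logical table name used in tests plus the classpath directory holding the table's data and transaction log. A sketch of how a test consumes it (registerTableFromResources and queryRunner come from the surrounding test framework, as in the first diff above):

    // Hypothetical entry mirroring the NON_TPCH_TABLES list above
    ResourceTable person = new ResourceTable("person", "databricks73/person");
    // Register the fixture under its logical name, loading content
    // from its medium-specific resource directory
    registerTableFromResources(person.tableName(), person.resourcePath(), queryRunner);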
@@ -929,7 +929,7 @@ public void testForceRecalculateAllStats()
     public void testNoStats()
             throws Exception
     {
-        String tableName = copyResourcesAndRegisterTable("no_stats");
+        String tableName = copyResourcesAndRegisterTable("no_stats", "trino410/no_stats");
         String expectedData = "VALUES (42, 'foo'), (12, 'ab'), (null, null), (15, 'cd'), (15, 'bar')";

         assertQuery("SELECT * FROM " + tableName, expectedData);
@@ -962,7 +962,7 @@ public void testNoStats()
     public void testNoColumnStats()
             throws Exception
     {
-        String tableName = copyResourcesAndRegisterTable("no_column_stats");
+        String tableName = copyResourcesAndRegisterTable("no_column_stats", "databricks73/no_column_stats");
         assertQuery("SELECT * FROM " + tableName, "VALUES (42, 'foo')");

         assertUpdate("ANALYZE " + tableName, 1);
@@ -982,7 +982,7 @@ public void testNoColumnStats()
     public void testNoColumnStatsMixedCase()
             throws Exception
     {
-        String tableName = copyResourcesAndRegisterTable("no_column_stats_mixed_case");
+        String tableName = copyResourcesAndRegisterTable("no_column_stats_mixed_case", "databricks104/no_column_stats_mixed_case");
         String tableLocation = getTableLocation(tableName);
         assertQuery("SELECT * FROM " + tableName, "VALUES (11, 'a'), (2, 'b'), (null, null)");

@@ -1013,7 +1013,7 @@ public void testNoColumnStatsMixedCase()
     public void testPartiallyNoStats()
             throws Exception
     {
-        String tableName = copyResourcesAndRegisterTable("no_stats");
+        String tableName = copyResourcesAndRegisterTable("no_stats", "trino410/no_stats");
         // Add additional transaction log entry with statistics
         assertUpdate("INSERT INTO " + tableName + " VALUES (1,'a'), (12,'b')", 2);
         assertQuery("SELECT * FROM " + tableName, " VALUES (42, 'foo'), (12, 'ab'), (null, null), (15, 'cd'), (15, 'bar'), (1, 'a'), (12, 'b')");
@@ -1038,7 +1038,7 @@ public void testPartiallyNoStats()
     public void testNoStatsPartitionedTable()
             throws Exception
     {
-        String tableName = copyResourcesAndRegisterTable("no_stats_partitions");
+        String tableName = copyResourcesAndRegisterTable("no_stats_partitions", "trino410/no_stats_partitions");
         assertQuery("SELECT * FROM " + tableName,
                 """
                 VALUES
@@ -1067,7 +1067,7 @@ public void testNoStatsPartitionedTable()
     public void testNoStatsVariousTypes()
             throws Exception
     {
-        String tableName = copyResourcesAndRegisterTable("no_stats_various_types");
+        String tableName = copyResourcesAndRegisterTable("no_stats_various_types", "trino410/no_stats_various_types");
         assertQuery("SELECT c_boolean, c_tinyint, c_smallint, c_integer, c_bigint, c_real, c_double, c_decimal1, c_decimal2, c_date1, CAST(c_timestamp AS TIMESTAMP), c_varchar1, c_varchar2, c_varbinary FROM " + tableName,
                 """
                 VALUES
@@ -1106,7 +1106,7 @@ public void testNoStatsVariousTypes()
     public void testNoStatsWithColumnMappingModeId()
             throws Exception
     {
-        String tableName = copyResourcesAndRegisterTable("no_stats_column_mapping_id");
+        String tableName = copyResourcesAndRegisterTable("no_stats_column_mapping_id", "databricks104/no_stats_column_mapping_id");

         assertQuery("SELECT * FROM " + tableName, " VALUES (42, 'foo'), (1, 'a'), (2, 'b'), (null, null)");

@@ -1123,12 +1123,12 @@ public void testNoStatsWithColumnMappingModeId()
         cleanExternalTable(tableName);
     }

-    private String copyResourcesAndRegisterTable(String resourceTable)
+    private String copyResourcesAndRegisterTable(String resourceTable, String resourcePath)
             throws IOException, URISyntaxException
     {
         Path tableLocation = Files.createTempDirectory(null);
         String tableName = resourceTable + randomNameSuffix();
-        URI resourcesLocation = getClass().getClassLoader().getResource("databricks/" + resourceTable).toURI();
+        URI resourcesLocation = getClass().getClassLoader().getResource(resourcePath).toURI();
         copyDirectoryContents(Path.of(resourcesLocation), tableLocation);
         assertUpdate(format("CALL system.register_table('%s', '%s', '%s')", getSession().getSchema().orElseThrow(), tableName, tableLocation.toUri()));
         return tableName;
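
Passing the resource path explicitly lets fixtures written by different engines live side by side under their own prefixes. A short sketch of the resulting call sites, matching the tests above:

    // Fixture produced by Trino 410
    String noStats = copyResourcesAndRegisterTable("no_stats", "trino410/no_stats");
    // Fixture produced by Databricks 7.3
    String noColumnStats = copyResourcesAndRegisterTable("no_column_stats", "databricks73/no_column_stats");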
@@ -76,9 +76,15 @@ public class TestDeltaLakeBasic
 {
     private static final ObjectMapper OBJECT_MAPPER = new ObjectMapperProvider().get();

-    private static final List<String> PERSON_TABLES = ImmutableList.of(
-            "person", "person_without_last_checkpoint", "person_without_old_jsons", "person_without_checkpoints");
-    private static final List<String> OTHER_TABLES = ImmutableList.of("no_column_stats", "timestamp_ntz", "timestamp_ntz_partition");
+    private static final List<ResourceTable> PERSON_TABLES = ImmutableList.of(
+            new ResourceTable("person", "databricks73/person"),
+            new ResourceTable("person_without_last_checkpoint", "databricks73/person_without_last_checkpoint"),
+            new ResourceTable("person_without_old_jsons", "databricks73/person_without_old_jsons"),
+            new ResourceTable("person_without_checkpoints", "databricks73/person_without_checkpoints"));
+    private static final List<ResourceTable> OTHER_TABLES = ImmutableList.of(
+            new ResourceTable("no_column_stats", "databricks73/no_column_stats"),
+            new ResourceTable("timestamp_ntz", "databricks131/timestamp_ntz"),
+            new ResourceTable("timestamp_ntz_partition", "databricks131/timestamp_ntz_partition"));

     // The col-{uuid} pattern for delta.columnMapping.physicalName
     private static final Pattern PHYSICAL_COLUMN_NAME_PATTERN = Pattern.compile("^col-[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$");
@@ -97,32 +103,32 @@ protected QueryRunner createQueryRunner()
     @BeforeClass
     public void registerTables()
     {
-        for (String table : Iterables.concat(PERSON_TABLES, OTHER_TABLES)) {
-            String dataPath = getTableLocation(table).toExternalForm();
+        for (ResourceTable table : Iterables.concat(PERSON_TABLES, OTHER_TABLES)) {
+            String dataPath = getTableLocation(table.resourcePath()).toExternalForm();
             getQueryRunner().execute(
-                    format("CALL system.register_table('%s', '%s', '%s')", getSession().getSchema().orElseThrow(), table, dataPath));
+                    format("CALL system.register_table('%s', '%s', '%s')", getSession().getSchema().orElseThrow(), table.tableName(), dataPath));
         }
     }

-    private URL getTableLocation(String table)
+    private URL getTableLocation(String resourcePath)
     {
-        return getClass().getClassLoader().getResource("databricks/" + table);
+        return getClass().getClassLoader().getResource(resourcePath);
     }

     @DataProvider
-    public Object[][] tableNames()
+    public Object[][] tables()
     {
         return PERSON_TABLES.stream()
                 .map(table -> new Object[] {table})
                 .toArray(Object[][]::new);
     }

-    @Test(dataProvider = "tableNames")
-    public void testDescribeTable(String tableName)
+    @Test(dataProvider = "tables")
+    public void testDescribeTable(ResourceTable table)
     {
         // the schema is actually defined in the transaction log
         assertQuery(
-                format("DESCRIBE %s", tableName),
+                format("DESCRIBE %s", table.tableName()),
                 "VALUES " +
                         "('name', 'varchar', '', ''), " +
                         "('age', 'integer', '', ''), " +
@@ -133,15 +139,15 @@ public void testDescribeTable(String tableName)
                         "('income', 'double', '', '')");
     }

-    @Test(dataProvider = "tableNames")
-    public void testSimpleQueries(String tableName)
+    @Test(dataProvider = "tables")
+    public void testSimpleQueries(ResourceTable table)
     {
-        assertQuery(format("SELECT COUNT(*) FROM %s", tableName), "VALUES 12");
-        assertQuery(format("SELECT income FROM %s WHERE name = 'Bob'", tableName), "VALUES 99000.00");
-        assertQuery(format("SELECT name FROM %s WHERE name LIKE 'B%%'", tableName), "VALUES ('Bob'), ('Betty')");
-        assertQuery(format("SELECT DISTINCT gender FROM %s", tableName), "VALUES ('M'), ('F'), (null)");
-        assertQuery(format("SELECT DISTINCT age FROM %s", tableName), "VALUES (21), (25), (28), (29), (30), (42)");
-        assertQuery(format("SELECT name FROM %s WHERE age = 42", tableName), "VALUES ('Alice'), ('Emma')");
+        assertQuery(format("SELECT COUNT(*) FROM %s", table.tableName()), "VALUES 12");
+        assertQuery(format("SELECT income FROM %s WHERE name = 'Bob'", table.tableName()), "VALUES 99000.00");
+        assertQuery(format("SELECT name FROM %s WHERE name LIKE 'B%%'", table.tableName()), "VALUES ('Bob'), ('Betty')");
+        assertQuery(format("SELECT DISTINCT gender FROM %s", table.tableName()), "VALUES ('M'), ('F'), (null)");
+        assertQuery(format("SELECT DISTINCT age FROM %s", table.tableName()), "VALUES (21), (25), (28), (29), (30), (42)");
+        assertQuery(format("SELECT name FROM %s WHERE age = 42", table.tableName()), "VALUES ('Alice'), ('Emma')");
     }

     @Test
@@ -565,7 +571,7 @@ public void testIdentityColumns()
     {
         String tableName = "test_identity_columns_" + randomNameSuffix();
         Path tableLocation = Files.createTempFile(tableName, null);
-        copyDirectoryContents(new File(Resources.getResource("databricks/identity_columns").toURI()).toPath(), tableLocation);
+        copyDirectoryContents(new File(Resources.getResource("databricks122/identity_columns").toURI()).toPath(), tableLocation);

         assertUpdate("CALL system.register_table('%s', '%s', '%s')".formatted(getSession().getSchema().orElseThrow(), tableName, tableLocation.toUri()));
         assertQueryReturnsEmptyResult("SELECT * FROM " + tableName);
@@ -610,7 +616,7 @@ public void testCorruptedExternalTableLocation()
         // create a bad_person table which is based on person table in temporary location
         String tableName = "bad_person_" + randomNameSuffix();
         Path tableLocation = Files.createTempFile(tableName, null);
-        copyDirectoryContents(Path.of(getTableLocation("person").toURI()), tableLocation);
+        copyDirectoryContents(Path.of(getTableLocation("databricks73/person").toURI()), tableLocation);
         getQueryRunner().execute(
                 format("CALL system.register_table('%s', '%s', '%s')", getSession().getSchema().orElseThrow(), tableName, tableLocation));
         testCorruptedTableLocation(tableName, tableLocation, false);
@@ -177,7 +177,7 @@ public void testWritesLockInvalidContents(String writeStatement, String expected
     public void testDeltaColumnInvariant()
     {
         String tableName = "test_invariants_" + randomNameSuffix();
-        hiveMinioDataLake.copyResources("databricks/invariants", tableName);
+        hiveMinioDataLake.copyResources("deltalake/invariants", tableName);
         assertUpdate("CALL system.register_table('%s', '%s', '%s')".formatted(SCHEMA, tableName, getLocationForTable(bucketName, tableName)));

         assertQuery("SELECT * FROM " + tableName, "VALUES 1");
@@ -196,7 +196,7 @@ public void testDeltaColumnInvariant()
     public void testSchemaEvolutionOnTableWithColumnInvariant()
     {
         String tableName = "test_schema_evolution_on_table_with_column_invariant_" + randomNameSuffix();
-        hiveMinioDataLake.copyResources("databricks/invariants", tableName);
+        hiveMinioDataLake.copyResources("deltalake/invariants", tableName);
         getQueryRunner().execute(format(
                 "CALL system.register_table('%s', '%s', '%s')",
                 SCHEMA,
@@ -99,7 +99,7 @@ protected QueryRunner createQueryRunner()
     @BeforeClass
     public void registerTables()
     {
-        String dataPath = getClass().getClassLoader().getResource("databricks/read_timestamps").toExternalForm();
+        String dataPath = getClass().getClassLoader().getResource("databricks73/read_timestamps").toExternalForm();
         getQueryRunner().execute(format("CALL system.register_table('%s', 'read_timestamps', '%s')", getSession().getSchema().orElseThrow(), dataPath));
     }
@@ -38,7 +38,7 @@ protected QueryRunner createQueryRunner()
     @BeforeClass
     public void registerTables()
     {
-        String dataPath = Resources.getResource("databricks/person").toExternalForm();
+        String dataPath = Resources.getResource("databricks73/person").toExternalForm();
         getQueryRunner().execute(
                 format("CALL system.register_table('%s', 'person', '%s')", getSession().getSchema().orElseThrow(), dataPath));
     }
@@ -39,7 +39,7 @@
 public class TestPredicatePushdown
         extends AbstractTestQueryFramework
 {
-    private static final Path RESOURCE_PATH = Path.of("databricks/pushdown/");
+    private static final Path RESOURCE_PATH = Path.of("databricks73/pushdown/");
     private static final String TEST_SCHEMA = "default";

     private final String bucketName = "delta-test-pushdown-" + randomNameSuffix();
@@ -46,7 +46,7 @@ public class TestReadJsonTransactionLog
     public Object[][] dataSource()
     {
         return new Object[][] {
-                {"databricks"},
+                {"databricks73"},
                 {"deltalake"},
         };
     }
@@ -66,7 +66,7 @@ protected QueryRunner createQueryRunner()
     public void registerTables()
     {
         for (String table : TABLES) {
-            String dataPath = Resources.getResource("databricks/pruning/" + table).toExternalForm();
+            String dataPath = Resources.getResource("databricks73/pruning/" + table).toExternalForm();
             getQueryRunner().execute(
                     format("CALL system.register_table('%s', '%s', '%s')", getSession().getSchema().orElseThrow(), table, dataPath));
         }
@@ -274,7 +274,7 @@ public void testPartitionPruningWithExpressionAndDomainFilter()
     public void testSplitGenerationError()
     {
         // log entry with invalid stats (low > high)
-        String dataPath = Resources.getResource("databricks/pruning/invalid_log").toExternalForm();
+        String dataPath = Resources.getResource("databricks73/pruning/invalid_log").toExternalForm();
         getQueryRunner().execute(
                 format("CALL system.register_table('%s', 'person', '%s')", getSession().getSchema().orElseThrow(), dataPath));
         assertQueryFails("SELECT name FROM person WHERE income < 1000", "Failed to generate splits for tpch.person");