From 0c163308eacf4d1e3215e429d2859fdd0fff1de5 Mon Sep 17 00:00:00 2001 From: Piotr Findeisen Date: Mon, 6 May 2024 15:36:03 +0200 Subject: [PATCH] Finer grained cache missing control in caching hive metastore Allow caching missing stats without caching missing tables. This allows leveraging caching on un-analyzed Hive tables (with associated caching's trade-offs) without having to opt-in into caching missing tables, which has different set of trade-offs. --- .../metastore/cache/CachingHiveMetastore.java | 34 ++++++++++------ .../cache/CachingHiveMetastoreConfig.java | 39 ++++++++++++++++--- .../cache/SharedHiveMetastoreCache.java | 21 ++++++++-- .../cache/TestCachingHiveMetastore.java | 4 +- .../cache/TestCachingHiveMetastoreConfig.java | 10 ++++- 5 files changed, 82 insertions(+), 26 deletions(-) diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/cache/CachingHiveMetastore.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/cache/CachingHiveMetastore.java index ad62a7b3f4c84..7aa8a53548e35 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/cache/CachingHiveMetastore.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/cache/CachingHiveMetastore.java @@ -18,6 +18,7 @@ import com.google.common.cache.CacheLoader.InvalidCacheLoadException; import com.google.common.cache.LoadingCache; import com.google.common.collect.ImmutableMap; +import com.google.common.collect.ImmutableSet; import com.google.common.collect.Iterables; import com.google.common.collect.Sets.SetView; import com.google.common.util.concurrent.ListenableFuture; @@ -89,6 +90,9 @@ import static io.trino.plugin.hive.metastore.HivePartitionName.hivePartitionName; import static io.trino.plugin.hive.metastore.HiveTableName.hiveTableName; import static io.trino.plugin.hive.metastore.PartitionFilter.partitionFilter; +import static io.trino.plugin.hive.metastore.cache.CachingHiveMetastore.ObjectType.OTHER; +import static io.trino.plugin.hive.metastore.cache.CachingHiveMetastore.ObjectType.PARTITION; +import static io.trino.plugin.hive.metastore.cache.CachingHiveMetastore.ObjectType.STATS; import static io.trino.plugin.hive.util.HiveUtil.makePartName; import static java.util.Collections.unmodifiableSet; import static java.util.Objects.requireNonNull; @@ -107,8 +111,14 @@ public enum StatsRecording DISABLED } + public enum ObjectType { + PARTITION, + STATS, + OTHER, + } + private final HiveMetastore delegate; - private final boolean cacheMissing; + private final Set cacheMissing; private final LoadingCache> databaseCache; private final LoadingCache> databaseNamesCache; private final LoadingCache> tableCache; @@ -126,7 +136,7 @@ public static CachingHiveMetastore createPerTransactionCache(HiveMetastore deleg { return new CachingHiveMetastore( delegate, - true, + ImmutableSet.copyOf(ObjectType.values()), new CacheFactory(maximumSize), new CacheFactory(maximumSize), new CacheFactory(maximumSize), @@ -141,8 +151,8 @@ public static CachingHiveMetastore createCachingHiveMetastore( Executor refreshExecutor, long maximumSize, StatsRecording statsRecording, - boolean cacheMissing, - boolean partitionCacheEnabled) + boolean partitionCacheEnabled, + Set cacheMissing) { // refresh executor is only required when the refresh interval is set, but the executor is // always set, so it is simpler to just enforce that @@ -183,7 +193,7 @@ public static CachingHiveMetastore createCachingHiveMetastore( private CachingHiveMetastore( HiveMetastore delegate, - boolean cacheMissing, + Set cacheMissing, CacheFactory cacheFactory, CacheFactory partitionCacheFactory, CacheFactory statsCacheFactory, @@ -261,14 +271,14 @@ private static V get(LoadingCache cache, K key) } } - private Optional getOptional(LoadingCache> cache, K key) + private Optional getOptional(ObjectType objectType, LoadingCache> cache, K key) { try { Optional value = cache.getIfPresent(key); @SuppressWarnings("OptionalAssignedToNull") boolean valueIsPresent = value != null; if (valueIsPresent) { - if (value.isPresent() || cacheMissing) { + if (value.isPresent() || cacheMissing.contains(objectType)) { return value; } cache.invalidate(key); @@ -396,7 +406,7 @@ private static Map getAll( @Override public Optional getDatabase(String databaseName) { - return getOptional(databaseCache, databaseName); + return getOptional(OTHER, databaseCache, databaseName); } private Optional loadDatabase(String databaseName) @@ -418,7 +428,7 @@ private List loadAllDatabases() @Override public Optional getTable(String databaseName, String tableName) { - return getOptional(tableCache, hiveTableName(databaseName, tableName)); + return getOptional(OTHER, tableCache, hiveTableName(databaseName, tableName)); } private Optional
loadTable(HiveTableName hiveTableName) @@ -481,7 +491,7 @@ private Map mergeColumnStatistics(Map columnStatisticsBuilder = ImmutableMap.builder(); // Populate empty statistics for all requested columns to cache absence of column statistics for future requests. - if (cacheMissing) { + if (cacheMissing.contains(STATS)) { columnStatisticsBuilder.putAll(Iterables.transform( dataColumns, column -> new AbstractMap.SimpleEntry<>(column, HiveColumnStatistics.empty()))); @@ -741,7 +751,7 @@ public Optional> getPartitionNamesByFilter( List columnNames, TupleDomain partitionKeysFilter) { - return getOptional(partitionFilterCache, partitionFilter(databaseName, tableName, columnNames, partitionKeysFilter)); + return getOptional(PARTITION, partitionFilterCache, partitionFilter(databaseName, tableName, columnNames, partitionKeysFilter)); } private Optional> loadPartitionNamesByFilter(PartitionFilter partitionFilter) @@ -949,7 +959,7 @@ public Set listTablePrivileges(String databaseName, String ta @Override public Optional getConfigValue(String name) { - return getOptional(configValuesCache, name); + return getOptional(OTHER, configValuesCache, name); } private Optional loadConfigValue(String name) diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/cache/CachingHiveMetastoreConfig.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/cache/CachingHiveMetastoreConfig.java index 9995e2eacf1df..6374bba584e6a 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/cache/CachingHiveMetastoreConfig.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/cache/CachingHiveMetastoreConfig.java @@ -21,6 +21,7 @@ import java.util.Optional; +import static com.google.common.base.MoreObjects.firstNonNull; import static com.google.common.collect.Comparators.max; import static java.util.concurrent.TimeUnit.MINUTES; import static java.util.concurrent.TimeUnit.SECONDS; @@ -38,8 +39,10 @@ public class CachingHiveMetastoreConfig private Optional metastoreRefreshInterval = Optional.empty(); private long metastoreCacheMaximumSize = 10000; private int maxMetastoreRefreshThreads = 10; - private boolean cacheMissing = true; private boolean partitionCacheEnabled = true; + private boolean cacheMissing = true; + private Boolean cacheMissingPartitions; + private Boolean cacheMissingStats; @NotNull public Duration getMetastoreCacheTtl() @@ -106,6 +109,18 @@ public CachingHiveMetastoreConfig setMaxMetastoreRefreshThreads(int maxMetastore return this; } + public boolean isPartitionCacheEnabled() + { + return partitionCacheEnabled; + } + + @Config("hive.metastore-cache.cache-partitions") + public CachingHiveMetastoreConfig setPartitionCacheEnabled(boolean enabled) + { + this.partitionCacheEnabled = enabled; + return this; + } + public boolean isCacheMissing() { return cacheMissing; @@ -118,15 +133,27 @@ public CachingHiveMetastoreConfig setCacheMissing(boolean cacheMissing) return this; } - public boolean isPartitionCacheEnabled() + public boolean isCacheMissingPartitions() { - return partitionCacheEnabled; + return firstNonNull(cacheMissingPartitions, cacheMissing); } - @Config("hive.metastore-cache.cache-partitions") - public CachingHiveMetastoreConfig setPartitionCacheEnabled(boolean enabled) + @Config("hive.metastore-cache.cache-missing-partitions") + public CachingHiveMetastoreConfig setCacheMissingPartitions(boolean cacheMissingPartitions) { - this.partitionCacheEnabled = enabled; + this.cacheMissingPartitions = cacheMissingPartitions; + return this; + } + + public boolean isCacheMissingStats() + { + return firstNonNull(cacheMissingStats, cacheMissing); + } + + @Config("hive.metastore-cache.cache-missing-stats") + public CachingHiveMetastoreConfig setCacheMissingStats(boolean cacheMissingStats) + { + this.cacheMissingStats = cacheMissingStats; return this; } } diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/cache/SharedHiveMetastoreCache.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/cache/SharedHiveMetastoreCache.java index 44afddb67082b..f60633dfc3bdd 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/cache/SharedHiveMetastoreCache.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/cache/SharedHiveMetastoreCache.java @@ -18,12 +18,14 @@ import com.google.common.cache.CacheLoader; import com.google.common.cache.CacheStats; import com.google.common.cache.LoadingCache; +import com.google.common.collect.ImmutableSet; import com.google.common.math.LongMath; import com.google.common.util.concurrent.UncheckedExecutionException; import com.google.inject.Inject; import io.airlift.units.Duration; import io.trino.plugin.hive.metastore.HiveMetastore; import io.trino.plugin.hive.metastore.HiveMetastoreFactory; +import io.trino.plugin.hive.metastore.cache.CachingHiveMetastore.ObjectType; import io.trino.spi.NodeManager; import io.trino.spi.TrinoException; import io.trino.spi.catalog.CatalogName; @@ -35,6 +37,7 @@ import org.weakref.jmx.Nested; import java.util.Optional; +import java.util.Set; import java.util.concurrent.ExecutorService; import java.util.function.Function; @@ -61,7 +64,7 @@ public class SharedHiveMetastoreCache private final Duration userMetastoreCacheTtl; private final long userMetastoreCacheMaximumSize; private final boolean metastorePartitionCacheEnabled; - private final boolean cacheMissing; + private final Set cacheMissing; private ExecutorService executorService; @@ -83,7 +86,17 @@ public SharedHiveMetastoreCache( metastoreRefreshInterval = config.getMetastoreRefreshInterval(); metastoreCacheMaximumSize = config.getMetastoreCacheMaximumSize(); metastorePartitionCacheEnabled = config.isPartitionCacheEnabled(); - cacheMissing = config.isCacheMissing(); + ImmutableSet.Builder cacheMissing = ImmutableSet.builder(); + if (config.isCacheMissing()) { + cacheMissing.add(ObjectType.OTHER); + } + if (config.isCacheMissingPartitions()) { + cacheMissing.add(ObjectType.PARTITION); + } + if (config.isCacheMissingStats()) { + cacheMissing.add(ObjectType.STATS); + } + this.cacheMissing = cacheMissing.build(); userMetastoreCacheTtl = impersonationCachingConfig.getUserMetastoreCacheTtl(); userMetastoreCacheMaximumSize = impersonationCachingConfig.getUserMetastoreCacheMaximumSize(); @@ -147,8 +160,8 @@ private CachingHiveMetastore createCachingHiveMetastore(HiveMetastoreFactory met new ReentrantBoundedExecutor(executorService, maxMetastoreRefreshThreads), metastoreCacheMaximumSize, CachingHiveMetastore.StatsRecording.ENABLED, - cacheMissing, - metastorePartitionCacheEnabled); + metastorePartitionCacheEnabled, + cacheMissing); } public static class CachingHiveMetastoreFactory diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/cache/TestCachingHiveMetastore.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/cache/TestCachingHiveMetastore.java index 03229a1532d4c..858776e1e6f86 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/cache/TestCachingHiveMetastore.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/cache/TestCachingHiveMetastore.java @@ -1116,7 +1116,7 @@ private static CachingHiveMetastore createCachingHiveMetastore(HiveMetastore hiv executor, 1000, CachingHiveMetastore.StatsRecording.ENABLED, - cacheMissing, - partitionCacheEnabled); + partitionCacheEnabled, + cacheMissing ? ImmutableSet.copyOf(CachingHiveMetastore.ObjectType.values()) : ImmutableSet.of()); } } diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/cache/TestCachingHiveMetastoreConfig.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/cache/TestCachingHiveMetastoreConfig.java index 1f0e196bd05ab..02316306266d7 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/cache/TestCachingHiveMetastoreConfig.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/metastore/cache/TestCachingHiveMetastoreConfig.java @@ -41,8 +41,10 @@ public void testDefaults() .setMetastoreRefreshInterval(null) .setMetastoreCacheMaximumSize(10000) .setMaxMetastoreRefreshThreads(10) + .setPartitionCacheEnabled(true) .setCacheMissing(true) - .setPartitionCacheEnabled(true)); + .setCacheMissingPartitions(true) + .setCacheMissingStats(true)); } @Test @@ -56,6 +58,8 @@ public void testExplicitPropertyMappings() .put("hive.metastore-refresh-max-threads", "2500") .put("hive.metastore-cache.cache-partitions", "false") .put("hive.metastore-cache.cache-missing", "false") + .put("hive.metastore-cache.cache-missing-partitions", "false") + .put("hive.metastore-cache.cache-missing-stats", "false") .buildOrThrow(); CachingHiveMetastoreConfig expected = new CachingHiveMetastoreConfig() @@ -64,8 +68,10 @@ public void testExplicitPropertyMappings() .setMetastoreRefreshInterval(new Duration(30, MINUTES)) .setMetastoreCacheMaximumSize(5000) .setMaxMetastoreRefreshThreads(2500) + .setPartitionCacheEnabled(false) .setCacheMissing(false) - .setPartitionCacheEnabled(false); + .setCacheMissingPartitions(false) + .setCacheMissingStats(false); assertFullMapping(properties, expected); }