entitySpecs,
diff --git a/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/search/custom/QueryConfiguration.java b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/search/custom/QueryConfiguration.java
index 901bf803d2bca..e3a9d076dbef2 100644
--- a/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/search/custom/QueryConfiguration.java
+++ b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/search/custom/QueryConfiguration.java
@@ -19,6 +19,13 @@ public class QueryConfiguration {
private String queryRegex;
@Builder.Default private boolean simpleQuery = true;
+
+ /**
+ * Used to determine if standard structured query logic should be applied when relevant, i.e.
+ * fullText flag is false. Will not be added in cases where simpleQuery would be the standard.
+ */
+ @Builder.Default private boolean structuredQuery = true;
+
@Builder.Default private boolean exactMatchQuery = true;
@Builder.Default private boolean prefixMatchQuery = true;
private BoolQueryConfiguration boolQuery;
diff --git a/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/EntityClient.java b/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/EntityClient.java
index b1b24ac97f0b8..676b80c8bea32 100644
--- a/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/EntityClient.java
+++ b/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/EntityClient.java
@@ -154,7 +154,8 @@ public BrowseResultV2 browseV2(
@Nonnull String input,
int start,
int count,
- @Nonnull Authentication authentication)
+ @Nonnull Authentication authentication,
+ @Nullable SearchFlags searchFlags)
throws RemoteInvocationException;
/**
@@ -176,7 +177,8 @@ public BrowseResultV2 browseV2(
@Nonnull String input,
int start,
int count,
- @Nonnull Authentication authentication)
+ @Nonnull Authentication authentication,
+ @Nullable SearchFlags searchFlags)
throws RemoteInvocationException;
@Deprecated
diff --git a/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/RestliEntityClient.java b/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/RestliEntityClient.java
index 3108345bd3937..653ef046ffc02 100644
--- a/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/RestliEntityClient.java
+++ b/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/RestliEntityClient.java
@@ -378,7 +378,8 @@ public BrowseResultV2 browseV2(
@Nonnull String input,
int start,
int count,
- @Nonnull Authentication authentication) {
+ @Nonnull Authentication authentication,
+ @Nullable SearchFlags searchFlags) {
throw new NotImplementedException("BrowseV2 is not implemented in Restli yet");
}
@@ -391,7 +392,8 @@ public BrowseResultV2 browseV2(
@Nonnull String input,
int start,
int count,
- @Nonnull Authentication authentication)
+ @Nonnull Authentication authentication,
+ @Nullable SearchFlags searchFlags)
throws RemoteInvocationException {
throw new NotImplementedException("BrowseV2 is not implemented in Restli yet");
}
diff --git a/metadata-service/services/src/main/java/com/linkedin/metadata/search/EntitySearchService.java b/metadata-service/services/src/main/java/com/linkedin/metadata/search/EntitySearchService.java
index 2fec88ad221fd..0d1c031db136e 100644
--- a/metadata-service/services/src/main/java/com/linkedin/metadata/search/EntitySearchService.java
+++ b/metadata-service/services/src/main/java/com/linkedin/metadata/search/EntitySearchService.java
@@ -197,6 +197,7 @@ BrowseResult browse(
* @param input search query
* @param start start offset of first group
* @param count max number of results requested
+ * @param searchFlags configuration options for search
*/
@Nonnull
public BrowseResultV2 browseV2(
@@ -205,7 +206,8 @@ public BrowseResultV2 browseV2(
@Nullable Filter filter,
@Nonnull String input,
int start,
- int count);
+ int count,
+ @Nullable SearchFlags searchFlags);
/**
* Gets browse snapshot of a given path
@@ -216,6 +218,7 @@ public BrowseResultV2 browseV2(
* @param input search query
* @param start start offset of first group
* @param count max number of results requested
+ * @param searchFlags configuration options for search
*/
@Nonnull
public BrowseResultV2 browseV2(
@@ -224,7 +227,8 @@ public BrowseResultV2 browseV2(
@Nullable Filter filter,
@Nonnull String input,
int start,
- int count);
+ int count,
+ @Nullable SearchFlags searchFlags);
/**
* Gets a list of paths for a given urn.
From 63322225d96c5e19e11b78cb14eba13c30642027 Mon Sep 17 00:00:00 2001
From: david-leifker <114954101+david-leifker@users.noreply.github.com>
Date: Wed, 7 Feb 2024 14:41:49 -0600
Subject: [PATCH 4/7] misc: datahub-upgrade improvements, aspect key & default
aspects fixes (#9796)
---
.../upgrade/UpgradeCliApplication.java | 10 ++-
.../config/BackfillBrowsePathsV2Config.java | 10 ++-
.../ReindexDataJobViaNodesCLLConfig.java | 8 +-
.../upgrade/config/SystemUpdateCondition.java | 14 ++++
.../upgrade/config/SystemUpdateConfig.java | 23 ++++++
.../entity/steps/BackfillBrowsePathsV2.java | 16 +++-
.../steps/BackfillBrowsePathsV2Step.java | 66 +++++++++++-----
.../system/via/ReindexDataJobViaNodesCLL.java | 9 ++-
.../via/ReindexDataJobViaNodesCLLStep.java | 28 ++++---
.../DatahubUpgradeNoSchemaRegistryTest.java | 24 +++++-
...pgradeCliApplicationTestConfiguration.java | 17 ++++-
.../com/linkedin/metadata/EventUtils.java | 2 +-
.../metadata/entity/EntityServiceImpl.java | 39 +++++++---
.../metadata/entity/EntityServiceTest.java | 48 +++++++++++-
.../src/main/resources/application.yml | 8 ++
.../factory/entity/EntityServiceFactory.java | 19 +----
.../DUHESchemaRegistryFactory.java | 40 ----------
.../InternalSchemaRegistryFactory.java | 12 ---
.../SchemaRegistryServiceFactory.java | 20 +++++
.../SystemUpdateSchemaRegistryFactory.java | 66 ++++++++++++++++
.../linkedin/metadata/boot/BootstrapStep.java | 21 +----
.../boot/kafka/MockDUHESerializer.java | 57 --------------
...java => MockSystemUpdateDeserializer.java} | 49 ++++++------
.../kafka/MockSystemUpdateSerializer.java | 76 +++++++++++++++++++
.../resources/entity/AspectResourceTest.java | 2 +-
25 files changed, 451 insertions(+), 233 deletions(-)
create mode 100644 datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/SystemUpdateCondition.java
delete mode 100644 metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/schemaregistry/DUHESchemaRegistryFactory.java
create mode 100644 metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/schemaregistry/SchemaRegistryServiceFactory.java
create mode 100644 metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/schemaregistry/SystemUpdateSchemaRegistryFactory.java
delete mode 100644 metadata-service/factories/src/main/java/com/linkedin/metadata/boot/kafka/MockDUHESerializer.java
rename metadata-service/factories/src/main/java/com/linkedin/metadata/boot/kafka/{MockDUHEDeserializer.java => MockSystemUpdateDeserializer.java} (57%)
create mode 100644 metadata-service/factories/src/main/java/com/linkedin/metadata/boot/kafka/MockSystemUpdateSerializer.java
diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/UpgradeCliApplication.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/UpgradeCliApplication.java
index ff8bd542fbdff..50847da07be73 100644
--- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/UpgradeCliApplication.java
+++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/UpgradeCliApplication.java
@@ -2,6 +2,10 @@
import com.linkedin.gms.factory.auth.AuthorizerChainFactory;
import com.linkedin.gms.factory.auth.DataHubAuthorizerFactory;
+import com.linkedin.gms.factory.graphql.GraphQLEngineFactory;
+import com.linkedin.gms.factory.kafka.KafkaEventConsumerFactory;
+import com.linkedin.gms.factory.kafka.SimpleKafkaConsumerFactory;
+import com.linkedin.gms.factory.kafka.schemaregistry.InternalSchemaRegistryFactory;
import com.linkedin.gms.factory.telemetry.ScheduledAnalyticsFactory;
import org.springframework.boot.WebApplicationType;
import org.springframework.boot.autoconfigure.SpringBootApplication;
@@ -24,7 +28,11 @@
classes = {
ScheduledAnalyticsFactory.class,
AuthorizerChainFactory.class,
- DataHubAuthorizerFactory.class
+ DataHubAuthorizerFactory.class,
+ SimpleKafkaConsumerFactory.class,
+ KafkaEventConsumerFactory.class,
+ InternalSchemaRegistryFactory.class,
+ GraphQLEngineFactory.class
})
})
public class UpgradeCliApplication {
diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/BackfillBrowsePathsV2Config.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/BackfillBrowsePathsV2Config.java
index 406963c58fd71..2b2f4648f76e7 100644
--- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/BackfillBrowsePathsV2Config.java
+++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/BackfillBrowsePathsV2Config.java
@@ -3,6 +3,7 @@
import com.linkedin.datahub.upgrade.system.entity.steps.BackfillBrowsePathsV2;
import com.linkedin.metadata.entity.EntityService;
import com.linkedin.metadata.search.SearchService;
+import org.springframework.beans.factory.annotation.Value;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
@@ -11,7 +12,12 @@ public class BackfillBrowsePathsV2Config {
@Bean
public BackfillBrowsePathsV2 backfillBrowsePathsV2(
- EntityService<?> entityService, SearchService searchService) {
- return new BackfillBrowsePathsV2(entityService, searchService);
+ EntityService<?> entityService,
+ SearchService searchService,
+ @Value("${systemUpdate.browsePathsV2.enabled}") final boolean enabled,
+ @Value("${systemUpdate.browsePathsV2.reprocess.enabled}") final boolean reprocessEnabled,
+ @Value("${systemUpdate.browsePathsV2.batchSize}") final Integer batchSize) {
+ return new BackfillBrowsePathsV2(
+ entityService, searchService, enabled, reprocessEnabled, batchSize);
}
}
diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/ReindexDataJobViaNodesCLLConfig.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/ReindexDataJobViaNodesCLLConfig.java
index 06311e1853874..83dad80944f5f 100644
--- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/ReindexDataJobViaNodesCLLConfig.java
+++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/ReindexDataJobViaNodesCLLConfig.java
@@ -2,6 +2,7 @@
import com.linkedin.datahub.upgrade.system.via.ReindexDataJobViaNodesCLL;
import com.linkedin.metadata.entity.EntityService;
+import org.springframework.beans.factory.annotation.Value;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
@@ -9,7 +10,10 @@
public class ReindexDataJobViaNodesCLLConfig {
@Bean
- public ReindexDataJobViaNodesCLL _reindexDataJobViaNodesCLL(EntityService<?> entityService) {
- return new ReindexDataJobViaNodesCLL(entityService);
+ public ReindexDataJobViaNodesCLL _reindexDataJobViaNodesCLL(
+ EntityService<?> entityService,
+ @Value("${systemUpdate.dataJobNodeCLL.enabled}") final boolean enabled,
+ @Value("${systemUpdate.dataJobNodeCLL.batchSize}") final Integer batchSize) {
+ return new ReindexDataJobViaNodesCLL(entityService, enabled, batchSize);
}
}
diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/SystemUpdateCondition.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/SystemUpdateCondition.java
new file mode 100644
index 0000000000000..ea432dfa9f7df
--- /dev/null
+++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/SystemUpdateCondition.java
@@ -0,0 +1,14 @@
+package com.linkedin.datahub.upgrade.config;
+
+import org.springframework.boot.ApplicationArguments;
+import org.springframework.context.annotation.Condition;
+import org.springframework.context.annotation.ConditionContext;
+import org.springframework.core.type.AnnotatedTypeMetadata;
+
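+/** Activates annotated beans only when the non-option CLI arguments include "SystemUpdate". */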
+public class SystemUpdateCondition implements Condition {
+ @Override
+ public boolean matches(ConditionContext context, AnnotatedTypeMetadata metadata) {
+ return context.getBeanFactory().getBean(ApplicationArguments.class).getNonOptionArgs().stream()
+ .anyMatch("SystemUpdate"::equals);
+ }
+}
diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/SystemUpdateConfig.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/SystemUpdateConfig.java
index 177d4b531ba86..cde3a29248fb5 100644
--- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/SystemUpdateConfig.java
+++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/SystemUpdateConfig.java
@@ -8,6 +8,7 @@
import com.linkedin.gms.factory.common.TopicConventionFactory;
import com.linkedin.gms.factory.config.ConfigurationProvider;
import com.linkedin.gms.factory.kafka.DataHubKafkaProducerFactory;
+import com.linkedin.gms.factory.kafka.schemaregistry.InternalSchemaRegistryFactory;
import com.linkedin.gms.factory.kafka.schemaregistry.SchemaRegistryConfig;
import com.linkedin.metadata.config.kafka.KafkaConfiguration;
import com.linkedin.metadata.dao.producer.KafkaEventProducer;
@@ -21,9 +22,12 @@
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.beans.factory.annotation.Value;
+import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
import org.springframework.boot.autoconfigure.kafka.KafkaProperties;
import org.springframework.context.annotation.Bean;
+import org.springframework.context.annotation.Conditional;
import org.springframework.context.annotation.Configuration;
+import org.springframework.context.annotation.Primary;
@Slf4j
@Configuration
@@ -74,4 +78,23 @@ protected KafkaEventProducer duheKafkaEventProducer(
duheSchemaRegistryConfig, kafkaConfiguration, properties));
return new KafkaEventProducer(producer, topicConvention, kafkaHealthChecker);
}
+
+ /**
+ * The ReindexDataJobViaNodesCLLConfig step requires publishing to MCL. Overriding the default
+ * producer with this special producer which doesn't require an active registry.
+ *
+ * Use when INTERNAL registry and is SYSTEM_UPDATE
+ *
+ * This forces this producer into the EntityService
+ */
+ @Primary
+ @Bean(name = "kafkaEventProducer")
+ @Conditional(SystemUpdateCondition.class)
+ @ConditionalOnProperty(
+ name = "kafka.schemaRegistry.type",
+ havingValue = InternalSchemaRegistryFactory.TYPE)
+ protected KafkaEventProducer kafkaEventProducer(
+ @Qualifier("duheKafkaEventProducer") KafkaEventProducer kafkaEventProducer) {
+ return kafkaEventProducer;
+ }
}
diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/entity/steps/BackfillBrowsePathsV2.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/entity/steps/BackfillBrowsePathsV2.java
index 4b9fc5bba0204..9b023e1e239a2 100644
--- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/entity/steps/BackfillBrowsePathsV2.java
+++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/entity/steps/BackfillBrowsePathsV2.java
@@ -11,8 +11,20 @@ public class BackfillBrowsePathsV2 implements Upgrade {
private final List<UpgradeStep> _steps;
- public BackfillBrowsePathsV2(EntityService<?> entityService, SearchService searchService) {
- _steps = ImmutableList.of(new BackfillBrowsePathsV2Step(entityService, searchService));
+ public BackfillBrowsePathsV2(
+ EntityService<?> entityService,
+ SearchService searchService,
+ boolean enabled,
+ boolean reprocessEnabled,
+ Integer batchSize) {
+ if (enabled) {
+ _steps =
+ ImmutableList.of(
+ new BackfillBrowsePathsV2Step(
+ entityService, searchService, reprocessEnabled, batchSize));
+ } else {
+ _steps = ImmutableList.of();
+ }
}
@Override
diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/entity/steps/BackfillBrowsePathsV2Step.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/entity/steps/BackfillBrowsePathsV2Step.java
index 601ce4d25493c..2d64e0052ae82 100644
--- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/entity/steps/BackfillBrowsePathsV2Step.java
+++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/entity/steps/BackfillBrowsePathsV2Step.java
@@ -16,6 +16,7 @@
import com.linkedin.events.metadata.ChangeType;
import com.linkedin.metadata.Constants;
import com.linkedin.metadata.aspect.utils.DefaultAspectsUtil;
+import com.linkedin.metadata.boot.BootstrapStep;
import com.linkedin.metadata.entity.EntityService;
import com.linkedin.metadata.query.SearchFlags;
import com.linkedin.metadata.query.filter.Condition;
@@ -37,9 +38,8 @@
@Slf4j
public class BackfillBrowsePathsV2Step implements UpgradeStep {
- public static final String BACKFILL_BROWSE_PATHS_V2 = "BACKFILL_BROWSE_PATHS_V2";
- public static final String REPROCESS_DEFAULT_BROWSE_PATHS_V2 =
- "REPROCESS_DEFAULT_BROWSE_PATHS_V2";
+ private static final String UPGRADE_ID = "BackfillBrowsePathsV2Step";
+ private static final Urn UPGRADE_ID_URN = BootstrapStep.getUpgradeUrn(UPGRADE_ID);
public static final String DEFAULT_BROWSE_PATH_V2 = "␟Default";
private static final Set<String> ENTITY_TYPES_TO_MIGRATE =
@@ -53,14 +53,22 @@ public class BackfillBrowsePathsV2Step implements UpgradeStep {
Constants.ML_MODEL_GROUP_ENTITY_NAME,
Constants.ML_FEATURE_TABLE_ENTITY_NAME,
Constants.ML_FEATURE_ENTITY_NAME);
- private static final Integer BATCH_SIZE = 5000;
- private final EntityService<?> _entityService;
- private final SearchService _searchService;
-
- public BackfillBrowsePathsV2Step(EntityService<?> entityService, SearchService searchService) {
- _searchService = searchService;
- _entityService = entityService;
+ private final EntityService<?> entityService;
+ private final SearchService searchService;
+
+ private final boolean reprocessEnabled;
+ private final Integer batchSize;
+
+ public BackfillBrowsePathsV2Step(
+ EntityService<?> entityService,
+ SearchService searchService,
+ boolean reprocessEnabled,
+ Integer batchSize) {
+ this.searchService = searchService;
+ this.entityService = entityService;
+ this.reprocessEnabled = reprocessEnabled;
+ this.batchSize = batchSize;
}
@Override
@@ -78,11 +86,14 @@ public Function<UpgradeContext, UpgradeStepResult> executable() {
log.info(
String.format(
"Upgrading batch %s-%s of browse paths for entity type %s",
- migratedCount, migratedCount + BATCH_SIZE, entityType));
+ migratedCount, migratedCount + batchSize, entityType));
scrollId = backfillBrowsePathsV2(entityType, auditStamp, scrollId);
- migratedCount += BATCH_SIZE;
+ migratedCount += batchSize;
} while (scrollId != null);
}
+
+ BootstrapStep.setUpgradeResult(UPGRADE_ID_URN, entityService);
+
return new DefaultUpgradeStepResult(id(), UpgradeStepResult.Result.SUCCEEDED);
};
}
@@ -91,27 +102,27 @@ private String backfillBrowsePathsV2(String entityType, AuditStamp auditStamp, S
final Filter filter;
- if (System.getenv().containsKey(REPROCESS_DEFAULT_BROWSE_PATHS_V2)
- && Boolean.parseBoolean(System.getenv(REPROCESS_DEFAULT_BROWSE_PATHS_V2))) {
+ if (reprocessEnabled) {
filter = backfillDefaultBrowsePathsV2Filter();
} else {
filter = backfillBrowsePathsV2Filter();
}
final ScrollResult scrollResult =
- _searchService.scrollAcrossEntities(
+ searchService.scrollAcrossEntities(
ImmutableList.of(entityType),
"*",
filter,
null,
scrollId,
null,
- BATCH_SIZE,
+ batchSize,
new SearchFlags()
.setFulltext(true)
.setSkipCache(true)
.setSkipHighlighting(true)
.setSkipAggregates(true));
+
if (scrollResult.getNumEntities() == 0 || scrollResult.getEntities().size() == 0) {
return null;
}
@@ -183,7 +194,7 @@ private Filter backfillDefaultBrowsePathsV2Filter() {
private void ingestBrowsePathsV2(Urn urn, AuditStamp auditStamp) throws Exception {
BrowsePathsV2 browsePathsV2 =
- DefaultAspectsUtil.buildDefaultBrowsePathV2(urn, true, _entityService);
+ DefaultAspectsUtil.buildDefaultBrowsePathV2(urn, true, entityService);
log.debug(String.format("Adding browse path v2 for urn %s with value %s", urn, browsePathsV2));
MetadataChangeProposal proposal = new MetadataChangeProposal();
proposal.setEntityUrn(urn);
@@ -193,12 +204,12 @@ private void ingestBrowsePathsV2(Urn urn, AuditStamp auditStamp) throws Exceptio
proposal.setSystemMetadata(
new SystemMetadata().setRunId(DEFAULT_RUN_ID).setLastObserved(System.currentTimeMillis()));
proposal.setAspect(GenericRecordUtils.serializeAspect(browsePathsV2));
- _entityService.ingestProposal(proposal, auditStamp, true);
+ entityService.ingestProposal(proposal, auditStamp, true);
}
@Override
public String id() {
- return "BackfillBrowsePathsV2Step";
+ return UPGRADE_ID;
}
/**
@@ -211,7 +222,22 @@ public boolean isOptional() {
}
@Override
+ /**
+ * Returns whether the upgrade should be skipped. Uses previous run history or the environment
+ * variables REPROCESS_DEFAULT_BROWSE_PATHS_V2 & BACKFILL_BROWSE_PATHS_V2 to determine whether to
+ * skip.
+ */
public boolean skip(UpgradeContext context) {
- return !Boolean.parseBoolean(System.getenv(BACKFILL_BROWSE_PATHS_V2));
+ boolean envEnabled = Boolean.parseBoolean(System.getenv("BACKFILL_BROWSE_PATHS_V2"));
+
+ if (reprocessEnabled && envEnabled) {
+ return false;
+ }
+
+ boolean previouslyRun = entityService.exists(UPGRADE_ID_URN, true);
+ if (previouslyRun) {
+ log.info("{} was already run. Skipping.", id());
+ }
+ return (previouslyRun || !envEnabled);
}
}
diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/via/ReindexDataJobViaNodesCLL.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/via/ReindexDataJobViaNodesCLL.java
index 41179a50c4b54..59975693322d1 100644
--- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/via/ReindexDataJobViaNodesCLL.java
+++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/via/ReindexDataJobViaNodesCLL.java
@@ -18,8 +18,13 @@ public class ReindexDataJobViaNodesCLL implements Upgrade {
private final List<UpgradeStep> _steps;
- public ReindexDataJobViaNodesCLL(EntityService<?> entityService) {
- _steps = ImmutableList.of(new ReindexDataJobViaNodesCLLStep(entityService));
+ public ReindexDataJobViaNodesCLL(
+ EntityService<?> entityService, boolean enabled, Integer batchSize) {
+ if (enabled) {
+ _steps = ImmutableList.of(new ReindexDataJobViaNodesCLLStep(entityService, batchSize));
+ } else {
+ _steps = ImmutableList.of();
+ }
}
@Override
diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/via/ReindexDataJobViaNodesCLLStep.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/via/ReindexDataJobViaNodesCLLStep.java
index 70afbc3d205b2..56166caf5b57e 100644
--- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/via/ReindexDataJobViaNodesCLLStep.java
+++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/via/ReindexDataJobViaNodesCLLStep.java
@@ -11,7 +11,6 @@
import com.linkedin.metadata.entity.EntityService;
import com.linkedin.metadata.entity.restoreindices.RestoreIndicesArgs;
import com.linkedin.metadata.entity.restoreindices.RestoreIndicesResult;
-import java.net.URISyntaxException;
import java.util.function.Function;
import lombok.extern.slf4j.Slf4j;
@@ -21,12 +20,12 @@ public class ReindexDataJobViaNodesCLLStep implements UpgradeStep {
private static final String UPGRADE_ID = "via-node-cll-reindex-datajob";
private static final Urn UPGRADE_ID_URN = BootstrapStep.getUpgradeUrn(UPGRADE_ID);
- private static final Integer BATCH_SIZE = 5000;
+ private final EntityService<?> entityService;
+ private final Integer batchSize;
- private final EntityService _entityService;
-
- public ReindexDataJobViaNodesCLLStep(EntityService entityService) {
- _entityService = entityService;
+ public ReindexDataJobViaNodesCLLStep(EntityService<?> entityService, Integer batchSize) {
+ this.entityService = entityService;
+ this.batchSize = batchSize;
}
@Override
@@ -35,17 +34,16 @@ public Function<UpgradeContext, UpgradeStepResult> executable() {
RestoreIndicesArgs args =
new RestoreIndicesArgs()
.setAspectName(DATA_JOB_INPUT_OUTPUT_ASPECT_NAME)
- .setUrnLike("urn:li:" + DATA_JOB_ENTITY_NAME + ":%");
+ .setUrnLike("urn:li:" + DATA_JOB_ENTITY_NAME + ":%")
+ .setBatchSize(batchSize);
RestoreIndicesResult result =
- _entityService.restoreIndices(args, x -> context.report().addLine((String) x));
+ entityService.restoreIndices(args, x -> context.report().addLine((String) x));
context.report().addLine("Rows migrated: " + result.rowsMigrated);
context.report().addLine("Rows ignored: " + result.ignored);
- try {
- BootstrapStep.setUpgradeResult(UPGRADE_ID_URN, _entityService);
- context.report().addLine("State updated: " + UPGRADE_ID_URN);
- } catch (URISyntaxException e) {
- throw new RuntimeException(e);
- }
+
+ BootstrapStep.setUpgradeResult(UPGRADE_ID_URN, entityService);
+ context.report().addLine("State updated: " + UPGRADE_ID_URN);
+
return new DefaultUpgradeStepResult(id(), UpgradeStepResult.Result.SUCCEEDED);
};
}
@@ -70,7 +68,7 @@ public boolean isOptional() {
* variable SKIP_REINDEX_DATA_JOB_INPUT_OUTPUT to determine whether to skip.
*/
public boolean skip(UpgradeContext context) {
- boolean previouslyRun = _entityService.exists(UPGRADE_ID_URN, true);
+ boolean previouslyRun = entityService.exists(UPGRADE_ID_URN, true);
boolean envFlagRecommendsSkip =
Boolean.parseBoolean(System.getenv("SKIP_REINDEX_DATA_JOB_INPUT_OUTPUT"));
if (previouslyRun) {
diff --git a/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/DatahubUpgradeNoSchemaRegistryTest.java b/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/DatahubUpgradeNoSchemaRegistryTest.java
index 83b8e028727ce..4c9e12c0ed151 100644
--- a/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/DatahubUpgradeNoSchemaRegistryTest.java
+++ b/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/DatahubUpgradeNoSchemaRegistryTest.java
@@ -4,6 +4,8 @@
import static org.testng.AssertJUnit.assertNotNull;
import com.linkedin.datahub.upgrade.system.SystemUpdate;
+import com.linkedin.metadata.dao.producer.KafkaEventProducer;
+import com.linkedin.metadata.entity.EntityServiceImpl;
import java.util.List;
import java.util.Map;
import java.util.Optional;
@@ -19,19 +21,37 @@
classes = {UpgradeCliApplication.class, UpgradeCliApplicationTestConfiguration.class},
properties = {
"kafka.schemaRegistry.type=INTERNAL",
- "DATAHUB_UPGRADE_HISTORY_TOPIC_NAME=test_due_topic"
- })
+ "DATAHUB_UPGRADE_HISTORY_TOPIC_NAME=test_due_topic",
+ "METADATA_CHANGE_LOG_VERSIONED_TOPIC_NAME=test_mcl_versioned_topic"
+ },
+ args = {"-u", "SystemUpdate"})
public class DatahubUpgradeNoSchemaRegistryTest extends AbstractTestNGSpringContextTests {
@Autowired
@Named("systemUpdate")
private SystemUpdate systemUpdate;
+ @Autowired
+ @Named("kafkaEventProducer")
+ private KafkaEventProducer kafkaEventProducer;
+
+ @Autowired
+ @Named("duheKafkaEventProducer")
+ private KafkaEventProducer duheKafkaEventProducer;
+
+ @Autowired private EntityServiceImpl entityService;
+
@Test
public void testSystemUpdateInit() {
assertNotNull(systemUpdate);
}
+ @Test
+ public void testSystemUpdateKafkaProducerOverride() {
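+ // The @Primary kafkaEventProducer override should resolve to the DUHE producer and be the producer wired into the EntityService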
+ assertEquals(kafkaEventProducer, duheKafkaEventProducer);
+ assertEquals(entityService.get_producer(), duheKafkaEventProducer);
+ }
+
@Test
public void testSystemUpdateSend() {
UpgradeStepResult.Result result =
diff --git a/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/UpgradeCliApplicationTestConfiguration.java b/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/UpgradeCliApplicationTestConfiguration.java
index be28b7f739cf5..5c2d6fff0f07c 100644
--- a/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/UpgradeCliApplicationTestConfiguration.java
+++ b/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/UpgradeCliApplicationTestConfiguration.java
@@ -1,15 +1,21 @@
package com.linkedin.datahub.upgrade;
+import static org.mockito.ArgumentMatchers.anyString;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+
import com.linkedin.gms.factory.auth.SystemAuthenticationFactory;
-import com.linkedin.metadata.entity.EntityService;
import com.linkedin.metadata.graph.GraphService;
import com.linkedin.metadata.models.registry.ConfigEntityRegistry;
import com.linkedin.metadata.models.registry.EntityRegistry;
+import com.linkedin.metadata.registry.SchemaRegistryService;
import com.linkedin.metadata.search.SearchService;
import com.linkedin.metadata.search.elasticsearch.indexbuilder.EntityIndexBuilders;
import io.ebean.Database;
+import java.util.Optional;
import org.springframework.boot.test.context.TestConfiguration;
import org.springframework.boot.test.mock.mockito.MockBean;
+import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Import;
@TestConfiguration
@@ -20,8 +26,6 @@ public class UpgradeCliApplicationTestConfiguration {
@MockBean private Database ebeanServer;
- @MockBean private EntityService<?> _entityService;
-
@MockBean private SearchService searchService;
@MockBean private GraphService graphService;
@@ -31,4 +35,11 @@ public class UpgradeCliApplicationTestConfiguration {
@MockBean ConfigEntityRegistry configEntityRegistry;
@MockBean public EntityIndexBuilders entityIndexBuilders;
+
+ @Bean
+ public SchemaRegistryService schemaRegistryService() {
+ SchemaRegistryService mockService = mock(SchemaRegistryService.class);
+ when(mockService.getSchemaIdForTopic(anyString())).thenReturn(Optional.of(0));
+ return mockService;
+ }
}
diff --git a/metadata-events/mxe-utils-avro/src/main/java/com/linkedin/metadata/EventUtils.java b/metadata-events/mxe-utils-avro/src/main/java/com/linkedin/metadata/EventUtils.java
index 645c2fe210e09..adff32d5d336d 100644
--- a/metadata-events/mxe-utils-avro/src/main/java/com/linkedin/metadata/EventUtils.java
+++ b/metadata-events/mxe-utils-avro/src/main/java/com/linkedin/metadata/EventUtils.java
@@ -57,7 +57,7 @@ public class EventUtils {
private static final Schema ORIGINAL_MCP_AVRO_SCHEMA =
getAvroSchemaFromResource("avro/com/linkedin/mxe/MetadataChangeProposal.avsc");
- private static final Schema ORIGINAL_MCL_AVRO_SCHEMA =
+ public static final Schema ORIGINAL_MCL_AVRO_SCHEMA =
getAvroSchemaFromResource("avro/com/linkedin/mxe/MetadataChangeLog.avsc");
private static final Schema ORIGINAL_FMCL_AVRO_SCHEMA =
diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java
index 7f15e3a7fd8fc..eec5c6120886d 100644
--- a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java
+++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java
@@ -15,6 +15,7 @@
import com.codahale.metrics.Timer;
import com.datahub.util.RecordUtils;
import com.datahub.util.exception.ModelConversionException;
+import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Iterators;
@@ -146,7 +147,8 @@ public class EntityServiceImpl implements EntityService {
private static final int DEFAULT_MAX_TRANSACTION_RETRY = 3;
protected final AspectDao _aspectDao;
- private final EventProducer _producer;
+
+ @VisibleForTesting @Getter private final EventProducer _producer;
private final EntityRegistry _entityRegistry;
private final Map<String, Set<String>> _entityToValidAspects;
private RetentionService _retentionService;
@@ -637,10 +639,15 @@ public List<UpdateAspectResult> ingestAspects(
@Override
public List<UpdateAspectResult> ingestAspects(
@Nonnull final AspectsBatch aspectsBatch, boolean emitMCL, boolean overwrite) {
+ Set items = new HashSet<>(aspectsBatch.getItems());
+
+ // Generate additional items as needed
+ items.addAll(DefaultAspectsUtil.getAdditionalChanges(aspectsBatch, this, enableBrowseV2));
+ AspectsBatch withDefaults = AspectsBatchImpl.builder().items(items).build();
Timer.Context ingestToLocalDBTimer =
MetricUtils.timer(this.getClass(), "ingestAspectsToLocalDB").time();
- List<UpdateAspectResult> ingestResults = ingestAspectsToLocalDB(aspectsBatch, overwrite);
+ List<UpdateAspectResult> ingestResults = ingestAspectsToLocalDB(withDefaults, overwrite);
List<UpdateAspectResult> mclResults = emitMCL(ingestResults, emitMCL);
ingestToLocalDBTimer.stop();
@@ -964,7 +971,7 @@ public IngestResult ingestProposal(
*/
@Override
public Set<IngestResult> ingestProposal(AspectsBatch aspectsBatch, final boolean async) {
- Stream<IngestResult> timeseriesIngestResults = ingestTimeseriesProposal(aspectsBatch);
+ Stream<IngestResult> timeseriesIngestResults = ingestTimeseriesProposal(aspectsBatch, async);
Stream<IngestResult> nonTimeseriesIngestResults =
async ? ingestProposalAsync(aspectsBatch) : ingestProposalSync(aspectsBatch);
@@ -978,7 +985,8 @@ public Set<IngestResult> ingestProposal(AspectsBatch aspectsBatch, final boolean
* @param aspectsBatch timeseries upserts batch
* @return returns ingest proposal result, however was never in the MCP topic
*/
- private Stream<IngestResult> ingestTimeseriesProposal(AspectsBatch aspectsBatch) {
+ private Stream<IngestResult> ingestTimeseriesProposal(
+ AspectsBatch aspectsBatch, final boolean async) {
List<? extends BatchItem> unsupported =
aspectsBatch.getItems().stream()
.filter(
@@ -992,6 +1000,20 @@ private Stream<IngestResult> ingestTimeseriesProposal(AspectsBatch aspectsBatch)
+ unsupported.stream().map(BatchItem::getChangeType).collect(Collectors.toSet()));
}
+ if (!async) {
+ // Create default non-timeseries aspects for timeseries aspects
+ List timeseriesItems =
+ aspectsBatch.getItems().stream()
+ .filter(item -> item.getAspectSpec().isTimeseries())
+ .collect(Collectors.toList());
+
+ List defaultAspects =
+ DefaultAspectsUtil.getAdditionalChanges(
+ AspectsBatchImpl.builder().items(timeseriesItems).build(), this, enableBrowseV2);
+ ingestProposalSync(AspectsBatchImpl.builder().items(defaultAspects).build());
+ }
+
+ // Emit timeseries MCLs
List, Boolean>>>> timeseriesResults =
aspectsBatch.getItems().stream()
.filter(item -> item.getAspectSpec().isTimeseries())
@@ -1080,17 +1102,10 @@ private Stream<IngestResult> ingestProposalAsync(AspectsBatch aspectsBatch) {
}
private Stream<IngestResult> ingestProposalSync(AspectsBatch aspectsBatch) {
- Set items = new HashSet<>(aspectsBatch.getItems());
-
- // Generate additional items as needed
- items.addAll(DefaultAspectsUtil.getAdditionalChanges(aspectsBatch, this, enableBrowseV2));
-
- AspectsBatch withDefaults = AspectsBatchImpl.builder().items(items).build();
-
AspectsBatchImpl nonTimeseries =
AspectsBatchImpl.builder()
.items(
- withDefaults.getItems().stream()
+ aspectsBatch.getItems().stream()
.filter(item -> !item.getAspectSpec().isTimeseries())
.collect(Collectors.toList()))
.build();
diff --git a/metadata-io/src/test/java/com/linkedin/metadata/entity/EntityServiceTest.java b/metadata-io/src/test/java/com/linkedin/metadata/entity/EntityServiceTest.java
index ea4e97d264bca..384b54c7a1c8d 100644
--- a/metadata-io/src/test/java/com/linkedin/metadata/entity/EntityServiceTest.java
+++ b/metadata-io/src/test/java/com/linkedin/metadata/entity/EntityServiceTest.java
@@ -479,7 +479,7 @@ public void testIngestAspectsGetLatestAspects() throws Exception {
assertTrue(DataTemplateUtil.areEqual(writeAspect1, latestAspects.get(aspectName1)));
assertTrue(DataTemplateUtil.areEqual(writeAspect2, latestAspects.get(aspectName2)));
- verify(_mockProducer, times(2))
+ verify(_mockProducer, times(3))
.produceMetadataChangeLog(Mockito.eq(entityUrn), Mockito.any(), Mockito.any());
verifyNoMoreInteractions(_mockProducer);
@@ -772,6 +772,12 @@ public void testUpdateGetAspect() throws AssertionError {
.produceMetadataChangeLog(
Mockito.eq(entityUrn), Mockito.eq(corpUserInfoSpec), Mockito.any());
+ verify(_mockProducer, times(1))
+ .produceMetadataChangeLog(
+ Mockito.eq(entityUrn),
+ Mockito.eq(_testEntityRegistry.getEntitySpec("corpUser").getAspectSpec("corpUserKey")),
+ Mockito.any());
+
verifyNoMoreInteractions(_mockProducer);
}
@@ -824,6 +830,13 @@ public void testGetAspectAtVersion() throws AssertionError {
readAspect1 = _entityServiceImpl.getVersionedAspect(entityUrn, aspectName, -1);
assertFalse(DataTemplateUtil.areEqual(writtenVersionedAspect1, readAspect1));
+ // check key aspect
+ verify(_mockProducer, times(1))
+ .produceMetadataChangeLog(
+ Mockito.eq(entityUrn),
+ Mockito.eq(_testEntityRegistry.getEntitySpec("corpuser").getAspectSpec("corpUserKey")),
+ Mockito.any());
+
verifyNoMoreInteractions(_mockProducer);
}
@@ -1094,13 +1107,22 @@ public void testIngestGetLatestAspect() throws AssertionError {
ArgumentCaptor<MetadataChangeLog> mclCaptor = ArgumentCaptor.forClass(MetadataChangeLog.class);
verify(_mockProducer, times(1))
- .produceMetadataChangeLog(Mockito.eq(entityUrn), Mockito.any(), mclCaptor.capture());
+ .produceMetadataChangeLog(
+ Mockito.eq(entityUrn),
+ Mockito.eq(_testEntityRegistry.getEntitySpec("corpUser").getAspectSpec("corpUserInfo")),
+ mclCaptor.capture());
MetadataChangeLog mcl = mclCaptor.getValue();
assertEquals(mcl.getEntityType(), "corpuser");
assertNull(mcl.getPreviousAspectValue());
assertNull(mcl.getPreviousSystemMetadata());
assertEquals(mcl.getChangeType(), ChangeType.UPSERT);
+ verify(_mockProducer, times(1))
+ .produceMetadataChangeLog(
+ Mockito.eq(entityUrn),
+ Mockito.eq(_testEntityRegistry.getEntitySpec("corpUser").getAspectSpec("corpUserKey")),
+ Mockito.any());
+
verifyNoMoreInteractions(_mockProducer);
reset(_mockProducer);
@@ -1201,7 +1223,16 @@ public void testIngestGetLatestEnvelopedAspect() throws Exception {
EntityUtils.parseSystemMetadata(readAspectDao1.getSystemMetadata()), metadata1));
verify(_mockProducer, times(2))
- .produceMetadataChangeLog(Mockito.eq(entityUrn), Mockito.any(), Mockito.any());
+ .produceMetadataChangeLog(
+ Mockito.eq(entityUrn),
+ Mockito.eq(_testEntityRegistry.getEntitySpec("corpUser").getAspectSpec("corpUserInfo")),
+ Mockito.any());
+
+ verify(_mockProducer, times(1))
+ .produceMetadataChangeLog(
+ Mockito.eq(entityUrn),
+ Mockito.eq(_testEntityRegistry.getEntitySpec("corpUser").getAspectSpec("corpUserKey")),
+ Mockito.any());
verifyNoMoreInteractions(_mockProducer);
}
@@ -1234,9 +1265,18 @@ public void testIngestSameAspect() throws AssertionError {
RecordTemplate readAspect1 = _entityServiceImpl.getLatestAspect(entityUrn, aspectName);
assertTrue(DataTemplateUtil.areEqual(writeAspect1, readAspect1));
+ verify(_mockProducer, times(1))
+ .produceMetadataChangeLog(
+ Mockito.eq(entityUrn),
+ Mockito.eq(_testEntityRegistry.getEntitySpec("corpUser").getAspectSpec("corpUserKey")),
+ Mockito.any());
+
ArgumentCaptor<MetadataChangeLog> mclCaptor = ArgumentCaptor.forClass(MetadataChangeLog.class);
verify(_mockProducer, times(1))
- .produceMetadataChangeLog(Mockito.eq(entityUrn), Mockito.any(), mclCaptor.capture());
+ .produceMetadataChangeLog(
+ Mockito.eq(entityUrn),
+ Mockito.eq(_testEntityRegistry.getEntitySpec("corpUser").getAspectSpec("corpUserInfo")),
+ mclCaptor.capture());
MetadataChangeLog mcl = mclCaptor.getValue();
assertEquals(mcl.getEntityType(), "corpuser");
assertNull(mcl.getPreviousAspectValue());
diff --git a/metadata-service/configuration/src/main/resources/application.yml b/metadata-service/configuration/src/main/resources/application.yml
index d4c11d4aa53bd..c2a0d508b57d6 100644
--- a/metadata-service/configuration/src/main/resources/application.yml
+++ b/metadata-service/configuration/src/main/resources/application.yml
@@ -314,6 +314,14 @@ systemUpdate:
maxBackOffs: ${BOOTSTRAP_SYSTEM_UPDATE_MAX_BACK_OFFS:50}
backOffFactor: ${BOOTSTRAP_SYSTEM_UPDATE_BACK_OFF_FACTOR:2} # Multiplicative factor for back off, default values will result in waiting 5min 15s
waitForSystemUpdate: ${BOOTSTRAP_SYSTEM_UPDATE_WAIT_FOR_SYSTEM_UPDATE:true}
+ dataJobNodeCLL:
+ enabled: ${BOOTSTRAP_SYSTEM_UPDATE_DATA_JOB_NODE_CLL_ENABLED:true}
+ batchSize: ${BOOTSTRAP_SYSTEM_UPDATE_DATA_JOB_NODE_CLL_BATCH_SIZE:200}
+ browsePathsV2:
+ enabled: ${BOOTSTRAP_SYSTEM_UPDATE_BROWSE_PATHS_V2_ENABLED:true}
+ batchSize: ${BOOTSTRAP_SYSTEM_UPDATE_BROWSE_PATHS_V2_BATCH_SIZE:5000}
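+ # when reprocess.enabled is true, BackfillBrowsePathsV2Step also re-processes entities whose browse paths were set to the default value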
+ reprocess:
+ enabled: ${REPROCESS_DEFAULT_BROWSE_PATHS_V2:false}
structuredProperties:
enabled: ${ENABLE_STRUCTURED_PROPERTIES_HOOK:true} # applies structured properties mappings
diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/EntityServiceFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/EntityServiceFactory.java
index 871f16d97be33..2ccdee5fb1dbf 100644
--- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/EntityServiceFactory.java
+++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/EntityServiceFactory.java
@@ -1,20 +1,15 @@
package com.linkedin.gms.factory.entity;
import com.linkedin.datahub.graphql.featureflags.FeatureFlags;
-import com.linkedin.gms.factory.common.TopicConventionFactory;
import com.linkedin.gms.factory.config.ConfigurationProvider;
import com.linkedin.metadata.dao.producer.KafkaEventProducer;
-import com.linkedin.metadata.dao.producer.KafkaHealthChecker;
import com.linkedin.metadata.entity.AspectDao;
import com.linkedin.metadata.entity.EntityService;
import com.linkedin.metadata.entity.EntityServiceImpl;
import com.linkedin.metadata.entity.ebean.batch.MCPUpsertBatchItem;
import com.linkedin.metadata.models.registry.EntityRegistry;
import com.linkedin.metadata.service.UpdateIndicesService;
-import com.linkedin.mxe.TopicConvention;
import javax.annotation.Nonnull;
-import org.apache.avro.generic.IndexedRecord;
-import org.apache.kafka.clients.producer.Producer;
import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.context.annotation.Bean;
@@ -28,26 +23,16 @@ public class EntityServiceFactory {
private Integer _ebeanMaxTransactionRetry;
@Bean(name = "entityService")
- @DependsOn({
- "entityAspectDao",
- "kafkaEventProducer",
- "kafkaHealthChecker",
- TopicConventionFactory.TOPIC_CONVENTION_BEAN,
- "entityRegistry"
- })
+ @DependsOn({"entityAspectDao", "kafkaEventProducer", "entityRegistry"})
@Nonnull
protected EntityService createInstance(
- Producer producer,
- TopicConvention convention,
- KafkaHealthChecker kafkaHealthChecker,
+ @Qualifier("kafkaEventProducer") final KafkaEventProducer eventProducer,
@Qualifier("entityAspectDao") AspectDao aspectDao,
EntityRegistry entityRegistry,
ConfigurationProvider configurationProvider,
UpdateIndicesService updateIndicesService,
@Value("${featureFlags.showBrowseV2}") final boolean enableBrowsePathV2) {
- final KafkaEventProducer eventProducer =
- new KafkaEventProducer(producer, convention, kafkaHealthChecker);
FeatureFlags featureFlags = configurationProvider.getFeatureFlags();
return new EntityServiceImpl(
diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/schemaregistry/DUHESchemaRegistryFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/schemaregistry/DUHESchemaRegistryFactory.java
deleted file mode 100644
index 4819984307af9..0000000000000
--- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/schemaregistry/DUHESchemaRegistryFactory.java
+++ /dev/null
@@ -1,40 +0,0 @@
-package com.linkedin.gms.factory.kafka.schemaregistry;
-
-import static com.linkedin.metadata.boot.kafka.DataHubUpgradeKafkaListener.TOPIC_NAME;
-
-import com.linkedin.gms.factory.config.ConfigurationProvider;
-import com.linkedin.metadata.boot.kafka.MockDUHEDeserializer;
-import com.linkedin.metadata.boot.kafka.MockDUHESerializer;
-import com.linkedin.metadata.config.kafka.KafkaConfiguration;
-import io.confluent.kafka.serializers.AbstractKafkaSchemaSerDeConfig;
-import java.util.HashMap;
-import java.util.Map;
-import lombok.extern.slf4j.Slf4j;
-import org.springframework.beans.factory.annotation.Value;
-import org.springframework.context.annotation.Bean;
-import org.springframework.context.annotation.Configuration;
-
-@Slf4j
-@Configuration
-public class DUHESchemaRegistryFactory {
-
- public static final String DUHE_SCHEMA_REGISTRY_TOPIC_KEY = "duheTopicName";
-
- @Value(TOPIC_NAME)
- private String duheTopicName;
-
- /** Configure Kafka Producer/Consumer processes with a custom schema registry. */
- @Bean("duheSchemaRegistryConfig")
- protected SchemaRegistryConfig duheSchemaRegistryConfig(ConfigurationProvider provider) {
- Map<String, Object> props = new HashMap<>();
- KafkaConfiguration kafkaConfiguration = provider.getKafka();
-
- props.put(
- AbstractKafkaSchemaSerDeConfig.SCHEMA_REGISTRY_URL_CONFIG,
- kafkaConfiguration.getSchemaRegistry().getUrl());
- props.put(DUHE_SCHEMA_REGISTRY_TOPIC_KEY, duheTopicName);
-
- log.info("DataHub System Update Registry");
- return new SchemaRegistryConfig(MockDUHESerializer.class, MockDUHEDeserializer.class, props);
- }
-}
diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/schemaregistry/InternalSchemaRegistryFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/schemaregistry/InternalSchemaRegistryFactory.java
index 8c814e5054758..46b27195ecc67 100644
--- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/schemaregistry/InternalSchemaRegistryFactory.java
+++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/schemaregistry/InternalSchemaRegistryFactory.java
@@ -1,11 +1,7 @@
package com.linkedin.gms.factory.kafka.schemaregistry;
-import com.linkedin.gms.factory.common.TopicConventionFactory;
import com.linkedin.gms.factory.config.ConfigurationProvider;
import com.linkedin.metadata.config.kafka.KafkaConfiguration;
-import com.linkedin.metadata.registry.SchemaRegistryService;
-import com.linkedin.metadata.registry.SchemaRegistryServiceImpl;
-import com.linkedin.mxe.TopicConvention;
import io.confluent.kafka.serializers.AbstractKafkaSchemaSerDeConfig;
import io.confluent.kafka.serializers.KafkaAvroDeserializer;
import io.confluent.kafka.serializers.KafkaAvroSerializer;
@@ -17,7 +13,6 @@
import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
-import org.springframework.context.annotation.DependsOn;
@Slf4j
@Configuration
@@ -45,11 +40,4 @@ protected SchemaRegistryConfig getInstance(
kafkaConfiguration.getSchemaRegistry().getUrl());
return new SchemaRegistryConfig(KafkaAvroSerializer.class, KafkaAvroDeserializer.class, props);
}
-
- @Bean(name = "schemaRegistryService")
- @Nonnull
- @DependsOn({TopicConventionFactory.TOPIC_CONVENTION_BEAN})
- protected SchemaRegistryService schemaRegistryService(TopicConvention convention) {
- return new SchemaRegistryServiceImpl(convention);
- }
}
diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/schemaregistry/SchemaRegistryServiceFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/schemaregistry/SchemaRegistryServiceFactory.java
new file mode 100644
index 0000000000000..a6869321d796f
--- /dev/null
+++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/schemaregistry/SchemaRegistryServiceFactory.java
@@ -0,0 +1,20 @@
+package com.linkedin.gms.factory.kafka.schemaregistry;
+
+import com.linkedin.gms.factory.common.TopicConventionFactory;
+import com.linkedin.metadata.registry.SchemaRegistryService;
+import com.linkedin.metadata.registry.SchemaRegistryServiceImpl;
+import com.linkedin.mxe.TopicConvention;
+import javax.annotation.Nonnull;
+import org.springframework.context.annotation.Bean;
+import org.springframework.context.annotation.Configuration;
+import org.springframework.context.annotation.DependsOn;
+
+@Configuration
+public class SchemaRegistryServiceFactory {
+ @Bean(name = "schemaRegistryService")
+ @Nonnull
+ @DependsOn({TopicConventionFactory.TOPIC_CONVENTION_BEAN})
+ protected SchemaRegistryService schemaRegistryService(TopicConvention convention) {
+ return new SchemaRegistryServiceImpl(convention);
+ }
+}
diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/schemaregistry/SystemUpdateSchemaRegistryFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/schemaregistry/SystemUpdateSchemaRegistryFactory.java
new file mode 100644
index 0000000000000..d02cdc0e68f52
--- /dev/null
+++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/schemaregistry/SystemUpdateSchemaRegistryFactory.java
@@ -0,0 +1,66 @@
+package com.linkedin.gms.factory.kafka.schemaregistry;
+
+import static com.linkedin.metadata.boot.kafka.DataHubUpgradeKafkaListener.TOPIC_NAME;
+
+import com.linkedin.gms.factory.config.ConfigurationProvider;
+import com.linkedin.metadata.boot.kafka.MockSystemUpdateDeserializer;
+import com.linkedin.metadata.boot.kafka.MockSystemUpdateSerializer;
+import com.linkedin.metadata.config.kafka.KafkaConfiguration;
+import com.linkedin.metadata.registry.SchemaRegistryService;
+import com.linkedin.mxe.Topics;
+import io.confluent.kafka.serializers.AbstractKafkaSchemaSerDeConfig;
+import java.util.HashMap;
+import java.util.Map;
+import lombok.extern.slf4j.Slf4j;
+import org.springframework.beans.factory.annotation.Value;
+import org.springframework.context.annotation.Bean;
+import org.springframework.context.annotation.Configuration;
+
+@Slf4j
+@Configuration
+public class SystemUpdateSchemaRegistryFactory {
+
+ public static final String SYSTEM_UPDATE_TOPIC_KEY_PREFIX = "data-hub.system-update.topic-key.";
+ public static final String SYSTEM_UPDATE_TOPIC_KEY_ID_SUFFIX = ".id";
+
+ public static final String DUHE_SCHEMA_REGISTRY_TOPIC_KEY =
+ SYSTEM_UPDATE_TOPIC_KEY_PREFIX + "duhe";
+ public static final String MCL_VERSIONED_SCHEMA_REGISTRY_TOPIC_KEY =
+ SYSTEM_UPDATE_TOPIC_KEY_PREFIX + "mcl-versioned";
+
+ @Value(TOPIC_NAME)
+ private String duheTopicName;
+
+ @Value("${METADATA_CHANGE_LOG_VERSIONED_TOPIC_NAME:" + Topics.METADATA_CHANGE_LOG_VERSIONED + "}")
+ private String mclTopicName;
+
+ /** Configure Kafka Producer/Consumer processes with a custom schema registry. */
+ @Bean("duheSchemaRegistryConfig")
+ protected SchemaRegistryConfig duheSchemaRegistryConfig(
+ final ConfigurationProvider provider, final SchemaRegistryService schemaRegistryService) {
+ Map<String, Object> props = new HashMap<>();
+ KafkaConfiguration kafkaConfiguration = provider.getKafka();
+
+ props.put(
+ AbstractKafkaSchemaSerDeConfig.SCHEMA_REGISTRY_URL_CONFIG,
+ kafkaConfiguration.getSchemaRegistry().getUrl());
+
+ // topic names
+ props.putAll(
+ Map.of(
+ DUHE_SCHEMA_REGISTRY_TOPIC_KEY, duheTopicName,
+ MCL_VERSIONED_SCHEMA_REGISTRY_TOPIC_KEY, mclTopicName));
+
+ // topic ordinals
+ props.putAll(
+ Map.of(
+ DUHE_SCHEMA_REGISTRY_TOPIC_KEY + SYSTEM_UPDATE_TOPIC_KEY_ID_SUFFIX,
+ schemaRegistryService.getSchemaIdForTopic(duheTopicName).get().toString(),
+ MCL_VERSIONED_SCHEMA_REGISTRY_TOPIC_KEY + SYSTEM_UPDATE_TOPIC_KEY_ID_SUFFIX,
+ schemaRegistryService.getSchemaIdForTopic(mclTopicName).get().toString()));
+
+ log.info("DataHub System Update Registry");
+ return new SchemaRegistryConfig(
+ MockSystemUpdateSerializer.class, MockSystemUpdateDeserializer.class, props);
+ }
+}
diff --git a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/BootstrapStep.java b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/BootstrapStep.java
index a79bdacfc55e9..2dccda4243bca 100644
--- a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/BootstrapStep.java
+++ b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/BootstrapStep.java
@@ -1,16 +1,15 @@
package com.linkedin.metadata.boot;
-import com.linkedin.common.AuditStamp;
import com.linkedin.common.urn.Urn;
import com.linkedin.events.metadata.ChangeType;
import com.linkedin.metadata.Constants;
import com.linkedin.metadata.entity.EntityService;
import com.linkedin.metadata.key.DataHubUpgradeKey;
+import com.linkedin.metadata.utils.AuditStampUtils;
import com.linkedin.metadata.utils.EntityKeyUtils;
import com.linkedin.metadata.utils.GenericRecordUtils;
import com.linkedin.mxe.MetadataChangeProposal;
import com.linkedin.upgrade.DataHubUpgradeResult;
-import java.net.URISyntaxException;
import javax.annotation.Nonnull;
/** A single step in the Bootstrap process. */
@@ -40,24 +39,10 @@ static Urn getUpgradeUrn(String upgradeId) {
new DataHubUpgradeKey().setId(upgradeId), Constants.DATA_HUB_UPGRADE_ENTITY_NAME);
}
- static void setUpgradeResult(Urn urn, EntityService<?> entityService) throws URISyntaxException {
- final AuditStamp auditStamp =
- new AuditStamp()
- .setActor(Urn.createFromString(Constants.SYSTEM_ACTOR))
- .setTime(System.currentTimeMillis());
+ static void setUpgradeResult(Urn urn, EntityService<?> entityService) {
final DataHubUpgradeResult upgradeResult =
new DataHubUpgradeResult().setTimestampMs(System.currentTimeMillis());
- // Workaround because entity service does not auto-generate the key aspect for us
- final MetadataChangeProposal keyProposal = new MetadataChangeProposal();
- final DataHubUpgradeKey upgradeKey = new DataHubUpgradeKey().setId(urn.getId());
- keyProposal.setEntityUrn(urn);
- keyProposal.setEntityType(Constants.DATA_HUB_UPGRADE_ENTITY_NAME);
- keyProposal.setAspectName(Constants.DATA_HUB_UPGRADE_KEY_ASPECT_NAME);
- keyProposal.setAspect(GenericRecordUtils.serializeAspect(upgradeKey));
- keyProposal.setChangeType(ChangeType.UPSERT);
- entityService.ingestProposal(keyProposal, auditStamp, false);
-
// Ingest the upgrade result
final MetadataChangeProposal upgradeProposal = new MetadataChangeProposal();
upgradeProposal.setEntityUrn(urn);
@@ -65,6 +50,6 @@ static void setUpgradeResult(Urn urn, EntityService<?> entityService) throws URI
upgradeProposal.setAspectName(Constants.DATA_HUB_UPGRADE_RESULT_ASPECT_NAME);
upgradeProposal.setAspect(GenericRecordUtils.serializeAspect(upgradeResult));
upgradeProposal.setChangeType(ChangeType.UPSERT);
- entityService.ingestProposal(upgradeProposal, auditStamp, false);
+ entityService.ingestProposal(upgradeProposal, AuditStampUtils.createDefaultAuditStamp(), false);
}
}
diff --git a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/kafka/MockDUHESerializer.java b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/kafka/MockDUHESerializer.java
deleted file mode 100644
index 36fe514d5536f..0000000000000
--- a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/kafka/MockDUHESerializer.java
+++ /dev/null
@@ -1,57 +0,0 @@
-package com.linkedin.metadata.boot.kafka;
-
-import static com.linkedin.gms.factory.kafka.schemaregistry.DUHESchemaRegistryFactory.DUHE_SCHEMA_REGISTRY_TOPIC_KEY;
-
-import com.linkedin.metadata.EventUtils;
-import io.confluent.kafka.schemaregistry.avro.AvroSchema;
-import io.confluent.kafka.schemaregistry.client.MockSchemaRegistryClient;
-import io.confluent.kafka.schemaregistry.client.SchemaRegistryClient;
-import io.confluent.kafka.schemaregistry.client.rest.exceptions.RestClientException;
-import io.confluent.kafka.serializers.KafkaAvroSerializer;
-import java.io.IOException;
-import java.util.Map;
-import lombok.extern.slf4j.Slf4j;
-
-/** Used for early bootstrap to avoid contact with not yet existing schema registry */
-@Slf4j
-public class MockDUHESerializer extends KafkaAvroSerializer {
-
- private static final String DATAHUB_UPGRADE_HISTORY_EVENT_SUBJECT_SUFFIX = "-value";
-
- private String topicName;
-
- public MockDUHESerializer() {
- this.schemaRegistry = buildMockSchemaRegistryClient();
- }
-
- public MockDUHESerializer(SchemaRegistryClient client) {
- super(client);
- this.schemaRegistry = buildMockSchemaRegistryClient();
- }
-
- public MockDUHESerializer(SchemaRegistryClient client, Map<String, ?> props) {
- super(client, props);
- this.schemaRegistry = buildMockSchemaRegistryClient();
- }
-
- @Override
- public void configure(Map<String, ?> configs, boolean isKey) {
- super.configure(configs, isKey);
- topicName = configs.get(DUHE_SCHEMA_REGISTRY_TOPIC_KEY).toString();
- }
-
- private MockSchemaRegistryClient buildMockSchemaRegistryClient() {
- MockSchemaRegistryClient schemaRegistry = new MockSchemaRegistryClient();
- try {
- schemaRegistry.register(
- topicToSubjectName(topicName), new AvroSchema(EventUtils.ORIGINAL_DUHE_AVRO_SCHEMA));
- return schemaRegistry;
- } catch (IOException | RestClientException e) {
- throw new RuntimeException(e);
- }
- }
-
- public static String topicToSubjectName(String topicName) {
- return topicName + DATAHUB_UPGRADE_HISTORY_EVENT_SUBJECT_SUFFIX;
- }
-}
diff --git a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/kafka/MockDUHEDeserializer.java b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/kafka/MockSystemUpdateDeserializer.java
similarity index 57%
rename from metadata-service/factories/src/main/java/com/linkedin/metadata/boot/kafka/MockDUHEDeserializer.java
rename to metadata-service/factories/src/main/java/com/linkedin/metadata/boot/kafka/MockSystemUpdateDeserializer.java
index e631f776abd08..74a20cdacbb21 100644
--- a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/kafka/MockDUHEDeserializer.java
+++ b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/kafka/MockSystemUpdateDeserializer.java
@@ -1,50 +1,49 @@
package com.linkedin.metadata.boot.kafka;
-import static com.linkedin.gms.factory.kafka.schemaregistry.DUHESchemaRegistryFactory.DUHE_SCHEMA_REGISTRY_TOPIC_KEY;
-import static com.linkedin.metadata.boot.kafka.MockDUHESerializer.topicToSubjectName;
+import static com.linkedin.gms.factory.kafka.schemaregistry.SystemUpdateSchemaRegistryFactory.DUHE_SCHEMA_REGISTRY_TOPIC_KEY;
+import static com.linkedin.gms.factory.kafka.schemaregistry.SystemUpdateSchemaRegistryFactory.SYSTEM_UPDATE_TOPIC_KEY_ID_SUFFIX;
+import static com.linkedin.metadata.boot.kafka.MockSystemUpdateSerializer.topicToSubjectName;
import com.linkedin.metadata.EventUtils;
import io.confluent.kafka.schemaregistry.ParsedSchema;
import io.confluent.kafka.schemaregistry.avro.AvroSchema;
import io.confluent.kafka.schemaregistry.client.MockSchemaRegistryClient;
-import io.confluent.kafka.schemaregistry.client.SchemaRegistryClient;
import io.confluent.kafka.schemaregistry.client.rest.exceptions.RestClientException;
import io.confluent.kafka.serializers.KafkaAvroDeserializer;
import java.io.IOException;
import java.util.Map;
import lombok.extern.slf4j.Slf4j;
-/** Used for early bootstrap to avoid contact with not yet existing schema registry */
+/**
+ * Used for early bootstrap to avoid contact with not yet existing schema registry. Only supports
+ * the DUHE topic.
+ */
@Slf4j
-public class MockDUHEDeserializer extends KafkaAvroDeserializer {
+public class MockSystemUpdateDeserializer extends KafkaAvroDeserializer {
private String topicName;
-
- public MockDUHEDeserializer() {
- this.schemaRegistry = buildMockSchemaRegistryClient();
- }
-
- public MockDUHEDeserializer(SchemaRegistryClient client) {
- super(client);
- this.schemaRegistry = buildMockSchemaRegistryClient();
- }
-
- public MockDUHEDeserializer(SchemaRegistryClient client, Map<String, ?> props) {
- super(client, props);
- this.schemaRegistry = buildMockSchemaRegistryClient();
- }
+ private Integer schemaId;
@Override
public void configure(Map<String, ?> configs, boolean isKey) {
super.configure(configs, isKey);
topicName = configs.get(DUHE_SCHEMA_REGISTRY_TOPIC_KEY).toString();
+ schemaId =
+ Integer.valueOf(
+ configs
+ .get(DUHE_SCHEMA_REGISTRY_TOPIC_KEY + SYSTEM_UPDATE_TOPIC_KEY_ID_SUFFIX)
+ .toString());
+ this.schemaRegistry = buildMockSchemaRegistryClient();
}
private MockSchemaRegistryClient buildMockSchemaRegistryClient() {
- MockSchemaRegistryClient schemaRegistry = new MockSchemaRegistryClient2();
+ MockSchemaRegistryClient schemaRegistry = new MockSchemaRegistryClient2(schemaId);
try {
schemaRegistry.register(
- topicToSubjectName(topicName), new AvroSchema(EventUtils.ORIGINAL_DUHE_AVRO_SCHEMA));
+ topicToSubjectName(topicName),
+ new AvroSchema(EventUtils.ORIGINAL_DUHE_AVRO_SCHEMA),
+ 0,
+ schemaId);
return schemaRegistry;
} catch (IOException | RestClientException e) {
throw new RuntimeException(e);
@@ -52,13 +51,19 @@ private MockSchemaRegistryClient buildMockSchemaRegistryClient() {
}
public static class MockSchemaRegistryClient2 extends MockSchemaRegistryClient {
+ private final int schemaId;
+
+ public MockSchemaRegistryClient2(int schemaId) {
+ this.schemaId = schemaId;
+ }
+
/**
* Previously used topics can have schema ids > 1 which fully match however we are replacing
* that registry so force schema id to 1
*/
@Override
public synchronized ParsedSchema getSchemaById(int id) throws IOException, RestClientException {
- return super.getSchemaById(1);
+ return super.getSchemaById(schemaId);
}
}
}
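The renamed deserializer now reads two values from its consumer configs: the DUHE topic name and the
schema id to pin for that topic in the embedded mock registry. A rough usage sketch, with imports
omitted and hypothetical topic name, schema id, and registry URL (the config keys are the factory
constants imported above):

    // Sketch only: the literal values below are illustrative, not production defaults.
    Map<String, Object> configs = new HashMap<>();
    configs.put("schema.registry.url", "mock://system-update");
    configs.put(DUHE_SCHEMA_REGISTRY_TOPIC_KEY, "DataHubUpgradeHistory_v1");
    configs.put(DUHE_SCHEMA_REGISTRY_TOPIC_KEY + SYSTEM_UPDATE_TOPIC_KEY_ID_SUFFIX, "2");

    MockSystemUpdateDeserializer deserializer = new MockSystemUpdateDeserializer();
    // Registers the DUHE Avro schema under id 2 in the mock registry, so records written
    // against an earlier registry with a higher schema id can still be decoded.
    deserializer.configure(configs, false);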
diff --git a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/kafka/MockSystemUpdateSerializer.java b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/kafka/MockSystemUpdateSerializer.java
new file mode 100644
index 0000000000000..14aac2758a69d
--- /dev/null
+++ b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/kafka/MockSystemUpdateSerializer.java
@@ -0,0 +1,76 @@
+package com.linkedin.metadata.boot.kafka;
+
+import static com.linkedin.gms.factory.kafka.schemaregistry.SystemUpdateSchemaRegistryFactory.DUHE_SCHEMA_REGISTRY_TOPIC_KEY;
+import static com.linkedin.gms.factory.kafka.schemaregistry.SystemUpdateSchemaRegistryFactory.MCL_VERSIONED_SCHEMA_REGISTRY_TOPIC_KEY;
+import static com.linkedin.gms.factory.kafka.schemaregistry.SystemUpdateSchemaRegistryFactory.SYSTEM_UPDATE_TOPIC_KEY_ID_SUFFIX;
+import static com.linkedin.gms.factory.kafka.schemaregistry.SystemUpdateSchemaRegistryFactory.SYSTEM_UPDATE_TOPIC_KEY_PREFIX;
+
+import com.linkedin.metadata.EventUtils;
+import com.linkedin.util.Pair;
+import io.confluent.kafka.schemaregistry.avro.AvroSchema;
+import io.confluent.kafka.schemaregistry.client.MockSchemaRegistryClient;
+import io.confluent.kafka.schemaregistry.client.rest.exceptions.RestClientException;
+import io.confluent.kafka.serializers.KafkaAvroSerializer;
+import java.io.IOException;
+import java.util.Map;
+import java.util.stream.Collectors;
+import lombok.extern.slf4j.Slf4j;
+
+/** Used for early bootstrap to avoid contact with not yet existing schema registry */
+@Slf4j
+public class MockSystemUpdateSerializer extends KafkaAvroSerializer {
+
+ private static final String DATAHUB_SYSTEM_UPDATE_SUBJECT_SUFFIX = "-value";
+
+ private static final Map<String, AvroSchema> AVRO_SCHEMA_MAP =
+ Map.of(
+ DUHE_SCHEMA_REGISTRY_TOPIC_KEY, new AvroSchema(EventUtils.ORIGINAL_DUHE_AVRO_SCHEMA),
+ MCL_VERSIONED_SCHEMA_REGISTRY_TOPIC_KEY,
+ new AvroSchema(EventUtils.ORIGINAL_MCL_AVRO_SCHEMA));
+
+ private Map<String, Pair<AvroSchema, Integer>> topicNameToAvroSchemaMap;
+
+ @Override
+ public void configure(Map<String, ?> configs, boolean isKey) {
+ super.configure(configs, isKey);
+ topicNameToAvroSchemaMap =
+ configs.entrySet().stream()
+ .filter(
+ e ->
+ e.getKey().startsWith(SYSTEM_UPDATE_TOPIC_KEY_PREFIX)
+ && !e.getKey().endsWith(SYSTEM_UPDATE_TOPIC_KEY_ID_SUFFIX)
+ && e.getValue() instanceof String)
+ .map(
+ e -> {
+ Integer id =
+ Integer.valueOf(
+ (String) configs.get(e.getKey() + SYSTEM_UPDATE_TOPIC_KEY_ID_SUFFIX));
+ return Pair.of(
+ (String) e.getValue(), Pair.of(AVRO_SCHEMA_MAP.get(e.getKey()), id));
+ })
+ .collect(Collectors.toMap(Pair::getKey, Pair::getValue));
+ this.schemaRegistry = buildMockSchemaRegistryClient();
+ }
+
+ private MockSchemaRegistryClient buildMockSchemaRegistryClient() {
+ MockSchemaRegistryClient schemaRegistry = new MockSchemaRegistryClient();
+
+ if (topicNameToAvroSchemaMap != null) {
+ topicNameToAvroSchemaMap.forEach(
+ (topicName, schemaId) -> {
+ try {
+ schemaRegistry.register(
+ topicToSubjectName(topicName), schemaId.getFirst(), 0, schemaId.getSecond());
+ } catch (IOException | RestClientException e) {
+ throw new RuntimeException(e);
+ }
+ });
+ }
+
+ return schemaRegistry;
+ }
+
+ public static String topicToSubjectName(String topicName) {
+ return topicName + DATAHUB_SYSTEM_UPDATE_SUBJECT_SUFFIX;
+ }
+}
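The new serializer generalizes the same idea to multiple topics: each config key carrying the
system-update prefix names a topic, and its id-suffixed counterpart pins the schema id registered for
that topic. A rough configuration sketch, again with imports omitted and hypothetical topic names,
ids, and registry URL (assuming both topic keys share the system-update prefix, as the filter in
configure() requires):

    // Sketch only: topic names and ids are illustrative.
    Map<String, Object> configs = new HashMap<>();
    configs.put("schema.registry.url", "mock://system-update");
    configs.put(DUHE_SCHEMA_REGISTRY_TOPIC_KEY, "DataHubUpgradeHistory_v1");
    configs.put(DUHE_SCHEMA_REGISTRY_TOPIC_KEY + SYSTEM_UPDATE_TOPIC_KEY_ID_SUFFIX, "1");
    configs.put(MCL_VERSIONED_SCHEMA_REGISTRY_TOPIC_KEY, "MetadataChangeLog_Versioned_v1");
    configs.put(MCL_VERSIONED_SCHEMA_REGISTRY_TOPIC_KEY + SYSTEM_UPDATE_TOPIC_KEY_ID_SUFFIX, "2");

    MockSystemUpdateSerializer serializer = new MockSystemUpdateSerializer();
    // Registers one Avro schema per configured topic in the mock registry, each under its
    // pinned schema id.
    serializer.configure(configs, false);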
diff --git a/metadata-service/restli-servlet-impl/src/test/java/com/linkedin/metadata/resources/entity/AspectResourceTest.java b/metadata-service/restli-servlet-impl/src/test/java/com/linkedin/metadata/resources/entity/AspectResourceTest.java
index 1678fe92ec70e..17c5160494722 100644
--- a/metadata-service/restli-servlet-impl/src/test/java/com/linkedin/metadata/resources/entity/AspectResourceTest.java
+++ b/metadata-service/restli-servlet-impl/src/test/java/com/linkedin/metadata/resources/entity/AspectResourceTest.java
@@ -122,7 +122,7 @@ public void testAsyncDefaultAspects() throws URISyntaxException {
.request(req)
.build())));
_aspectResource.ingestProposal(mcp, "false");
- verify(_producer, times(5))
+ verify(_producer, times(10))
.produceMetadataChangeLog(eq(urn), any(AspectSpec.class), any(MetadataChangeLog.class));
verifyNoMoreInteractions(_producer);
}
From 09d4260b18f63e33eda930bd067b4e9e65067478 Mon Sep 17 00:00:00 2001
From: david-leifker <114954101+david-leifker@users.noreply.github.com>
Date: Wed, 7 Feb 2024 17:03:22 -0600
Subject: [PATCH 5/7] chore(pegasus): upgrade pegasus, remove gradle 8 hacks
for pegasus plugin (#9798)
---
build.gradle | 15 +-
buildSrc/build.gradle | 3 -
.../pegasus/gradle/PegasusPlugin.java | 2444 -----------------
.../gradle/tasks/ChangedFileReportTask.java | 124 -
4 files changed, 8 insertions(+), 2578 deletions(-)
delete mode 100644 buildSrc/src/main/java/com/linkedin/pegasus/gradle/PegasusPlugin.java
delete mode 100644 buildSrc/src/main/java/com/linkedin/pegasus/gradle/tasks/ChangedFileReportTask.java
diff --git a/build.gradle b/build.gradle
index ba61d97f0ed6e..ea81d26355027 100644
--- a/build.gradle
+++ b/build.gradle
@@ -30,7 +30,7 @@ buildscript {
ext.junitJupiterVersion = '5.6.1'
// Releases: https://github.com/linkedin/rest.li/blob/master/CHANGELOG.md
- ext.pegasusVersion = '29.48.4'
+ ext.pegasusVersion = '29.51.0'
ext.mavenVersion = '3.6.3'
ext.springVersion = '6.1.2'
ext.springBootVersion = '3.2.1'
@@ -269,13 +269,14 @@ allprojects {
apply plugin: 'eclipse'
// apply plugin: 'org.gradlex.java-ecosystem-capabilities'
- tasks.withType(Test).configureEach {
- // https://docs.gradle.org/current/userguide/performance.html
- maxParallelForks = Runtime.runtime.availableProcessors().intdiv(2) ?: 1
+ tasks.withType(Test).configureEach { task -> if (task.project.name != "metadata-io") {
+ // https://docs.gradle.org/current/userguide/performance.html
+ maxParallelForks = Runtime.runtime.availableProcessors().intdiv(2) ?: 1
- if (project.configurations.getByName("testImplementation").getDependencies()
- .any{ it.getName().contains("testng") }) {
- useTestNG()
+ if (project.configurations.getByName("testImplementation").getDependencies()
+ .any { it.getName().contains("testng") }) {
+ useTestNG()
+ }
}
}
diff --git a/buildSrc/build.gradle b/buildSrc/build.gradle
index 0c2d91e1f7ac1..88900e06d4845 100644
--- a/buildSrc/build.gradle
+++ b/buildSrc/build.gradle
@@ -25,7 +25,4 @@ dependencies {
compileOnly 'org.projectlombok:lombok:1.18.30'
annotationProcessor 'org.projectlombok:lombok:1.18.30'
-
- // pegasus dependency, overrides for tasks
- implementation 'com.linkedin.pegasus:gradle-plugins:29.48.4'
}
\ No newline at end of file
diff --git a/buildSrc/src/main/java/com/linkedin/pegasus/gradle/PegasusPlugin.java b/buildSrc/src/main/java/com/linkedin/pegasus/gradle/PegasusPlugin.java
deleted file mode 100644
index 2460abcad6f9e..0000000000000
--- a/buildSrc/src/main/java/com/linkedin/pegasus/gradle/PegasusPlugin.java
+++ /dev/null
@@ -1,2444 +0,0 @@
-/*
- * Copyright (c) 2019 LinkedIn Corp.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.linkedin.pegasus.gradle;
-
-import com.linkedin.pegasus.gradle.PegasusOptions.IdlOptions;
-import com.linkedin.pegasus.gradle.internal.CompatibilityLogChecker;
-import com.linkedin.pegasus.gradle.tasks.ChangedFileReportTask;
-import com.linkedin.pegasus.gradle.tasks.CheckIdlTask;
-import com.linkedin.pegasus.gradle.tasks.CheckPegasusSnapshotTask;
-import com.linkedin.pegasus.gradle.tasks.CheckRestModelTask;
-import com.linkedin.pegasus.gradle.tasks.CheckSnapshotTask;
-import com.linkedin.pegasus.gradle.tasks.GenerateAvroSchemaTask;
-import com.linkedin.pegasus.gradle.tasks.GenerateDataTemplateTask;
-import com.linkedin.pegasus.gradle.tasks.GeneratePegasusSnapshotTask;
-import com.linkedin.pegasus.gradle.tasks.GenerateRestClientTask;
-import com.linkedin.pegasus.gradle.tasks.GenerateRestModelTask;
-import com.linkedin.pegasus.gradle.tasks.PublishRestModelTask;
-import com.linkedin.pegasus.gradle.tasks.TranslateSchemasTask;
-import com.linkedin.pegasus.gradle.tasks.ValidateExtensionSchemaTask;
-import com.linkedin.pegasus.gradle.tasks.ValidateSchemaAnnotationTask;
-import java.io.File;
-import java.io.IOException;
-import java.io.InputStream;
-import java.lang.reflect.Method;
-import java.nio.file.Files;
-import java.nio.file.Path;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Collection;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Locale;
-import java.util.Map;
-import java.util.Properties;
-import java.util.Set;
-import java.util.TreeSet;
-import java.util.function.Function;
-import java.util.regex.Pattern;
-import java.util.stream.Collectors;
-
-import org.gradle.api.Action;
-import org.gradle.api.GradleException;
-import org.gradle.api.Plugin;
-import org.gradle.api.Project;
-import org.gradle.api.Task;
-import org.gradle.api.artifacts.Configuration;
-import org.gradle.api.artifacts.ConfigurationContainer;
-import org.gradle.api.file.FileCollection;
-import org.gradle.api.plugins.JavaBasePlugin;
-import org.gradle.api.plugins.JavaPlugin;
-import org.gradle.api.plugins.JavaPluginConvention;
-import org.gradle.api.plugins.JavaPluginExtension;
-import org.gradle.api.publish.PublishingExtension;
-import org.gradle.api.publish.ivy.IvyPublication;
-import org.gradle.api.publish.ivy.plugins.IvyPublishPlugin;
-import org.gradle.api.tasks.Copy;
-import org.gradle.api.tasks.Delete;
-import org.gradle.api.tasks.SourceSet;
-import org.gradle.api.tasks.SourceSetContainer;
-import org.gradle.api.tasks.Sync;
-import org.gradle.api.tasks.TaskProvider;
-import org.gradle.api.tasks.bundling.Jar;
-import org.gradle.api.tasks.compile.JavaCompile;
-import org.gradle.api.tasks.javadoc.Javadoc;
-import org.gradle.language.base.plugins.LifecycleBasePlugin;
-import org.gradle.language.jvm.tasks.ProcessResources;
-import org.gradle.plugins.ide.eclipse.EclipsePlugin;
-import org.gradle.plugins.ide.eclipse.model.EclipseModel;
-import org.gradle.plugins.ide.idea.IdeaPlugin;
-import org.gradle.plugins.ide.idea.model.IdeaModule;
-import org.gradle.util.GradleVersion;
-
-
-/**
- * Pegasus code generation plugin.
- * The supported project layout for this plugin is as follows:
- *
- *
- * --- api/
- * | --- build.gradle
- * | --- src/
- * | --- <sourceSet>/
- * | | --- idl/
- * | | | --- <published idl (.restspec.json) files>
- * | | --- java/
- * | | | --- <packageName>/
- * | | | --- <common java files>
- * | | --- pegasus/
- * | | --- <packageName>/
- * | | --- <data schema (.pdsc) files>
- * | --- <sourceSet>GeneratedDataTemplate/
- * | | --- java/
- * | | --- <packageName>/
- * | | --- <data template source files generated from data schema (.pdsc) files>
- * | --- <sourceSet>GeneratedAvroSchema/
- * | | --- avro/
- * | | --- <packageName>/
- * | | --- <avsc avro schema files (.avsc) generated from pegasus schema files>
- * | --- <sourceSet>GeneratedRest/
- * | --- java/
- * | --- <packageName>/
- * | --- <rest client source (.java) files generated from published idl>
- * --- impl/
- * | --- build.gradle
- * | --- src/
- * | --- <sourceSet>/
- * | | --- java/
- * | | --- <packageName>/
- * | | --- <resource class source (.java) files>
- * | --- <sourceSet>GeneratedRest/
- * | --- idl/
- * | --- <generated idl (.restspec.json) files>
- * --- <other projects>/
- *
- *
- * -
- * api: contains all the files which are commonly depended by the server and
- * client implementation. The common files include the data schema (.pdsc) files,
- * the idl (.restspec.json) files and potentially Java interface files used by both sides.
- *
- * -
- * impl: contains the resource class for server implementation.
- *
- *
- * Performs the following functions:
- *
- * Generate data model and data template jars for each source set.
- *
- * Overview:
- *
- *
- * In the api project, the plugin generates the data template source (.java) files from the
- * data schema (.pdsc) files, and furthermore compiles the source files and packages them
- * to jar files. Details of jar contents will be explained in following paragraphs.
- * In general, data schema files should exist only in api projects.
- *
- *
- *
- * Configure the server and client implementation projects to depend on the
- * api project's dataTemplate configuration to get access to the generated data templates
- * from within these projects. This allows api classes to be built first so that implementation
- * projects can consume them. We recommend this structure to avoid circular dependencies
- * (directly or indirectly) among implementation projects.
- *
- *
- * Detail:
- *
- *
- * Generates data template source (.java) files from data schema (.pdsc) files,
- * compiles the data template source (.java) files into class (.class) files,
- * creates a data model jar file and a data template jar file.
- * The data model jar file contains the source data schema (.pdsc) files.
- * The data template jar file contains both the source data schema (.pdsc) files
- * and the generated data template class (.class) files.
- *
- *
- *
- * In the data template generation phase, the plugin creates a new target source set
- * for the generated files. The new target source set's name is the input source set name's
- * suffixed with "GeneratedDataTemplate", e.g. "mainGeneratedDataTemplate".
- * The plugin invokes PegasusDataTemplateGenerator to generate data template source (.java) files
- * for all data schema (.pdsc) files present in the input source set's pegasus
- * directory, e.g. "src/main/pegasus". The generated data template source (.java) files
- * will be in the new target source set's java source directory, e.g.
- * "src/mainGeneratedDataTemplate/java". In addition to
- * the data schema (.pdsc) files in the pegasus directory, the dataModel configuration
- * specifies resolver path for the PegasusDataTemplateGenerator. The resolver path
- * provides the data schemas and previously generated data template classes that
- * may be referenced by the input source set's data schemas. In most cases, the dataModel
- * configuration should contain data template jars.
- *
- *
- *
- * The next phase is the data template compilation phase, the plugin compiles the generated
- * data template source (.java) files into class files. The dataTemplateCompile configuration
- * specifies the pegasus jars needed to compile these classes. The compileClasspath of the
- * target source set is a composite of the dataModel configuration which includes the data template
- * classes that were previously generated and included in the dependent data template jars,
- * and the dataTemplateCompile configuration.
- * This configuration should specify a dependency on the Pegasus data jar.
- *
- *
- *
 - * The following phase is creating the data model jar and the data template jar.
- * This plugin creates the data model jar that includes the contents of the
- * input source set's pegasus directory, and sets the jar file's classification to
- * "data-model". Hence, the resulting jar file's name should end with "-data-model.jar".
- * It adds the data model jar as an artifact to the dataModel configuration.
- * This jar file should only contain data schema (.pdsc) files.
- *
- *
- *
- * This plugin also create the data template jar that includes the contents of the input
- * source set's pegasus directory and the java class output directory of the
- * target source set. It sets the jar file's classification to "data-template".
- * Hence, the resulting jar file's name should end with "-data-template.jar".
- * It adds the data template jar file as an artifact to the dataTemplate configuration.
- * This jar file contains both data schema (.pdsc) files and generated data template
- * class (.class) files.
- *
- *
- *
- * This plugin will ensure that data template source files are generated before
- * compiling the input source set and before the idea and eclipse tasks. It
- * also adds the generated classes to the compileClasspath of the input source set.
- *
- *
- *
- * The configurations that apply to generating the data model and data template jars
- * are as follow:
- *
- * -
- * The dataTemplateCompile configuration specifies the classpath for compiling
- * the generated data template source (.java) files. In most cases,
- * it should be the Pegasus data jar.
- * (The default compile configuration is not used for compiling data templates because
- * it is not desirable to include non data template dependencies in the data template jar.)
- * The configuration should not directly include data template jars. Data template jars
- * should be included in the dataModel configuration.
- *
- * -
- * The dataModel configuration provides the value of the "generator.resolver.path"
- * system property that is passed to PegasusDataTemplateGenerator. In most cases,
- * this configuration should contain only data template jars. The data template jars
- * contain both data schema (.pdsc) files and generated data template (.class) files.
- * PegasusDataTemplateGenerator will not generate data template (.java) files for
- * classes that can be found in the resolver path. This avoids redundant generation
- * of the same classes, and inclusion of these classes in multiple jars.
- * The dataModel configuration is also used to publish the data model jar which
- * contains only data schema (.pdsc) files.
- *
- * -
- * The testDataModel configuration is similar to the dataModel configuration
- * except it is used when generating data templates from test source sets.
- * It extends from the dataModel configuration. It is also used to publish
- * the data model jar from test source sets.
- *
- * -
- * The dataTemplate configuration is used to publish the data template
- * jar which contains both data schema (.pdsc) files and the data template class
- * (.class) files generated from these data schema (.pdsc) files.
- *
- * -
- * The testDataTemplate configuration is similar to the dataTemplate configuration
- * except it is used when publishing the data template jar files generated from
- * test source sets.
- *
- *
- *
- *
- * Performs the following functions:
- *
- * Generate avro schema jars for each source set.
- *
- * Overview:
- *
- *
- * In the api project, the task 'generateAvroSchema' generates the avro schema (.avsc)
- * files from pegasus schema (.pdsc) files. In general, data schema files should exist
- * only in api projects.
- *
- *
- *
- * Configure the server and client implementation projects to depend on the
- * api project's avroSchema configuration to get access to the generated avro schemas
- * from within these projects.
- *
- *
- *
- * This plugin also create the avro schema jar that includes the contents of the input
- * source set's avro directory and the avsc schema files.
- * The resulting jar file's name should end with "-avro-schema.jar".
- *
- *
- * Generate rest model and rest client jars for each source set.
- *
- * Overview:
- *
- *
- * In the api project, generates rest client source (.java) files from the idl,
- * compiles the rest client source (.java) files to rest client class (.class) files
- * and puts them in jar files. In general, the api project should be only place that
- * contains the publishable idl files. If the published idl changes an existing idl
- * in the api project, the plugin will emit message indicating this has occurred and
- * suggest that the entire project be rebuilt if it is desirable for clients of the
- * idl to pick up the newly published changes.
- *
- *
- *
- * In the impl project, generates the idl (.restspec.json) files from the input
- * source set's resource class files, then compares them against the existing idl
- * files in the api project for compatibility checking. If incompatible changes are
- * found, the build fails (unless certain flag is specified, see below). If the
- * generated idl passes compatibility checks (see compatibility check levels below),
- * publishes the generated idl (.restspec.json) to the api project.
- *
- *
- * Detail:
- *
- * rest client generation phase: in api project
- *
- *
- * In this phase, the rest client source (.java) files are generated from the
- * api project idl (.restspec.json) files using RestRequestBuilderGenerator.
- * The generated rest client source files will be in the new target source set's
- * java source directory, e.g. "src/mainGeneratedRest/java".
- *
- *
- *
- * RestRequestBuilderGenerator requires access to the data schemas referenced
- * by the idl. The dataModel configuration specifies the resolver path needed
- * by RestRequestBuilderGenerator to access the data schemas referenced by
- * the idl that is not in the source set's pegasus directory.
- * This plugin automatically includes the data schema (.pdsc) files in the
- * source set's pegasus directory in the resolver path.
- * In most cases, the dataModel configuration should contain data template jars.
- * The data template jars contains both data schema (.pdsc) files and generated
- * data template class (.class) files. By specifying data template jars instead
- * of data model jars, redundant generation of data template classes is avoided
- * as classes that can be found in the resolver path are not generated.
- *
- *
- * rest client compilation phase: in api project
- *
- *
- * In this phase, the plugin compiles the generated rest client source (.java)
- * files into class files. The restClientCompile configuration specifies the
- * pegasus jars needed to compile these classes. The compile classpath is a
- * composite of the dataModel configuration which includes the data template
- * classes that were previously generated and included in the dependent data template
- * jars, and the restClientCompile configuration.
- * This configuration should specify a dependency on the Pegasus restli-client jar.
- *
- *
- *
 - * The following stage is creating the rest model jar and the rest client jar.
- * This plugin creates the rest model jar that includes the
- * generated idl (.restspec.json) files, and sets the jar file's classification to
- * "rest-model". Hence, the resulting jar file's name should end with "-rest-model.jar".
- * It adds the rest model jar as an artifact to the restModel configuration.
- * This jar file should only contain idl (.restspec.json) files.
- *
- *
- *
- * This plugin also create the rest client jar that includes the generated
- * idl (.restspec.json) files and the java class output directory of the
- * target source set. It sets the jar file's classification to "rest-client".
- * Hence, the resulting jar file's name should end with "-rest-client.jar".
- * It adds the rest client jar file as an artifact to the restClient configuration.
- * This jar file contains both idl (.restspec.json) files and generated rest client
- * class (.class) files.
- *
- *
- * idl generation phase: in server implementation project
- *
- *
- * Before entering this phase, the plugin will ensure that generating idl will
- * occur after compiling the input source set. It will also ensure that IDEA
- * and Eclipse tasks runs after rest client source (.java) files are generated.
- *
- *
- *
- * In this phase, the plugin creates a new target source set for the generated files.
 - * The new target source set's name is the input source set name's suffixed with
- * "GeneratedRest", e.g. "mainGeneratedRest". The plugin invokes
- * RestLiResourceModelExporter to generate idl (.restspec.json) files for each
- * IdlItem in the input source set's pegasus IdlOptions. The generated idl files
- * will be in target source set's idl directory, e.g. "src/mainGeneratedRest/idl".
- * For example, the following adds an IdlItem to the source set's pegasus IdlOptions.
- * This line should appear in the impl project's build.gradle. If no IdlItem is added,
- * this source set will be excluded from generating idl and checking idl compatibility,
 - * even if there are existing idl files.
- *
- * pegasus.main.idlOptions.addIdlItem(["com.linkedin.restli.examples.groups.server"])
- *
- *
- *
- *
- * After the idl generation phase, each included idl file is checked for compatibility against
- * those in the api project. In case the current interface breaks compatibility,
- * by default the build fails and reports all compatibility errors and warnings. Otherwise,
- * the build tasks in the api project later will package the resource classes into jar files.
- * User can change the compatibility requirement between the current and published idl by
- * setting the "rest.model.compatibility" project property, i.e.
- * "gradle -Prest.model.compatibility= ..." The following levels are supported:
- *
- * - ignore: idl compatibility check will occur but its result will be ignored.
- * The result will be aggregated and printed at the end of the build.
- * - backwards: build fails if there are backwards incompatible changes in idl.
- * Build continues if there are only compatible changes.
- * - equivalent (default): build fails if there is any functional changes (compatible or
- * incompatible) in the current idl. Only docs and comments are allowed to be different.
- *
- * The plugin needs to know where the api project is. It searches the api project in the
- * following steps. If all searches fail, the build fails.
- *
- * -
- * Use the specified project from the impl project build.gradle file. The ext.apiProject
- * property explicitly assigns the api project. E.g.
- *
- * ext.apiProject = project(':groups:groups-server-api')
- *
- * If multiple such statements exist, the last will be used. Wrong project path causes Gradle
- * evaluation error.
- *
- * -
- * If no ext.apiProject property is defined, the plugin will try to guess the
- * api project name with the following conventions. The search stops at the first successful match.
- *
- * -
- * If the impl project name ends with the following suffixes, substitute the suffix with "-api".
- *
- * - -impl
- * - -service
- * - -server
- * - -server-impl
- *
- * This list can be overridden by inserting the following line to the project build.gradle:
- *
- * ext.apiProjectSubstitutionSuffixes = ['-new-suffix-1', '-new-suffix-2']
- *
- * Alternatively, this setting could be applied globally to all projects by putting it in
- * the subprojects section of the root build.gradle.
- *
- * -
- * Append "-api" to the impl project name.
- *
- *
- *
- *
- * The plugin invokes RestLiResourceModelCompatibilityChecker to check compatibility.
- *
- *
- *
- * The idl files in the api project are not generated by the plugin, but rather
- * "published" from the impl project. The publishRestModel task is used to copy the
- * idl files to the api project. This task is invoked automatically if the idls are
- * verified to be "safe". "Safe" is determined by the "rest.model.compatibility"
- * property. Because this task is skipped if the idls are functionally equivalent
- * (not necessarily identical, e.g. differ in doc fields), if the default "equivalent"
- * compatibility level is used, no file will be copied. If such automatic publishing
 - * is intended to be skipped, set the "rest.model.skipPublish" property to true.
- * Note that all the properties are per-project and can be overridden in each project's
- * build.gradle file.
- *
- *
- *
 - * Please always keep in mind that if idl publishing has happened, a subsequent whole-project
- * rebuild is necessary to pick up the changes. Otherwise, the Hudson job will fail and
- * the source code commit will fail.
- *
- *
- *
- * The configurations that apply to generating the rest model and rest client jars
- * are as follow:
- *
- * -
- * The restClientCompile configuration specifies the classpath for compiling
- * the generated rest client source (.java) files. In most cases,
- * it should be the Pegasus restli-client jar.
- * (The default compile configuration is not used for compiling rest client because
- * it is not desirable to include non rest client dependencies, such as
- * the rest server implementation classes, in the data template jar.)
- * The configuration should not directly include data template jars. Data template jars
- * should be included in the dataModel configuration.
- *
- * -
- * The dataModel configuration provides the value of the "generator.resolver.path"
- * system property that is passed to RestRequestBuilderGenerator.
- * This configuration should contain only data template jars. The data template jars
- * contain both data schema (.pdsc) files and generated data template (.class) files.
- * The RestRequestBuilderGenerator will only generate rest client classes.
- * The dataModel configuration is also included in the compile classpath for the
 - * generated rest client source files. If the dataModel configuration does not
 - * include generated data template classes, the Java compiler may not be able to
- * find the data template classes referenced by the generated rest client.
- *
- * -
- * The testDataModel configuration is similar to the dataModel configuration
- * except it is used when generating rest client source files from
- * test source sets.
- *
- * -
- * The restModel configuration is used to publish the rest model jar
- * which contains generated idl (.restspec.json) files.
- *
- * -
- * The testRestModel configuration is similar to the restModel configuration
- * except it is used to publish rest model jar files generated from
- * test source sets.
- *
- * -
- * The restClient configuration is used to publish the rest client jar
- * which contains both generated idl (.restspec.json) files and
 - * the rest client class (.class) files generated from these
- * idl (.restspec.json) files.
- *
- * -
- * The testRestClient configuration is similar to the restClient configuration
- * except it is used to publish rest client jar files generated from
- * test source sets.
- *
- *
- *
- *
- *
- * This plugin considers test source sets whose names begin with 'test' or 'integTest' to be
- * test source sets.
- *
- */
-public class PegasusPlugin implements Plugin<Project>
-{
- public static boolean debug = false;
-
- private static final GradleVersion MIN_REQUIRED_VERSION = GradleVersion.version("1.0"); // Next: 5.2.1
- private static final GradleVersion MIN_SUGGESTED_VERSION = GradleVersion.version("5.2.1"); // Next: 5.3
-
- //
- // Constants for generating sourceSet names and corresponding directory names
- // for generated code
- //
- private static final String DATA_TEMPLATE_GEN_TYPE = "DataTemplate";
- private static final String REST_GEN_TYPE = "Rest";
- private static final String AVRO_SCHEMA_GEN_TYPE = "AvroSchema";
-
- public static final String DATA_TEMPLATE_FILE_SUFFIX = ".pdsc";
- public static final String PDL_FILE_SUFFIX = ".pdl";
- // gradle property to opt OUT schema annotation validation, by default this feature is enabled.
- private static final String DISABLE_SCHEMA_ANNOTATION_VALIDATION = "schema.annotation.validation.disable";
- // gradle property to opt in for destroying stale files from the build directory,
- // by default it is disabled, because it triggers hot-reload (even if it results in a no-op)
- private static final String DESTROY_STALE_FILES_ENABLE = "enableDestroyStaleFiles";
- public static final Collection<String> DATA_TEMPLATE_FILE_SUFFIXES = new ArrayList<>();
-
- public static final String IDL_FILE_SUFFIX = ".restspec.json";
- public static final String SNAPSHOT_FILE_SUFFIX = ".snapshot.json";
- public static final String SNAPSHOT_COMPAT_REQUIREMENT = "rest.model.compatibility";
- public static final String IDL_COMPAT_REQUIREMENT = "rest.idl.compatibility";
- // Pegasus schema compatibility level configuration, which is used to define the {@link CompatibilityLevel}.
- public static final String PEGASUS_SCHEMA_SNAPSHOT_REQUIREMENT = "pegasusPlugin.pegasusSchema.compatibility";
- // Pegasus extension schema compatibility level configuration, which is used to define the {@link CompatibilityLevel}
- public static final String PEGASUS_EXTENSION_SCHEMA_SNAPSHOT_REQUIREMENT = "pegasusPlugin.extensionSchema.compatibility";
- // CompatibilityOptions Mode configuration, which is used to define the {@link CompatibilityOptions#Mode} in the compatibility checker.
- private static final String PEGASUS_COMPATIBILITY_MODE = "pegasusPlugin.pegasusSchemaCompatibilityCheckMode";
-
- private static final Pattern TEST_DIR_REGEX = Pattern.compile("^(integ)?[Tt]est");
- private static final String SNAPSHOT_NO_PUBLISH = "rest.model.noPublish";
- private static final String SNAPSHOT_FORCE_PUBLISH = "rest.model.forcePublish";
- private static final String PROCESS_EMPTY_IDL_DIR = "rest.idl.processEmptyIdlDir";
- private static final String IDL_NO_PUBLISH = "rest.idl.noPublish";
- private static final String IDL_FORCE_PUBLISH = "rest.idl.forcePublish";
- private static final String SKIP_IDL_CHECK = "rest.idl.skipCheck";
- // gradle property to skip running GenerateRestModel task.
- // Note it affects GenerateRestModel task only, and does not skip tasks depends on GenerateRestModel.
- private static final String SKIP_GENERATE_REST_MODEL= "rest.model.skipGenerateRestModel";
- private static final String SUPPRESS_REST_CLIENT_RESTLI_2 = "rest.client.restli2.suppress";
- private static final String SUPPRESS_REST_CLIENT_RESTLI_1 = "rest.client.restli1.suppress";
-
- private static final String GENERATOR_CLASSLOADER_NAME = "pegasusGeneratorClassLoader";
-
- private static final String CONVERT_TO_PDL_REVERSE = "convertToPdl.reverse";
- private static final String CONVERT_TO_PDL_KEEP_ORIGINAL = "convertToPdl.keepOriginal";
- private static final String CONVERT_TO_PDL_SKIP_VERIFICATION = "convertToPdl.skipVerification";
- private static final String CONVERT_TO_PDL_PRESERVE_SOURCE_CMD = "convertToPdl.preserveSourceCmd";
-
- // Below variables are used to collect data across all pegasus projects (sub-projects) and then print information
- // to the user at the end after build is finished.
- private static StringBuffer _restModelCompatMessage = new StringBuffer();
- private static final Collection<String> _needCheckinFiles = new ArrayList<>();
- private static final Collection<String> _needBuildFolders = new ArrayList<>();
- private static final Collection<String> _possibleMissingFilesInEarlierCommit = new ArrayList<>();
-
- private static final String RUN_ONCE = "runOnce";
- private static final Object STATIC_PROJECT_EVALUATED_LOCK = new Object();
-
- private static final List<String> UNUSED_CONFIGURATIONS = Arrays.asList(
- "dataTemplateGenerator", "restTools", "avroSchemaGenerator");
- // Directory in the dataTemplate jar that holds schemas translated from PDL to PDSC.
- private static final String TRANSLATED_SCHEMAS_DIR = "legacyPegasusSchemas";
- // Enable the use of argFiles for the tasks that support them
- private static final String ENABLE_ARG_FILE = "pegasusPlugin.enableArgFile";
- // Enable the generation of fluent APIs
- private static final String ENABLE_FLUENT_API = "pegasusPlugin.enableFluentApi";
-
- // This config impacts GenerateDataTemplateTask and GenerateRestClientTask;
- // If not set, by default all paths generated in these two tasks will be lower-case.
- // This default behavior is needed because Linux, MacOS, Windows treat case sensitive paths differently,
- // and we want to be consistent, so we choose lower-case as default case for path generated
- private static final String CODE_GEN_PATH_CASE_SENSITIVE = "pegasusPlugin.generateCaseSensitivePath";
-
- private static final String PEGASUS_PLUGIN_CONFIGURATION = "pegasusPlugin";
-
- // Enable the use of generic pegasus schema compatibility checker
- private static final String ENABLE_PEGASUS_SCHEMA_COMPATIBILITY_CHECK = "pegasusPlugin.enablePegasusSchemaCompatibilityCheck";
-
- private static final String PEGASUS_SCHEMA_SNAPSHOT = "PegasusSchemaSnapshot";
-
- private static final String PEGASUS_EXTENSION_SCHEMA_SNAPSHOT = "PegasusExtensionSchemaSnapshot";
-
- private static final String PEGASUS_SCHEMA_SNAPSHOT_DIR = "pegasusSchemaSnapshot";
-
- private static final String PEGASUS_EXTENSION_SCHEMA_SNAPSHOT_DIR = "pegasusExtensionSchemaSnapshot";
-
- private static final String PEGASUS_SCHEMA_SNAPSHOT_DIR_OVERRIDE = "overridePegasusSchemaSnapshotDir";
-
- private static final String PEGASUS_EXTENSION_SCHEMA_SNAPSHOT_DIR_OVERRIDE = "overridePegasusExtensionSchemaSnapshotDir";
-
- private static final String SRC = "src";
-
- private static final String SCHEMA_ANNOTATION_HANDLER_CONFIGURATION = "schemaAnnotationHandler";
-
- private static final String COMPATIBILITY_OPTIONS_MODE_EXTENSION = "EXTENSION";
-
-
- @SuppressWarnings("unchecked")
- private Class<? extends Plugin<Project>> _thisPluginType = (Class<? extends Plugin<Project>>)
- getClass().asSubclass(Plugin.class);
-
- private Task _generateSourcesJarTask;
- private Javadoc _generateJavadocTask;
- private Task _generateJavadocJarTask;
- private boolean _configureIvyPublications = true;
-
- public void setPluginType(Class<? extends Plugin<Project>> pluginType)
- {
- _thisPluginType = pluginType;
- }
-
- public void setSourcesJarTask(Task sourcesJarTask)
- {
- _generateSourcesJarTask = sourcesJarTask;
- }
-
- public void setJavadocJarTask(Task javadocJarTask)
- {
- _generateJavadocJarTask = javadocJarTask;
- }
-
- public void setConfigureIvyPublications(boolean configureIvyPublications) {
- _configureIvyPublications = configureIvyPublications;
- }
-
- @Override
- public void apply(Project project)
- {
- checkGradleVersion(project);
-
- project.getPlugins().apply(JavaPlugin.class);
-
- // this HashMap will have a PegasusOptions per sourceSet
- project.getExtensions().getExtraProperties().set("pegasus", new HashMap<>());
- // this map will extract PegasusOptions.GenerationMode to project property
- project.getExtensions().getExtraProperties().set("PegasusGenerationMode",
- Arrays.stream(PegasusOptions.GenerationMode.values())
- .collect(Collectors.toMap(PegasusOptions.GenerationMode::name, Function.identity())));
-
- synchronized (STATIC_PROJECT_EVALUATED_LOCK)
- {
- // Check if this is the first time the block will run. Pegasus plugin can run multiple times in a build if
- // multiple sub-projects applied the plugin.
- if (!project.getRootProject().hasProperty(RUN_ONCE)
- || !Boolean.parseBoolean(String.valueOf(project.getRootProject().property(RUN_ONCE))))
- {
- project.getGradle().projectsEvaluated(gradle ->
- gradle.getRootProject().subprojects(subproject ->
- UNUSED_CONFIGURATIONS.forEach(configurationName -> {
- Configuration conf = subproject.getConfigurations().findByName(configurationName);
- if (conf != null && !conf.getDependencies().isEmpty()) {
- subproject.getLogger().warn("*** Project {} declares dependency to unused configuration \"{}\". "
- + "This configuration is deprecated and you can safely remove the dependency. ***",
- subproject.getPath(), configurationName);
- }
- })
- )
- );
-
- // Re-initialize the static variables as they might have stale values from previous run. With Gradle 3.0 and
- // gradle daemon enabled, the plugin class might not be loaded for every run.
- DATA_TEMPLATE_FILE_SUFFIXES.clear();
- DATA_TEMPLATE_FILE_SUFFIXES.add(DATA_TEMPLATE_FILE_SUFFIX);
- DATA_TEMPLATE_FILE_SUFFIXES.add(PDL_FILE_SUFFIX);
-
- _restModelCompatMessage = new StringBuffer();
- _needCheckinFiles.clear();
- _needBuildFolders.clear();
- _possibleMissingFilesInEarlierCommit.clear();
-
- project.getGradle().buildFinished(result ->
- {
- StringBuilder endOfBuildMessage = new StringBuilder();
- if (_restModelCompatMessage.length() > 0)
- {
- endOfBuildMessage.append(_restModelCompatMessage);
- }
-
- if (!_needCheckinFiles.isEmpty())
- {
- endOfBuildMessage.append(createModifiedFilesMessage(_needCheckinFiles, _needBuildFolders));
- }
-
- if (!_possibleMissingFilesInEarlierCommit.isEmpty())
- {
- endOfBuildMessage.append(createPossibleMissingFilesMessage(_possibleMissingFilesInEarlierCommit));
- }
-
- if (endOfBuildMessage.length() > 0)
- {
- result.getGradle().getRootProject().getLogger().quiet(endOfBuildMessage.toString());
- }
- });
-
- // Set an extra property on the root project to indicate the initialization is complete for the current build.
- project.getRootProject().getExtensions().getExtraProperties().set(RUN_ONCE, true);
- }
- }
-
- ConfigurationContainer configurations = project.getConfigurations();
-
- // configuration for getting the required classes to make pegasus call main methods
- configurations.maybeCreate(PEGASUS_PLUGIN_CONFIGURATION);
-
- // configuration for compiling generated data templates
- Configuration dataTemplateCompile = configurations.maybeCreate("dataTemplateCompile");
- dataTemplateCompile.setVisible(false);
-
- // configuration for running rest client generator
- Configuration restClientCompile = configurations.maybeCreate("restClientCompile");
- restClientCompile.setVisible(false);
-
- // configuration for running data template generator
- // DEPRECATED! This configuration is no longer used. Please stop using it.
- Configuration dataTemplateGenerator = configurations.maybeCreate("dataTemplateGenerator");
- dataTemplateGenerator.setVisible(false);
-
- // configuration for running rest client generator
- // DEPRECATED! This configuration is no longer used. Please stop using it.
- Configuration restTools = configurations.maybeCreate("restTools");
- restTools.setVisible(false);
-
- // configuration for running Avro schema generator
- // DEPRECATED! To skip avro schema generation, use PegasusOptions.generationModes
- Configuration avroSchemaGenerator = configurations.maybeCreate("avroSchemaGenerator");
- avroSchemaGenerator.setVisible(false);
-
- // configuration for depending on data schemas and potentially generated data templates
- // and for publishing jars containing data schemas to the project artifacts for including in the ivy.xml
- Configuration dataModel = configurations.maybeCreate("dataModel");
- Configuration testDataModel = configurations.maybeCreate("testDataModel");
- testDataModel.extendsFrom(dataModel);
-
- // configuration for depending on data schemas and potentially generated data templates
- // and for publishing jars containing data schemas to the project artifacts for including in the ivy.xml
- Configuration avroSchema = configurations.maybeCreate("avroSchema");
- Configuration testAvroSchema = configurations.maybeCreate("testAvroSchema");
- testAvroSchema.extendsFrom(avroSchema);
-
- // configuration for depending on rest idl and potentially generated client builders
- // and for publishing jars containing rest idl to the project artifacts for including in the ivy.xml
- Configuration restModel = configurations.maybeCreate("restModel");
- Configuration testRestModel = configurations.maybeCreate("testRestModel");
- testRestModel.extendsFrom(restModel);
-
- // configuration for publishing jars containing data schemas and generated data templates
- // to the project artifacts for including in the ivy.xml
- //
- // published data template jars depends on the configurations used to compile the classes
- // in the jar, this includes the data models/templates used by the data template generator
- // and the classes used to compile the generated classes.
- Configuration dataTemplate = configurations.maybeCreate("dataTemplate");
- dataTemplate.extendsFrom(dataTemplateCompile, dataModel);
- Configuration testDataTemplate = configurations.maybeCreate("testDataTemplate");
- testDataTemplate.extendsFrom(dataTemplate, testDataModel);
-
- // configuration for processing and validating schema annotation during build time.
- //
- // The configuration contains dependencies to schema annotation handlers which would process schema annotations
- // and validate.
- Configuration schemaAnnotationHandler = configurations.maybeCreate(SCHEMA_ANNOTATION_HANDLER_CONFIGURATION);
-
- // configuration for publishing jars containing rest idl and generated client builders
- // to the project artifacts for including in the ivy.xml
- //
- // published client builder jars depends on the configurations used to compile the classes
- // in the jar, this includes the data models/templates (potentially generated by this
- // project and) used by the data template generator and the classes used to compile
- // the generated classes.
- Configuration restClient = configurations.maybeCreate("restClient");
- restClient.extendsFrom(restClientCompile, dataTemplate);
- Configuration testRestClient = configurations.maybeCreate("testRestClient");
- testRestClient.extendsFrom(restClient, testDataTemplate);
-
- Properties properties = new Properties();
- InputStream inputStream = getClass().getResourceAsStream("/pegasus-version.properties");
- if (inputStream != null)
- {
- try
- {
- properties.load(inputStream);
- }
- catch (IOException e)
- {
- throw new GradleException("Unable to read pegasus-version.properties file.", e);
- }
-
- String version = properties.getProperty("pegasus.version");
-
- project.getDependencies().add(PEGASUS_PLUGIN_CONFIGURATION, "com.linkedin.pegasus:data:" + version);
- project.getDependencies().add(PEGASUS_PLUGIN_CONFIGURATION, "com.linkedin.pegasus:data-avro-generator:" + version);
- project.getDependencies().add(PEGASUS_PLUGIN_CONFIGURATION, "com.linkedin.pegasus:generator:" + version);
- project.getDependencies().add(PEGASUS_PLUGIN_CONFIGURATION, "com.linkedin.pegasus:restli-tools:" + version);
- }
- else
- {
- project.getLogger().lifecycle("Unable to add pegasus dependencies to {}. Please be sure that "
- + "'com.linkedin.pegasus:data', 'com.linkedin.pegasus:data-avro-generator', 'com.linkedin.pegasus:generator', 'com.linkedin.pegasus:restli-tools'"
- + " are available on the configuration pegasusPlugin",
- project.getPath());
- }
- project.getDependencies().add(PEGASUS_PLUGIN_CONFIGURATION, "org.slf4j:slf4j-simple:1.7.2");
- project.getDependencies().add(PEGASUS_PLUGIN_CONFIGURATION, project.files(System.getProperty("java.home") + "/../lib/tools.jar"));
-
- // this call has to be here because:
- // 1) artifact cannot be published once projects has been evaluated, so we need to first
- // create the tasks and artifact handler, then progressively append sources
- // 2) in order to append sources progressively, the source and documentation tasks and artifacts must be
- // configured/created before configuring and creating the code generation tasks.
-
- configureGeneratedSourcesAndJavadoc(project);
-
- ChangedFileReportTask changedFileReportTask = project.getTasks()
- .create("changedFilesReport", ChangedFileReportTask.class);
-
- project.getTasks().getByName("check").dependsOn(changedFileReportTask);
-
- SourceSetContainer sourceSets = project.getConvention()
- .getPlugin(JavaPluginConvention.class).getSourceSets();
-
- sourceSets.all(sourceSet ->
- {
- if (sourceSet.getName().toLowerCase(Locale.US).contains("generated"))
- {
- return;
- }
-
- checkAvroSchemaExist(project, sourceSet);
-
- // the idl Generator input options will be inside the PegasusOptions class. Users of the
- // plugin can set the inputOptions in their build.gradle
- @SuppressWarnings("unchecked")
- Map<String, PegasusOptions> pegasusOptions = (Map<String, PegasusOptions>) project
- .getExtensions().getExtraProperties().get("pegasus");
-
- pegasusOptions.put(sourceSet.getName(), new PegasusOptions());
-
- // rest model generation could fail on incompatibility
- // if it can fail, fail it early
- configureRestModelGeneration(project, sourceSet);
-
- // Do compatibility check for schemas under "pegasus" directory if the configuration property is provided.
- if (isPropertyTrue(project, ENABLE_PEGASUS_SCHEMA_COMPATIBILITY_CHECK))
- {
- configurePegasusSchemaSnapshotGeneration(project, sourceSet, false);
- }
-
- configurePegasusSchemaSnapshotGeneration(project, sourceSet, true);
-
- configureConversionUtilities(project, sourceSet);
-
- GenerateDataTemplateTask generateDataTemplateTask = configureDataTemplateGeneration(project, sourceSet);
-
- configureAvroSchemaGeneration(project, sourceSet);
-
- configureRestClientGeneration(project, sourceSet);
-
- if (!isPropertyTrue(project, DISABLE_SCHEMA_ANNOTATION_VALIDATION))
- {
- configureSchemaAnnotationValidation(project, sourceSet, generateDataTemplateTask);
- }
-
- Task cleanGeneratedDirTask = project.task(sourceSet.getTaskName("clean", "GeneratedDir"));
- cleanGeneratedDirTask.doLast(new CacheableAction<>(task ->
- {
- deleteGeneratedDir(project, sourceSet, REST_GEN_TYPE);
- deleteGeneratedDir(project, sourceSet, AVRO_SCHEMA_GEN_TYPE);
- deleteGeneratedDir(project, sourceSet, DATA_TEMPLATE_GEN_TYPE);
- }));
-
- // make clean depends on deleting the generated directories
- project.getTasks().getByName("clean").dependsOn(cleanGeneratedDirTask);
-
- // Set data schema directories as resource roots
- configureDataSchemaResourcesRoot(project, sourceSet);
- });
-
- project.getExtensions().getExtraProperties().set(GENERATOR_CLASSLOADER_NAME, getClass().getClassLoader());
- }
-
- protected void configureSchemaAnnotationValidation(Project project,
- SourceSet sourceSet,
- GenerateDataTemplateTask generateDataTemplatesTask)
- {
- // Task would execute based on the following order.
- // generateDataTemplatesTask -> validateSchemaAnnotationTask
-
- // Create ValidateSchemaAnnotation task
- ValidateSchemaAnnotationTask validateSchemaAnnotationTask = project.getTasks()
- .create(sourceSet.getTaskName("validate", "schemaAnnotation"), ValidateSchemaAnnotationTask.class, task ->
- {
- task.setInputDir(generateDataTemplatesTask.getInputDir());
- task.setResolverPath(getDataModelConfig(project, sourceSet)); // same resolver path as generateDataTemplatesTask
- task.setClassPath(project.getConfigurations() .getByName(SCHEMA_ANNOTATION_HANDLER_CONFIGURATION)
- .plus(project.getConfigurations().getByName(PEGASUS_PLUGIN_CONFIGURATION))
- .plus(project.getConfigurations().getByName(JavaPlugin.RUNTIME_CLASSPATH_CONFIGURATION_NAME)));
- task.setHandlerJarPath(project.getConfigurations() .getByName(SCHEMA_ANNOTATION_HANDLER_CONFIGURATION));
- if (isPropertyTrue(project, ENABLE_ARG_FILE))
- {
- task.setEnableArgFile(true);
- }
- }
- );
-
- // validateSchemaAnnotationTask depend on generateDataTemplatesTask
- validateSchemaAnnotationTask.dependsOn(generateDataTemplatesTask);
-
- // Check depends on validateSchemaAnnotationTask.
- project.getTasks().getByName("check").dependsOn(validateSchemaAnnotationTask);
- }
-
-
-
- @SuppressWarnings("deprecation")
- protected void configureGeneratedSourcesAndJavadoc(Project project)
- {
- _generateJavadocTask = project.getTasks().create("generateJavadoc", Javadoc.class);
-
- if (_generateSourcesJarTask == null)
- {
- //
- // configuration for publishing jars containing sources for generated classes
- // to the project artifacts for including in the ivy.xml
- //
- ConfigurationContainer configurations = project.getConfigurations();
- Configuration generatedSources = configurations.maybeCreate("generatedSources");
- Configuration testGeneratedSources = configurations.maybeCreate("testGeneratedSources");
- testGeneratedSources.extendsFrom(generatedSources);
-
- _generateSourcesJarTask = project.getTasks().create("generateSourcesJar", Jar.class, jarTask -> {
- jarTask.setGroup(JavaBasePlugin.DOCUMENTATION_GROUP);
- jarTask.setDescription("Generates a jar file containing the sources for the generated Java classes.");
- // FIXME change to #getArchiveClassifier().set("sources"); breaks backwards-compatibility before 5.1
- // DataHub Note - applied FIXME
- jarTask.getArchiveClassifier().set("sources");
- });
-
- project.getArtifacts().add("generatedSources", _generateSourcesJarTask);
- }
-
- if (_generateJavadocJarTask == null)
- {
- //
- // configuration for publishing jars containing Javadoc for generated classes
- // to the project artifacts for including in the ivy.xml
- //
- ConfigurationContainer configurations = project.getConfigurations();
- Configuration generatedJavadoc = configurations.maybeCreate("generatedJavadoc");
- Configuration testGeneratedJavadoc = configurations.maybeCreate("testGeneratedJavadoc");
- testGeneratedJavadoc.extendsFrom(generatedJavadoc);
-
- _generateJavadocJarTask = project.getTasks().create("generateJavadocJar", Jar.class, jarTask -> {
- jarTask.dependsOn(_generateJavadocTask);
- jarTask.setGroup(JavaBasePlugin.DOCUMENTATION_GROUP);
- jarTask.setDescription("Generates a jar file containing the Javadoc for the generated Java classes.");
- // FIXME change to #getArchiveClassifier().set("sources"); breaks backwards-compatibility before 5.1
- // DataHub Note - applied FIXME
- jarTask.getArchiveClassifier().set("javadoc");
- jarTask.from(_generateJavadocTask.getDestinationDir());
- });
-
- project.getArtifacts().add("generatedJavadoc", _generateJavadocJarTask);
- }
- else
- {
- // TODO: Tighten the types so that _generateJavadocJarTask must be of type Jar.
- ((Jar) _generateJavadocJarTask).from(_generateJavadocTask.getDestinationDir());
- _generateJavadocJarTask.dependsOn(_generateJavadocTask);
- }
- }
-
- private static void deleteGeneratedDir(Project project, SourceSet sourceSet, String dirType)
- {
- String generatedDirPath = getGeneratedDirPath(project, sourceSet, dirType);
- project.getLogger().info("Delete generated directory {}", generatedDirPath);
- project.delete(generatedDirPath);
- }
-
- private static <E extends Enum<E>> Class<E> getCompatibilityLevelClass(Project project)
- {
- ClassLoader generatorClassLoader = (ClassLoader) project.property(GENERATOR_CLASSLOADER_NAME);
-
- String className = "com.linkedin.restli.tools.idlcheck.CompatibilityLevel";
- try
- {
- @SuppressWarnings("unchecked")
- Class<E> enumClass = (Class<E>) generatorClassLoader.loadClass(className).asSubclass(Enum.class);
- return enumClass;
- }
- catch (ClassNotFoundException e)
- {
- throw new RuntimeException("Could not load class " + className);
- }
- }
-
- private static void addGeneratedDir(Project project, SourceSet sourceSet, Collection<Configuration> configurations)
- {
- project.getPlugins().withType(IdeaPlugin.class, ideaPlugin -> {
- IdeaModule ideaModule = ideaPlugin.getModel().getModule();
- // stupid if block needed because of stupid assignment required to update source dirs
- if (isTestSourceSet(sourceSet))
- {
- Set<File> sourceDirs = ideaModule.getTestSourceDirs();
- sourceDirs.addAll(sourceSet.getJava().getSrcDirs());
- // this is stupid but assignment is required
- ideaModule.setTestSourceDirs(sourceDirs);
- if (debug)
- {
- System.out.println("Added " + sourceSet.getJava().getSrcDirs() + " to IdeaModule testSourceDirs "
- + ideaModule.getTestSourceDirs());
- }
- }
- else
- {
- Set<File> sourceDirs = ideaModule.getSourceDirs();
- sourceDirs.addAll(sourceSet.getJava().getSrcDirs());
- // this is stupid but assignment is required
- ideaModule.setSourceDirs(sourceDirs);
- if (debug)
- {
- System.out.println("Added " + sourceSet.getJava().getSrcDirs() + " to IdeaModule sourceDirs "
- + ideaModule.getSourceDirs());
- }
- }
- Collection<Configuration> compilePlus = ideaModule.getScopes().get("COMPILE").get("plus");
- compilePlus.addAll(configurations);
- ideaModule.getScopes().get("COMPILE").put("plus", compilePlus);
- });
- }
-
- private static void checkAvroSchemaExist(Project project, SourceSet sourceSet)
- {
- String sourceDir = "src" + File.separatorChar + sourceSet.getName();
- File avroSourceDir = project.file(sourceDir + File.separatorChar + "avro");
- if (avroSourceDir.exists())
- {
- project.getLogger().lifecycle("{}'s {} has non-empty avro directory. pegasus plugin does not process avro directory",
- project.getName(), sourceDir);
- }
- }
-
- // Compute the name of the source set that will contain the generated code of a given type for an input source set.
- // e.g. genType may be 'DataTemplate' or 'Rest'
- private static String getGeneratedSourceSetName(SourceSet sourceSet, String genType)
- {
- return sourceSet.getName() + "Generated" + genType;
- }
-
- // Compute the directory name that will contain the generated code of a given type for an input source set.
- // e.g. genType may be 'DataTemplate' or 'Rest'
- public static String getGeneratedDirPath(Project project, SourceSet sourceSet, String genType)
- {
- String override = getOverridePath(project, sourceSet, "overrideGeneratedDir");
- String sourceSetName = getGeneratedSourceSetName(sourceSet, genType);
- String base = override == null ? "src" : override;
-
- return base + File.separatorChar + sourceSetName;
- }
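
A quick illustration of the two helpers above (a standalone sketch, not part of the plugin source): for the "main" source set with genType "DataTemplate" and no override property, the generated source set is named "mainGeneratedDataTemplate" and its code lands under src/mainGeneratedDataTemplate.

public final class GeneratedPathExample
{
  public static void main(String[] args)
  {
    // Recomputes the same strings as getGeneratedSourceSetName/getGeneratedDirPath above,
    // assuming the "main" source set, genType "DataTemplate", and no overrideGeneratedDir property.
    String sourceSetName = "main";
    String genType = "DataTemplate";
    String generatedSourceSetName = sourceSetName + "Generated" + genType;
    String generatedDirPath = "src" + java.io.File.separatorChar + generatedSourceSetName;
    System.out.println(generatedSourceSetName); // mainGeneratedDataTemplate
    System.out.println(generatedDirPath);       // src/mainGeneratedDataTemplate (on Unix)
  }
}
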
-
- public static String getDataSchemaPath(Project project, SourceSet sourceSet)
- {
- String override = getOverridePath(project, sourceSet, "overridePegasusDir");
- if (override == null)
- {
- return "src" + File.separatorChar + sourceSet.getName() + File.separatorChar + "pegasus";
- }
- else
- {
- return override;
- }
- }
-
- private static String getExtensionSchemaPath(Project project, SourceSet sourceSet)
- {
- String override = getOverridePath(project, sourceSet, "overrideExtensionSchemaDir");
- if(override == null)
- {
- return "src" + File.separatorChar + sourceSet.getName() + File.separatorChar + "extensions";
- }
- else
- {
- return override;
- }
- }
-
- private static String getSnapshotPath(Project project, SourceSet sourceSet)
- {
- String override = getOverridePath(project, sourceSet, "overrideSnapshotDir");
- if (override == null)
- {
- return "src" + File.separatorChar + sourceSet.getName() + File.separatorChar + "snapshot";
- }
- else
- {
- return override;
- }
- }
-
- private static String getIdlPath(Project project, SourceSet sourceSet)
- {
- String override = getOverridePath(project, sourceSet, "overrideIdlDir");
- if (override == null)
- {
- return "src" + File.separatorChar + sourceSet.getName() + File.separatorChar + "idl";
- }
- else
- {
- return override;
- }
- }
-
- private static String getPegasusSchemaSnapshotPath(Project project, SourceSet sourceSet)
- {
- String override = getOverridePath(project, sourceSet, PEGASUS_SCHEMA_SNAPSHOT_DIR_OVERRIDE);
- if (override == null)
- {
- return SRC + File.separatorChar + sourceSet.getName() + File.separatorChar + PEGASUS_SCHEMA_SNAPSHOT_DIR;
- }
- else
- {
- return override;
- }
- }
-
- private static String getPegasusExtensionSchemaSnapshotPath(Project project, SourceSet sourceSet)
- {
- String override = getOverridePath(project, sourceSet, PEGASUS_EXTENSION_SCHEMA_SNAPSHOT_DIR_OVERRIDE);
- if (override == null)
- {
- return SRC + File.separatorChar + sourceSet.getName() + File.separatorChar + PEGASUS_EXTENSION_SCHEMA_SNAPSHOT_DIR;
- }
- else
- {
- return override;
- }
- }
-
- private static String getOverridePath(Project project, SourceSet sourceSet, String overridePropertyName)
- {
- String sourceSetPropertyName = sourceSet.getName() + '.' + overridePropertyName;
- String override = getNonEmptyProperty(project, sourceSetPropertyName);
-
- if (override == null && sourceSet.getName().equals("main"))
- {
- override = getNonEmptyProperty(project, overridePropertyName);
- }
-
- return override;
- }
-
- private static boolean isTestSourceSet(SourceSet sourceSet)
- {
- return TEST_DIR_REGEX.matcher(sourceSet.getName()).find();
- }
-
- private static Configuration getDataModelConfig(Project project, SourceSet sourceSet)
- {
- return isTestSourceSet(sourceSet)
- ? project.getConfigurations().getByName("testDataModel")
- : project.getConfigurations().getByName("dataModel");
- }
-
- private static boolean isTaskSuccessful(Task task)
- {
- return task.getState().getExecuted()
- // Task is not successful if it is not up to date and is skipped.
- && !(task.getState().getSkipped() && !task.getState().getUpToDate())
- && task.getState().getFailure() == null;
- }
-
- private static boolean isResultEquivalent(File compatibilityLogFile)
- {
- return isResultEquivalent(compatibilityLogFile, false);
- }
-
- private static boolean isResultEquivalent(File compatibilityLogFile, boolean restSpecOnly)
- {
- CompatibilityLogChecker logChecker = new CompatibilityLogChecker();
- try
- {
- logChecker.write(Files.readAllBytes(compatibilityLogFile.toPath()));
- }
- catch (IOException e)
- {
- throw new GradleException("Error while processing compatibility report: " + e.getMessage());
- }
- return logChecker.getRestSpecCompatibility().isEmpty() &&
- (restSpecOnly || logChecker.getModelCompatibility().isEmpty());
- }
-
- protected void configureRestModelGeneration(Project project, SourceSet sourceSet)
- {
- if (sourceSet.getAllSource().isEmpty())
- {
- project.getLogger().info("No source files found for sourceSet {}. Skipping idl generation.", sourceSet.getName());
- return;
- }
-
- // afterEvaluate needed so that api project can be overridden via ext.apiProject
- project.afterEvaluate(p ->
- {
- // find api project here instead of in each project's plugin configuration
- // this allows api project relation options (ext.api*) to be specified anywhere in the build.gradle file
- // alternatively, pass closures to task configuration, and evaluate the closures when task is executed
- Project apiProject = getCheckedApiProject(project);
-
- // make sure the api project is evaluated. Important for configure-on-demand mode.
- if (apiProject != null)
- {
- project.evaluationDependsOn(apiProject.getPath());
-
- if (!apiProject.getPlugins().hasPlugin(_thisPluginType))
- {
- apiProject = null;
- }
- }
-
- if (apiProject == null)
- {
- return;
- }
-
- Task untypedJarTask = project.getTasks().findByName(sourceSet.getJarTaskName());
- if (!(untypedJarTask instanceof Jar))
- {
- return;
- }
- Jar jarTask = (Jar) untypedJarTask;
-
- String snapshotCompatPropertyName = findProperty(FileCompatibilityType.SNAPSHOT);
- if (project.hasProperty(snapshotCompatPropertyName) && "off".equalsIgnoreCase((String) project.property(snapshotCompatPropertyName)))
- {
- project.getLogger().lifecycle("Project {} snapshot compatibility level \"OFF\" is deprecated. Default to \"IGNORE\".",
- project.getPath());
- }
-
- // generate the rest model
- FileCollection restModelCodegenClasspath = project.getConfigurations().getByName(PEGASUS_PLUGIN_CONFIGURATION)
- .plus(project.getConfigurations().getByName(JavaPlugin.RUNTIME_CLASSPATH_CONFIGURATION_NAME))
- .plus(sourceSet.getRuntimeClasspath());
- String destinationDirPrefix = getGeneratedDirPath(project, sourceSet, REST_GEN_TYPE) + File.separatorChar;
- FileCollection restModelResolverPath = apiProject.files(getDataSchemaPath(project, sourceSet))
- .plus(getDataModelConfig(apiProject, sourceSet));
- Set<File> watchedRestModelInputDirs = buildWatchedRestModelInputDirs(project, sourceSet);
- Set<File> restModelInputDirs = difference(sourceSet.getAllSource().getSrcDirs(),
- sourceSet.getResources().getSrcDirs());
-
- Task generateRestModelTask = project.getTasks()
- .create(sourceSet.getTaskName("generate", "restModel"), GenerateRestModelTask.class, task ->
- {
- task.dependsOn(project.getTasks().getByName(sourceSet.getClassesTaskName()));
- task.setCodegenClasspath(restModelCodegenClasspath);
- task.setWatchedCodegenClasspath(restModelCodegenClasspath
- .filter(file -> !"main".equals(file.getName()) && !"classes".equals(file.getName())));
- task.setInputDirs(restModelInputDirs);
- task.setWatchedInputDirs(watchedRestModelInputDirs.isEmpty()
- ? restModelInputDirs : watchedRestModelInputDirs);
- // we need all the artifacts from runtime for any private implementation classes the server code might need.
- task.setSnapshotDestinationDir(project.file(destinationDirPrefix + "snapshot"));
- task.setIdlDestinationDir(project.file(destinationDirPrefix + "idl"));
-
- @SuppressWarnings("unchecked")
- Map<String, PegasusOptions> pegasusOptions = (Map<String, PegasusOptions>) project
- .getExtensions().getExtraProperties().get("pegasus");
- task.setIdlOptions(pegasusOptions.get(sourceSet.getName()).idlOptions);
-
- task.setResolverPath(restModelResolverPath);
- if (isPropertyTrue(project, ENABLE_ARG_FILE))
- {
- task.setEnableArgFile(true);
- }
-
- task.onlyIf(t -> !isPropertyTrue(project, SKIP_GENERATE_REST_MODEL));
-
- task.doFirst(new CacheableAction<>(t -> deleteGeneratedDir(project, sourceSet, REST_GEN_TYPE)));
- });
-
- File apiSnapshotDir = apiProject.file(getSnapshotPath(apiProject, sourceSet));
- File apiIdlDir = apiProject.file(getIdlPath(apiProject, sourceSet));
- apiSnapshotDir.mkdirs();
-
- if (!isPropertyTrue(project, SKIP_IDL_CHECK))
- {
- apiIdlDir.mkdirs();
- }
-
- CheckRestModelTask checkRestModelTask = project.getTasks()
- .create(sourceSet.getTaskName("check", "RestModel"), CheckRestModelTask.class, task ->
- {
- task.dependsOn(generateRestModelTask);
- task.setCurrentSnapshotFiles(SharedFileUtils.getSnapshotFiles(project, destinationDirPrefix));
- task.setPreviousSnapshotDirectory(apiSnapshotDir);
- task.setCurrentIdlFiles(SharedFileUtils.getIdlFiles(project, destinationDirPrefix));
- task.setPreviousIdlDirectory(apiIdlDir);
- task.setCodegenClasspath(project.getConfigurations().getByName(PEGASUS_PLUGIN_CONFIGURATION));
- task.setModelCompatLevel(PropertyUtil.findCompatLevel(project, FileCompatibilityType.SNAPSHOT));
- task.onlyIf(t -> !isPropertyTrue(project, SKIP_IDL_CHECK));
-
- task.doLast(new CacheableAction<>(t ->
- {
- if (!task.isEquivalent())
- {
- _restModelCompatMessage.append(task.getWholeMessage());
- }
- }));
- });
-
- CheckSnapshotTask checkSnapshotTask = project.getTasks()
- .create(sourceSet.getTaskName("check", "Snapshot"), CheckSnapshotTask.class, task -> {
- task.dependsOn(generateRestModelTask);
- task.setCurrentSnapshotFiles(SharedFileUtils.getSnapshotFiles(project, destinationDirPrefix));
- task.setPreviousSnapshotDirectory(apiSnapshotDir);
- task.setCodegenClasspath(project.getConfigurations().getByName(PEGASUS_PLUGIN_CONFIGURATION));
- task.setSnapshotCompatLevel(PropertyUtil.findCompatLevel(project, FileCompatibilityType.SNAPSHOT));
-
- task.onlyIf(t -> isPropertyTrue(project, SKIP_IDL_CHECK));
- });
-
- CheckIdlTask checkIdlTask = project.getTasks()
- .create(sourceSet.getTaskName("check", "Idl"), CheckIdlTask.class, task ->
- {
- task.dependsOn(generateRestModelTask);
- task.setCurrentIdlFiles(SharedFileUtils.getIdlFiles(project, destinationDirPrefix));
- task.setPreviousIdlDirectory(apiIdlDir);
- task.setResolverPath(restModelResolverPath);
- task.setCodegenClasspath(project.getConfigurations().getByName(PEGASUS_PLUGIN_CONFIGURATION));
- task.setIdlCompatLevel(PropertyUtil.findCompatLevel(project, FileCompatibilityType.IDL));
- if (isPropertyTrue(project, ENABLE_ARG_FILE))
- {
- task.setEnableArgFile(true);
- }
-
-
- task.onlyIf(t -> !isPropertyTrue(project, SKIP_IDL_CHECK)
- && !"OFF".equals(PropertyUtil.findCompatLevel(project, FileCompatibilityType.IDL)));
- });
-
- // rest model publishing involves cross-project reference
- // configure after all projects have been evaluated
- // the file copy can be turned off by "rest.model.noPublish" flag
- Task publishRestliSnapshotTask = project.getTasks()
- .create(sourceSet.getTaskName("publish", "RestliSnapshot"), PublishRestModelTask.class, task ->
- {
- task.dependsOn(checkRestModelTask, checkSnapshotTask, checkIdlTask);
- task.from(SharedFileUtils.getSnapshotFiles(project, destinationDirPrefix));
- task.into(apiSnapshotDir);
- task.setSuffix(SNAPSHOT_FILE_SUFFIX);
-
- task.onlyIf(t ->
- isPropertyTrue(project, SNAPSHOT_FORCE_PUBLISH) ||
- (
- !isPropertyTrue(project, SNAPSHOT_NO_PUBLISH) &&
- (
- (
- isPropertyTrue(project, SKIP_IDL_CHECK) &&
- isTaskSuccessful(checkSnapshotTask) &&
- checkSnapshotTask.getSummaryTarget().exists() &&
- !isResultEquivalent(checkSnapshotTask.getSummaryTarget())
- ) ||
- (
- !isPropertyTrue(project, SKIP_IDL_CHECK) &&
- isTaskSuccessful(checkRestModelTask) &&
- checkRestModelTask.getSummaryTarget().exists() &&
- !isResultEquivalent(checkRestModelTask.getSummaryTarget())
- )
- ))
- );
- });
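
The onlyIf predicate above is the densest piece of logic in this block; the following standalone sketch restates it with the property and task-state lookups replaced by plain booleans (hypothetical inputs, not the plugin's API).

public final class SnapshotPublishGateSketch
{
  // Mirrors publishRestliSnapshot's onlyIf: force-publish wins, otherwise publishing requires
  // that it is not disabled and that the relevant check task succeeded, produced a report,
  // and found the current and previous files to be non-equivalent.
  static boolean shouldPublish(boolean forcePublish, boolean noPublish, boolean skipIdlCheck,
      boolean snapshotCheckOk, boolean snapshotReportExists, boolean snapshotEquivalent,
      boolean restModelCheckOk, boolean restModelReportExists, boolean restModelEquivalent)
  {
    return forcePublish
        || (!noPublish
            && ((skipIdlCheck && snapshotCheckOk && snapshotReportExists && !snapshotEquivalent)
                || (!skipIdlCheck && restModelCheckOk && restModelReportExists && !restModelEquivalent)));
  }

  public static void main(String[] args)
  {
    // With the idl check skipped, a successful snapshot check that found differences triggers publishing.
    System.out.println(shouldPublish(false, false, true, true, true, false, false, false, false)); // true
  }
}
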
-
- Task publishRestliIdlTask = project.getTasks()
- .create(sourceSet.getTaskName("publish", "RestliIdl"), PublishRestModelTask.class, task -> {
- task.dependsOn(checkRestModelTask, checkIdlTask, checkSnapshotTask);
- task.from(SharedFileUtils.getIdlFiles(project, destinationDirPrefix));
- task.into(apiIdlDir);
- task.setSuffix(IDL_FILE_SUFFIX);
-
- task.onlyIf(t ->
- isPropertyTrue(project, IDL_FORCE_PUBLISH) ||
- (
- !isPropertyTrue(project, IDL_NO_PUBLISH) &&
- (
- (
- isPropertyTrue(project, SKIP_IDL_CHECK) &&
- isTaskSuccessful(checkSnapshotTask) &&
- checkSnapshotTask.getSummaryTarget().exists() &&
- !isResultEquivalent(checkSnapshotTask.getSummaryTarget(), true)
- ) ||
- (
- !isPropertyTrue(project, SKIP_IDL_CHECK) &&
- (
- (isTaskSuccessful(checkRestModelTask) &&
- checkRestModelTask.getSummaryTarget().exists() &&
- !isResultEquivalent(checkRestModelTask.getSummaryTarget(), true)) ||
- (isTaskSuccessful(checkIdlTask) &&
- checkIdlTask.getSummaryTarget().exists() &&
- !isResultEquivalent(checkIdlTask.getSummaryTarget()))
- )
- )
- ))
- );
- });
-
- project.getLogger().info("API project selected for {} is {}",
- publishRestliIdlTask.getPath(), apiProject.getPath());
-
- jarTask.from(SharedFileUtils.getIdlFiles(project, destinationDirPrefix));
- // add generated .restspec.json files as resources to the jar
- jarTask.dependsOn(publishRestliSnapshotTask, publishRestliIdlTask);
-
- ChangedFileReportTask changedFileReportTask = (ChangedFileReportTask) project.getTasks()
- .getByName("changedFilesReport");
-
- // Use the files from apiDir for generating the changed files report as we need to notify user only when
- // source system files are modified.
- changedFileReportTask.setIdlFiles(SharedFileUtils.getSuffixedFiles(project, apiIdlDir, IDL_FILE_SUFFIX));
- changedFileReportTask.setSnapshotFiles(SharedFileUtils.getSuffixedFiles(project, apiSnapshotDir,
- SNAPSHOT_FILE_SUFFIX));
- changedFileReportTask.mustRunAfter(publishRestliSnapshotTask, publishRestliIdlTask);
- changedFileReportTask.doLast(new CacheableAction<>(t ->
- {
- if (!changedFileReportTask.getNeedCheckinFiles().isEmpty())
- {
- project.getLogger().info("Adding modified files to need checkin list...");
- _needCheckinFiles.addAll(changedFileReportTask.getNeedCheckinFiles());
- _needBuildFolders.add(getCheckedApiProject(project).getPath());
- }
- }));
- });
- }
-
- protected void configurePegasusSchemaSnapshotGeneration(Project project, SourceSet sourceSet, boolean isExtensionSchema)
- {
- File schemaDir = isExtensionSchema? project.file(getExtensionSchemaPath(project, sourceSet))
- : project.file(getDataSchemaPath(project, sourceSet));
-
- if ((isExtensionSchema && SharedFileUtils.getSuffixedFiles(project, schemaDir, PDL_FILE_SUFFIX).isEmpty()) ||
- (!isExtensionSchema && SharedFileUtils.getSuffixedFiles(project, schemaDir, DATA_TEMPLATE_FILE_SUFFIXES).isEmpty()))
- {
- return;
- }
-
- Path publishablePegasusSchemaSnapshotDir = project.getBuildDir().toPath().resolve(sourceSet.getName() +
- (isExtensionSchema ? PEGASUS_EXTENSION_SCHEMA_SNAPSHOT: PEGASUS_SCHEMA_SNAPSHOT));
-
- Task generatePegasusSchemaSnapshot = generatePegasusSchemaSnapshot(project, sourceSet,
- isExtensionSchema ? PEGASUS_EXTENSION_SCHEMA_SNAPSHOT: PEGASUS_SCHEMA_SNAPSHOT, schemaDir,
- publishablePegasusSchemaSnapshotDir.toFile(), isExtensionSchema);
-
- File pegasusSchemaSnapshotDir = project.file(isExtensionSchema ? getPegasusExtensionSchemaSnapshotPath(project, sourceSet)
- : getPegasusSchemaSnapshotPath(project, sourceSet));
- pegasusSchemaSnapshotDir.mkdirs();
-
- Task checkSchemaSnapshot = project.getTasks().create(sourceSet.getTaskName("check",
- isExtensionSchema ? PEGASUS_EXTENSION_SCHEMA_SNAPSHOT: PEGASUS_SCHEMA_SNAPSHOT),
- CheckPegasusSnapshotTask.class, task ->
- {
- task.dependsOn(generatePegasusSchemaSnapshot);
- task.setCurrentSnapshotDirectory(publishablePegasusSchemaSnapshotDir.toFile());
- task.setPreviousSnapshotDirectory(pegasusSchemaSnapshotDir);
- task.setCodegenClasspath(project.getConfigurations().getByName(PEGASUS_PLUGIN_CONFIGURATION)
- .plus(project.getConfigurations().getByName(SCHEMA_ANNOTATION_HANDLER_CONFIGURATION))
- .plus(project.getConfigurations().getByName(JavaPlugin.RUNTIME_CLASSPATH_CONFIGURATION_NAME)));
- task.setCompatibilityLevel(isExtensionSchema ?
- PropertyUtil.findCompatLevel(project, FileCompatibilityType.PEGASUS_EXTENSION_SCHEMA_SNAPSHOT)
- :PropertyUtil.findCompatLevel(project, FileCompatibilityType.PEGASUS_SCHEMA_SNAPSHOT));
- task.setCompatibilityMode(isExtensionSchema ? COMPATIBILITY_OPTIONS_MODE_EXTENSION :
- PropertyUtil.findCompatMode(project, PEGASUS_COMPATIBILITY_MODE));
- task.setExtensionSchema(isExtensionSchema);
- task.setHandlerJarPath(project.getConfigurations() .getByName(SCHEMA_ANNOTATION_HANDLER_CONFIGURATION));
-
- task.onlyIf(t ->
- {
- String pegasusSnapshotCompatPropertyName = isExtensionSchema ?
- findProperty(FileCompatibilityType.PEGASUS_EXTENSION_SCHEMA_SNAPSHOT)
- : findProperty(FileCompatibilityType.PEGASUS_SCHEMA_SNAPSHOT);
- return !project.hasProperty(pegasusSnapshotCompatPropertyName) ||
- !"off".equalsIgnoreCase((String) project.property(pegasusSnapshotCompatPropertyName));
- });
- });
-
- Task publishPegasusSchemaSnapshot = publishPegasusSchemaSnapshot(project, sourceSet,
- isExtensionSchema ? PEGASUS_EXTENSION_SCHEMA_SNAPSHOT: PEGASUS_SCHEMA_SNAPSHOT, checkSchemaSnapshot,
- publishablePegasusSchemaSnapshotDir.toFile(), pegasusSchemaSnapshotDir);
-
- project.getTasks().getByName(LifecycleBasePlugin.ASSEMBLE_TASK_NAME).dependsOn(publishPegasusSchemaSnapshot);
- }
-
- @SuppressWarnings("deprecation")
- protected void configureAvroSchemaGeneration(Project project, SourceSet sourceSet)
- {
- File dataSchemaDir = project.file(getDataSchemaPath(project, sourceSet));
- File avroDir = project.file(getGeneratedDirPath(project, sourceSet, AVRO_SCHEMA_GEN_TYPE)
- + File.separatorChar + "avro");
-
- // generate avro schema files from data schema
- Task generateAvroSchemaTask = project.getTasks()
- .create(sourceSet.getTaskName("generate", "avroSchema"), GenerateAvroSchemaTask.class, task -> {
- task.setInputDir(dataSchemaDir);
- task.setDestinationDir(avroDir);
- task.setResolverPath(getDataModelConfig(project, sourceSet));
- task.setCodegenClasspath(project.getConfigurations().getByName(PEGASUS_PLUGIN_CONFIGURATION));
- if (isPropertyTrue(project, ENABLE_ARG_FILE))
- {
- task.setEnableArgFile(true);
- }
-
- task.onlyIf(t ->
- {
- if (task.getInputDir().exists())
- {
- @SuppressWarnings("unchecked")
- Map<String, PegasusOptions> pegasusOptions = (Map<String, PegasusOptions>) project
- .getExtensions().getExtraProperties().get("pegasus");
-
- if (pegasusOptions.get(sourceSet.getName()).hasGenerationMode(PegasusOptions.GenerationMode.AVRO))
- {
- return true;
- }
- }
-
- return !project.getConfigurations().getByName("avroSchemaGenerator").isEmpty();
- });
-
- task.doFirst(new CacheableAction<>(t -> deleteGeneratedDir(project, sourceSet, AVRO_SCHEMA_GEN_TYPE)));
- });
-
- project.getTasks().getByName(sourceSet.getCompileJavaTaskName()).dependsOn(generateAvroSchemaTask);
-
- // create avro schema jar file
-
- Task avroSchemaJarTask = project.getTasks().create(sourceSet.getName() + "AvroSchemaJar", Jar.class, task ->
- {
- // add path prefix to each file in the data schema directory
- task.from(avroDir, copySpec ->
- copySpec.eachFile(fileCopyDetails ->
- fileCopyDetails.setPath("avro" + File.separatorChar + fileCopyDetails.getPath())));
-
- // FIXME change to #getArchiveAppendix().set(...); breaks backwards-compatibility before 5.1
- // DataHub Note - applied FIXME
- task.getArchiveAppendix().set(getAppendix(sourceSet, "avro-schema"));
- task.setDescription("Generate an avro schema jar");
- });
-
- if (!isTestSourceSet(sourceSet))
- {
- project.getArtifacts().add("avroSchema", avroSchemaJarTask);
- }
- else
- {
- project.getArtifacts().add("testAvroSchema", avroSchemaJarTask);
- }
- }
-
- protected void configureConversionUtilities(Project project, SourceSet sourceSet)
- {
- File dataSchemaDir = project.file(getDataSchemaPath(project, sourceSet));
- boolean reverse = isPropertyTrue(project, CONVERT_TO_PDL_REVERSE);
- boolean keepOriginal = isPropertyTrue(project, CONVERT_TO_PDL_KEEP_ORIGINAL);
- boolean skipVerification = isPropertyTrue(project, CONVERT_TO_PDL_SKIP_VERIFICATION);
- String preserveSourceCmd = getNonEmptyProperty(project, CONVERT_TO_PDL_PRESERVE_SOURCE_CMD);
-
- // Utility task for migrating between PDSC and PDL.
- project.getTasks().create(sourceSet.getTaskName("convert", "ToPdl"), TranslateSchemasTask.class, task ->
- {
- task.setInputDir(dataSchemaDir);
- task.setDestinationDir(dataSchemaDir);
- task.setResolverPath(getDataModelConfig(project, sourceSet));
- task.setCodegenClasspath(project.getConfigurations().getByName(PEGASUS_PLUGIN_CONFIGURATION));
- task.setPreserveSourceCmd(preserveSourceCmd);
- if (reverse)
- {
- task.setSourceFormat(SchemaFileType.PDL);
- task.setDestinationFormat(SchemaFileType.PDSC);
- }
- else
- {
- task.setSourceFormat(SchemaFileType.PDSC);
- task.setDestinationFormat(SchemaFileType.PDL);
- }
- task.setKeepOriginal(keepOriginal);
- task.setSkipVerification(skipVerification);
- if (isPropertyTrue(project, ENABLE_ARG_FILE))
- {
- task.setEnableArgFile(true);
- }
-
- task.onlyIf(t -> task.getInputDir().exists());
- task.doLast(new CacheableAction<>(t ->
- {
- project.getLogger().lifecycle("Pegasus schema conversion complete.");
- project.getLogger().lifecycle("All pegasus schema files in " + dataSchemaDir + " have been converted");
- project.getLogger().lifecycle("You can use '-PconvertToPdl.reverse=true|false' to change the direction of conversion.");
- }));
- });
-
- // Helper task for reformatting existing PDL schemas by generating them again.
- project.getTasks().create(sourceSet.getTaskName("reformat", "Pdl"), TranslateSchemasTask.class, task ->
- {
- task.setInputDir(dataSchemaDir);
- task.setDestinationDir(dataSchemaDir);
- task.setResolverPath(getDataModelConfig(project, sourceSet));
- task.setCodegenClasspath(project.getConfigurations().getByName(PEGASUS_PLUGIN_CONFIGURATION));
- task.setSourceFormat(SchemaFileType.PDL);
- task.setDestinationFormat(SchemaFileType.PDL);
- task.setKeepOriginal(true);
- task.setSkipVerification(true);
- if (isPropertyTrue(project, ENABLE_ARG_FILE))
- {
- task.setEnableArgFile(true);
- }
-
- task.onlyIf(t -> task.getInputDir().exists());
- task.doLast(new CacheableAction<>(t -> project.getLogger().lifecycle("PDL reformat complete.")));
- });
- }
-
- @SuppressWarnings("deprecation")
- protected GenerateDataTemplateTask configureDataTemplateGeneration(Project project, SourceSet sourceSet)
- {
- File dataSchemaDir = project.file(getDataSchemaPath(project, sourceSet));
- File generatedDataTemplateDir = project.file(getGeneratedDirPath(project, sourceSet, DATA_TEMPLATE_GEN_TYPE)
- + File.separatorChar + "java");
- File publishableSchemasBuildDir = project.file(project.getBuildDir().getAbsolutePath()
- + File.separatorChar + sourceSet.getName() + "Schemas");
- File publishableLegacySchemasBuildDir = project.file(project.getBuildDir().getAbsolutePath()
- + File.separatorChar + sourceSet.getName() + "LegacySchemas");
- File publishableExtensionSchemasBuildDir = project.file(project.getBuildDir().getAbsolutePath()
- + File.separatorChar + sourceSet.getName() + "ExtensionSchemas");
-
- // generate data template source files from data schema
- GenerateDataTemplateTask generateDataTemplatesTask = project.getTasks()
- .create(sourceSet.getTaskName("generate", "dataTemplate"), GenerateDataTemplateTask.class, task ->
- {
- task.setInputDir(dataSchemaDir);
- task.setDestinationDir(generatedDataTemplateDir);
- task.setResolverPath(getDataModelConfig(project, sourceSet));
- task.setCodegenClasspath(project.getConfigurations().getByName(PEGASUS_PLUGIN_CONFIGURATION));
- if (isPropertyTrue(project, ENABLE_ARG_FILE))
- {
- task.setEnableArgFile(true);
- }
- if (isPropertyTrue(project, CODE_GEN_PATH_CASE_SENSITIVE))
- {
- task.setGenerateLowercasePath(false);
- }
-
- task.onlyIf(t ->
- {
- if (task.getInputDir().exists())
- {
- @SuppressWarnings("unchecked")
- Map<String, PegasusOptions> pegasusOptions = (Map<String, PegasusOptions>) project
- .getExtensions().getExtraProperties().get("pegasus");
-
- return pegasusOptions.get(sourceSet.getName()).hasGenerationMode(PegasusOptions.GenerationMode.PEGASUS);
- }
-
- return false;
- });
-
- task.doFirst(new CacheableAction<>(t -> deleteGeneratedDir(project, sourceSet, DATA_TEMPLATE_GEN_TYPE)));
- });
-
- // TODO: Tighten the types so that _generateSourcesJarTask must be of type Jar.
- ((Jar) _generateSourcesJarTask).from(generateDataTemplatesTask.getDestinationDir());
- _generateSourcesJarTask.dependsOn(generateDataTemplatesTask);
-
- _generateJavadocTask.source(generateDataTemplatesTask.getDestinationDir());
- _generateJavadocTask.setClasspath(_generateJavadocTask.getClasspath()
- .plus(project.getConfigurations().getByName("dataTemplateCompile"))
- .plus(generateDataTemplatesTask.getResolverPath()));
- _generateJavadocTask.dependsOn(generateDataTemplatesTask);
-
- // Add extra dependencies for data model compilation
- project.getDependencies().add("dataTemplateCompile", "com.google.code.findbugs:jsr305:3.0.2");
-
- // create new source set for generated java source and class files
- String targetSourceSetName = getGeneratedSourceSetName(sourceSet, DATA_TEMPLATE_GEN_TYPE);
-
- SourceSetContainer sourceSets = project.getConvention()
- .getPlugin(JavaPluginConvention.class).getSourceSets();
-
- SourceSet targetSourceSet = sourceSets.create(targetSourceSetName, ss ->
- {
- ss.java(sourceDirectorySet -> sourceDirectorySet.srcDir(generatedDataTemplateDir));
- ss.setCompileClasspath(getDataModelConfig(project, sourceSet)
- .plus(project.getConfigurations().getByName("dataTemplateCompile")));
- });
-
- // idea plugin needs to know about new generated java source directory and its dependencies
- addGeneratedDir(project, targetSourceSet, Arrays.asList(
- getDataModelConfig(project, sourceSet),
- project.getConfigurations().getByName("dataTemplateCompile")));
-
- // Set source compatibility to 1.8 as the data-templates now generate code with Java 8 features.
- JavaCompile compileTask = project.getTasks()
- .withType(JavaCompile.class).getByName(targetSourceSet.getCompileJavaTaskName());
- compileTask.doFirst(new CacheableAction<>(task -> {
- ((JavaCompile) task).setSourceCompatibility("1.8");
- ((JavaCompile) task).setTargetCompatibility("1.8");
- }));
- // make sure that java source files have been generated before compiling them
- compileTask.dependsOn(generateDataTemplatesTask);
-
- // Dummy task to maintain backward compatibility
- // TODO: Delete this task once use cases have had time to reference the new task
- Task destroyStaleFiles = project.getTasks().create(sourceSet.getName() + "DestroyStaleFiles", Delete.class);
- destroyStaleFiles.onlyIf(task -> {
- project.getLogger().lifecycle("{} task is a NO-OP task.", task.getPath());
- return false;
- });
-
- // Dummy task to maintain backward compatibility, as this task was replaced by CopySchemas
- // TODO: Delete this task once use cases have had time to reference the new task
- Task copyPdscSchemasTask = project.getTasks().create(sourceSet.getName() + "CopyPdscSchemas", Copy.class);
- copyPdscSchemasTask.dependsOn(destroyStaleFiles);
- copyPdscSchemasTask.onlyIf(task -> {
- project.getLogger().lifecycle("{} task is a NO-OP task.", task.getPath());
- return false;
- });
-
- // Prepare schema files for publication by syncing schema folders.
- Task prepareSchemasForPublishTask = project.getTasks()
- .create(sourceSet.getName() + "CopySchemas", Sync.class, task ->
- {
- task.from(dataSchemaDir, syncSpec -> DATA_TEMPLATE_FILE_SUFFIXES.forEach(suffix -> syncSpec.include("**/*" + suffix)));
- task.into(publishableSchemasBuildDir);
- });
- prepareSchemasForPublishTask.dependsOn(copyPdscSchemasTask);
-
- Collection<Object> dataTemplateJarDepends = new ArrayList<>();
- dataTemplateJarDepends.add(compileTask);
- dataTemplateJarDepends.add(prepareSchemasForPublishTask);
-
- // Convert all PDL files back to PDSC for publication
- // TODO: Remove this conversion permanently once translated PDSCs are no longer needed.
- Task prepareLegacySchemasForPublishTask = project.getTasks()
- .create(sourceSet.getName() + "TranslateSchemas", TranslateSchemasTask.class, task ->
- {
- task.setInputDir(dataSchemaDir);
- task.setDestinationDir(publishableLegacySchemasBuildDir);
- task.setResolverPath(getDataModelConfig(project, sourceSet));
- task.setCodegenClasspath(project.getConfigurations().getByName(PEGASUS_PLUGIN_CONFIGURATION));
- task.setSourceFormat(SchemaFileType.PDL);
- task.setDestinationFormat(SchemaFileType.PDSC);
- task.setKeepOriginal(true);
- task.setSkipVerification(true);
- if (isPropertyTrue(project, ENABLE_ARG_FILE))
- {
- task.setEnableArgFile(true);
- }
- });
-
- prepareLegacySchemasForPublishTask.dependsOn(destroyStaleFiles);
- dataTemplateJarDepends.add(prepareLegacySchemasForPublishTask);
-
- // extension schema directory
- File extensionSchemaDir = project.file(getExtensionSchemaPath(project, sourceSet));
-
- if (!SharedFileUtils.getSuffixedFiles(project, extensionSchemaDir, PDL_FILE_SUFFIX).isEmpty())
- {
- // Validate extension schemas if extension schemas are provided.
- ValidateExtensionSchemaTask validateExtensionSchemaTask = project.getTasks()
- .create(sourceSet.getTaskName("validate", "ExtensionSchemas"), ValidateExtensionSchemaTask.class, task ->
- {
- task.setInputDir(extensionSchemaDir);
- task.setResolverPath(
- getDataModelConfig(project, sourceSet).plus(project.files(getDataSchemaPath(project, sourceSet))));
- task.setClassPath(project.getConfigurations().getByName(PEGASUS_PLUGIN_CONFIGURATION));
- if (isPropertyTrue(project, ENABLE_ARG_FILE))
- {
- task.setEnableArgFile(true);
- }
- });
-
- Task prepareExtensionSchemasForPublishTask = project.getTasks()
- .create(sourceSet.getName() + "CopyExtensionSchemas", Sync.class, task ->
- {
- task.from(extensionSchemaDir, syncSpec -> syncSpec.include("**/*" + PDL_FILE_SUFFIX));
- task.into(publishableExtensionSchemasBuildDir);
- });
-
- prepareExtensionSchemasForPublishTask.dependsOn(validateExtensionSchemaTask);
- prepareExtensionSchemasForPublishTask.dependsOn(copyPdscSchemasTask);
- dataTemplateJarDepends.add(prepareExtensionSchemasForPublishTask);
- }
-
- // include pegasus files in the output of this SourceSet
- project.getTasks().withType(ProcessResources.class).getByName(targetSourceSet.getProcessResourcesTaskName(), it ->
- {
- it.from(prepareSchemasForPublishTask, copy -> copy.into("pegasus"));
- // TODO: Remove this permanently once translated PDSCs are no longer needed.
- it.from(prepareLegacySchemasForPublishTask, copy -> copy.into(TRANSLATED_SCHEMAS_DIR));
- Sync copyExtensionSchemasTask = project.getTasks().withType(Sync.class).findByName(sourceSet.getName() + "CopyExtensionSchemas");
- if (copyExtensionSchemasTask != null)
- {
- it.from(copyExtensionSchemasTask, copy -> copy.into("extensions"));
- }
- });
-
- // create data template jar file
- Jar dataTemplateJarTask = project.getTasks()
- .create(sourceSet.getName() + "DataTemplateJar", Jar.class, task ->
- {
- task.dependsOn(dataTemplateJarDepends);
- task.from(targetSourceSet.getOutput());
-
- // FIXME change to #getArchiveAppendix().set(...); breaks backwards-compatibility before 5.1
- // DataHub Note - applied FIXME
- task.getArchiveAppendix().set(getAppendix(sourceSet, "data-template"));
- task.setDescription("Generate a data template jar");
- });
-
- // add the data model and data template jars to the list of project artifacts.
- if (!isTestSourceSet(sourceSet))
- {
- project.getArtifacts().add("dataTemplate", dataTemplateJarTask);
- }
- else
- {
- project.getArtifacts().add("testDataTemplate", dataTemplateJarTask);
- }
-
- // include additional dependencies into the appropriate configuration used to compile the input source set
- // must include the generated data template classes and their dependencies in the configuration.
- // "compile" and "testCompile" configurations have been removed in Gradle 7,
- // but to keep the maximum backward compatibility, here we handle Gradle 7 and earlier version differently
- // Once MIN_REQUIRED_VERSION reaches 7.0, we can remove the check of isAtLeastGradle7()
- String compileConfigName;
- if (isAtLeastGradle7()) {
- compileConfigName = isTestSourceSet(sourceSet) ? "testImplementation" : project.getConfigurations().findByName("api") != null ? "api" : "implementation";
- }
- else
- {
- compileConfigName = isTestSourceSet(sourceSet) ? "testCompile" : "compile";
- }
-
- Configuration compileConfig = project.getConfigurations().maybeCreate(compileConfigName);
- compileConfig.extendsFrom(
- getDataModelConfig(project, sourceSet),
- project.getConfigurations().getByName("dataTemplateCompile"));
-
- // The getArchivePath() API doesn’t carry any task dependency and has been deprecated.
- // Replace it with getArchiveFile() on Gradle 7,
- // but keep getArchivePath() for backwards compatibility with Gradle versions older than 5.1
- // DataHub Note - applied FIXME
- project.getDependencies().add(compileConfigName, project.files(
- isAtLeastGradle7() ? dataTemplateJarTask.getArchiveFile() : dataTemplateJarTask.getArchivePath()));
-
- if (_configureIvyPublications) {
- // The below Action is only applied when the 'ivy-publish' is applied by the consumer.
- // If the consumer does not use ivy-publish, this is a noop.
- // this Action prepares the project applying the pegasus plugin to publish artifacts using these steps:
- // 1. Registers "feature variants" for pegasus-specific artifacts;
- // see https://docs.gradle.org/6.1/userguide/feature_variants.html
- // 2. Wires legacy configurations like `dataTemplateCompile` to auto-generated feature variant *Api and
- // *Implementation configurations for backwards compatibility.
- // 3. Configures the Ivy Publication to include auto-generated feature variant *Api and *Implementation
- // configurations and their dependencies.
- project.getPlugins().withType(IvyPublishPlugin.class, ivyPublish -> {
- if (!isAtLeastGradle61())
- {
- throw new GradleException("Using the ivy-publish plugin with the pegasus plugin requires Gradle 6.1 or higher " +
- "at build time. Please upgrade.");
- }
-
- JavaPluginExtension java = project.getExtensions().getByType(JavaPluginExtension.class);
- // create new capabilities per source set; automatically creates api and implementation configurations
- String featureName = mapSourceSetToFeatureName(targetSourceSet);
- try
- {
- /*
- reflection is required to preserve compatibility with Gradle 5.2.1 and below
- TODO once Gradle 5.3+ is required, remove reflection and replace with:
- java.registerFeature(featureName, featureSpec -> {
- featureSpec.usingSourceSet(targetSourceSet);
- });
- */
- Method registerFeature = JavaPluginExtension.class.getDeclaredMethod("registerFeature", String.class, Action.class);
- Action<?>/*<org.gradle.api.plugins.FeatureSpec>*/ featureSpecAction = createFeatureVariantFromSourceSet(targetSourceSet);
- registerFeature.invoke(java, featureName, featureSpecAction);
- }
- catch (ReflectiveOperationException e)
- {
- throw new GradleException("Unable to register new feature variant", e);
- }
-
- // expose transitive dependencies to consumers via variant configurations
- Configuration featureConfiguration = project.getConfigurations().getByName(featureName);
- Configuration mainGeneratedDataTemplateApi = project.getConfigurations().getByName(targetSourceSet.getApiConfigurationName());
- featureConfiguration.extendsFrom(mainGeneratedDataTemplateApi);
- mainGeneratedDataTemplateApi.extendsFrom(
- getDataModelConfig(project, targetSourceSet),
- project.getConfigurations().getByName("dataTemplateCompile"));
-
- // Configure the existing IvyPublication
- // For backwards-compatibility, make the legacy dataTemplate/testDataTemplate configurations extend
- // their replacements, auto-created when we registered the new feature variant
- project.afterEvaluate(p -> {
- PublishingExtension publishing = p.getExtensions().getByType(PublishingExtension.class);
- // When configuring a Gradle Publication, use this value to find the name of the publication to configure. Defaults to "ivy".
- String publicationName = p.getExtensions().getExtraProperties().getProperties().getOrDefault("PegasusPublicationName", "ivy").toString();
- IvyPublication ivyPublication = publishing.getPublications().withType(IvyPublication.class).getByName(publicationName);
- ivyPublication.configurations(configurations -> configurations.create(featureName, legacyConfiguration -> {
- legacyConfiguration.extend(p.getConfigurations().getByName(targetSourceSet.getApiElementsConfigurationName()).getName());
- legacyConfiguration.extend(p.getConfigurations().getByName(targetSourceSet.getRuntimeElementsConfigurationName()).getName());
- }));
- });
- });
- }
-
- if (debug)
- {
- System.out.println("configureDataTemplateGeneration sourceSet " + sourceSet.getName());
- System.out.println(compileConfigName + ".allDependencies : "
- + project.getConfigurations().getByName(compileConfigName).getAllDependencies());
- System.out.println(compileConfigName + ".extendsFrom: "
- + project.getConfigurations().getByName(compileConfigName).getExtendsFrom());
- System.out.println(compileConfigName + ".transitive: "
- + project.getConfigurations().getByName(compileConfigName).isTransitive());
- }
-
- project.getTasks().getByName(sourceSet.getCompileJavaTaskName()).dependsOn(dataTemplateJarTask);
- return generateDataTemplatesTask;
- }
-
- private String mapSourceSetToFeatureName(SourceSet sourceSet) {
- String featureName = "";
- switch (sourceSet.getName()) {
- case "mainGeneratedDataTemplate":
- featureName = "dataTemplate";
- break;
- case "testGeneratedDataTemplate":
- featureName = "testDataTemplate";
- break;
- case "mainGeneratedRest":
- featureName = "restClient";
- break;
- case "testGeneratedRest":
- featureName = "testRestClient";
- break;
- case "mainGeneratedAvroSchema":
- featureName = "avroSchema";
- break;
- case "testGeneratedAvroSchema":
- featureName = "testAvroSchema";
- break;
- default:
- String msg = String.format("Unable to map %s to an appropriate feature name", sourceSet);
- throw new GradleException(msg);
- }
- return featureName;
- }
-
- // Generate rest client from idl files generated from java source files in the specified source set.
- //
- // This generates rest client source files from idl file generated from java source files
- // in the source set. The generated rest client source files will be in a new source set.
- // It also compiles the rest client source files into classes, and creates both the
- // rest model and rest client jar files.
- //
- @SuppressWarnings("deprecation")
- protected void configureRestClientGeneration(Project project, SourceSet sourceSet)
- {
- // idl directory for api project
- File idlDir = project.file(getIdlPath(project, sourceSet));
- if (SharedFileUtils.getSuffixedFiles(project, idlDir, IDL_FILE_SUFFIX).isEmpty() && !isPropertyTrue(project,
- PROCESS_EMPTY_IDL_DIR))
- {
- return;
- }
- File generatedRestClientDir = project.file(getGeneratedDirPath(project, sourceSet, REST_GEN_TYPE)
- + File.separatorChar + "java");
-
- // always include imported data template jars in compileClasspath of rest client
- FileCollection dataModelConfig = getDataModelConfig(project, sourceSet);
-
- // if data templates generated from this source set, add the generated data template jar to compileClasspath
- // of rest client.
- String dataTemplateSourceSetName = getGeneratedSourceSetName(sourceSet, DATA_TEMPLATE_GEN_TYPE);
-
- Jar dataTemplateJarTask = null;
-
- SourceSetContainer sourceSets = project.getConvention()
- .getPlugin(JavaPluginConvention.class).getSourceSets();
-
- FileCollection dataModels;
- if (sourceSets.findByName(dataTemplateSourceSetName) != null)
- {
- if (debug)
- {
- System.out.println("sourceSet " + sourceSet.getName() + " has generated sourceSet " + dataTemplateSourceSetName);
- }
- dataTemplateJarTask = (Jar) project.getTasks().getByName(sourceSet.getName() + "DataTemplateJar");
- // The getArchivePath() API doesn’t carry any task dependency and has been deprecated.
- // Replace it with getArchiveFile() on Gradle 7,
- // but keep getArchivePath() for backwards compatibility with Gradle versions older than 5.1
- // DataHub Note - applied FIXME
- dataModels = dataModelConfig.plus(project.files(
- isAtLeastGradle7() ? dataTemplateJarTask.getArchiveFile() : dataTemplateJarTask.getArchivePath()));
- }
- else
- {
- dataModels = dataModelConfig;
- }
-
- // create source set for generated rest model, rest client source and class files.
- String targetSourceSetName = getGeneratedSourceSetName(sourceSet, REST_GEN_TYPE);
- SourceSet targetSourceSet = sourceSets.create(targetSourceSetName, ss ->
- {
- ss.java(sourceDirectorySet -> sourceDirectorySet.srcDir(generatedRestClientDir));
- ss.setCompileClasspath(dataModels.plus(project.getConfigurations().getByName("restClientCompile")));
- });
-
- project.getPlugins().withType(EclipsePlugin.class, eclipsePlugin -> {
- EclipseModel eclipseModel = (EclipseModel) project.getExtensions().findByName("eclipse");
- eclipseModel.getClasspath().getPlusConfigurations()
- .add(project.getConfigurations().getByName("restClientCompile"));
- });
-
- // idea plugin needs to know about new rest client source directory and its dependencies
- addGeneratedDir(project, targetSourceSet, Arrays.asList(
- getDataModelConfig(project, sourceSet),
- project.getConfigurations().getByName("restClientCompile")));
-
- // generate the rest client source files
- GenerateRestClientTask generateRestClientTask = project.getTasks()
- .create(targetSourceSet.getTaskName("generate", "restClient"), GenerateRestClientTask.class, task ->
- {
- task.dependsOn(project.getConfigurations().getByName("dataTemplate"));
- task.setInputDir(idlDir);
- task.setResolverPath(dataModels.plus(project.getConfigurations().getByName("restClientCompile")));
- task.setRuntimeClasspath(project.getConfigurations().getByName("dataModel")
- .plus(project.getConfigurations().getByName("dataTemplate").getArtifacts().getFiles()));
- task.setCodegenClasspath(project.getConfigurations().getByName(PEGASUS_PLUGIN_CONFIGURATION));
- task.setDestinationDir(generatedRestClientDir);
- task.setRestli2FormatSuppressed(project.hasProperty(SUPPRESS_REST_CLIENT_RESTLI_2));
- task.setRestli1FormatSuppressed(project.hasProperty(SUPPRESS_REST_CLIENT_RESTLI_1));
- if (isPropertyTrue(project, ENABLE_ARG_FILE))
- {
- task.setEnableArgFile(true);
- }
- if (isPropertyTrue(project, CODE_GEN_PATH_CASE_SENSITIVE))
- {
- task.setGenerateLowercasePath(false);
- }
- if (isPropertyTrue(project, ENABLE_FLUENT_API))
- {
- task.setGenerateFluentApi(true);
- }
- task.doFirst(new CacheableAction<>(t -> project.delete(generatedRestClientDir)));
- });
-
- if (dataTemplateJarTask != null)
- {
- generateRestClientTask.dependsOn(dataTemplateJarTask);
- }
-
- // TODO: Tighten the types so that _generateSourcesJarTask must be of type Jar.
- ((Jar) _generateSourcesJarTask).from(generateRestClientTask.getDestinationDir());
- _generateSourcesJarTask.dependsOn(generateRestClientTask);
-
- _generateJavadocTask.source(generateRestClientTask.getDestinationDir());
- _generateJavadocTask.setClasspath(_generateJavadocTask.getClasspath()
- .plus(project.getConfigurations().getByName("restClientCompile"))
- .plus(generateRestClientTask.getResolverPath()));
- _generateJavadocTask.dependsOn(generateRestClientTask);
-
- // make sure rest client source files have been generated before compiling them
- JavaCompile compileGeneratedRestClientTask = (JavaCompile) project.getTasks()
- .getByName(targetSourceSet.getCompileJavaTaskName());
- compileGeneratedRestClientTask.dependsOn(generateRestClientTask);
- compileGeneratedRestClientTask.getOptions().getCompilerArgs().add("-Xlint:-deprecation");
-
- // create the rest model jar file
- Task restModelJarTask = project.getTasks().create(sourceSet.getName() + "RestModelJar", Jar.class, task ->
- {
- task.from(idlDir, copySpec ->
- {
- copySpec.eachFile(fileCopyDetails -> project.getLogger()
- .info("Add idl file: {}", fileCopyDetails));
- copySpec.setIncludes(Collections.singletonList('*' + IDL_FILE_SUFFIX));
- });
- // FIXME change to #getArchiveAppendix().set(...); breaks backwards-compatibility before 5.1
- // DataHub Note - applied FIXME
- task.getArchiveAppendix().set(getAppendix(sourceSet, "rest-model"));
- task.setDescription("Generate rest model jar");
- });
-
- // create the rest client jar file
- Task restClientJarTask = project.getTasks()
- .create(sourceSet.getName() + "RestClientJar", Jar.class, task ->
- {
- task.dependsOn(compileGeneratedRestClientTask);
- task.from(idlDir, copySpec -> {
- copySpec.eachFile(fileCopyDetails -> {
- project.getLogger().info("Add interface file: {}", fileCopyDetails);
- fileCopyDetails.setPath("idl" + File.separatorChar + fileCopyDetails.getPath());
- });
- copySpec.setIncludes(Collections.singletonList('*' + IDL_FILE_SUFFIX));
- });
- task.from(targetSourceSet.getOutput());
- // FIXME change to #getArchiveAppendix().set(...); breaks backwards-compatibility before 5.1
- // DataHub Note - applied FIXME
- task.getArchiveAppendix().set(getAppendix(sourceSet, "rest-client"));
- task.setDescription("Generate rest client jar");
- });
-
- // add the rest model jar and the rest client jar to the list of project artifacts.
- if (!isTestSourceSet(sourceSet))
- {
- project.getArtifacts().add("restModel", restModelJarTask);
- project.getArtifacts().add("restClient", restClientJarTask);
- }
- else
- {
- project.getArtifacts().add("testRestModel", restModelJarTask);
- project.getArtifacts().add("testRestClient", restClientJarTask);
- }
- }
-
- // Return the appendix for generated jar files.
- // The source set name is not included for the main source set.
- private static String getAppendix(SourceSet sourceSet, String suffix)
- {
- return sourceSet.getName().equals("main") ? suffix : sourceSet.getName() + '-' + suffix;
- }
-
- private static Project getApiProject(Project project)
- {
- if (project.getExtensions().getExtraProperties().has("apiProject"))
- {
- return (Project) project.getExtensions().getExtraProperties().get("apiProject");
- }
-
- List<String> subsSuffixes;
- if (project.getExtensions().getExtraProperties().has("apiProjectSubstitutionSuffixes"))
- {
- @SuppressWarnings("unchecked")
- List<String> suffixValue = (List<String>) project.getExtensions()
- .getExtraProperties().get("apiProjectSubstitutionSuffixes");
-
- subsSuffixes = suffixValue;
- }
- else
- {
- subsSuffixes = Arrays.asList("-impl", "-service", "-server", "-server-impl");
- }
-
- for (String suffix : subsSuffixes)
- {
- if (project.getPath().endsWith(suffix))
- {
- String searchPath = project.getPath().substring(0, project.getPath().length() - suffix.length()) + "-api";
- Project apiProject = project.findProject(searchPath);
- if (apiProject != null)
- {
- return apiProject;
- }
- }
- }
-
- return project.findProject(project.getPath() + "-api");
- }
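
The suffix substitution above determines where the plugin looks for the api project. Below is a standalone sketch of just the path computation, using plain strings instead of Gradle project lookups; the real method also honors the ext.apiProject property and falls back to path + "-api" when no project exists at the substituted path.

import java.util.Arrays;
import java.util.List;

public final class ApiProjectPathSketch
{
  private static final List<String> DEFAULT_SUFFIXES = Arrays.asList("-impl", "-service", "-server", "-server-impl");

  // First candidate path tried by getApiProject above for a given project path.
  static String candidateApiPath(String projectPath)
  {
    for (String suffix : DEFAULT_SUFFIXES)
    {
      if (projectPath.endsWith(suffix))
      {
        return projectPath.substring(0, projectPath.length() - suffix.length()) + "-api";
      }
    }
    return projectPath + "-api";
  }

  public static void main(String[] args)
  {
    System.out.println(candidateApiPath(":foo-server")); // :foo-api
    System.out.println(candidateApiPath(":foo"));        // :foo-api
  }
}
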
-
- private static Project getCheckedApiProject(Project project)
- {
- Project apiProject = getApiProject(project);
-
- if (apiProject == project)
- {
- throw new GradleException("The API project of ${project.path} must not be itself.");
- }
-
- return apiProject;
- }
-
- /**
- * return the property value if the property exists and is not empty (-Pname=value)
- * return null if property does not exist or the property is empty (-Pname)
- *
- * @param project the project where to look for the property
- * @param propertyName the name of the property
- */
- public static String getNonEmptyProperty(Project project, String propertyName)
- {
- if (!project.hasProperty(propertyName))
- {
- return null;
- }
-
- String propertyValue = project.property(propertyName).toString();
- if (propertyValue.isEmpty())
- {
- return null;
- }
-
- return propertyValue;
- }
-
- /**
- * Return true if the given property exists and its value is true
- *
- * @param project the project where to look for the property
- * @param propertyName the name of the property
- */
- public static boolean isPropertyTrue(Project project, String propertyName)
- {
- return project.hasProperty(propertyName) && Boolean.valueOf(project.property(propertyName).toString());
- }
-
- private static String createModifiedFilesMessage(Collection<String> nonEquivExpectedFiles,
- Collection<String> foldersToBeBuilt)
- {
- StringBuilder builder = new StringBuilder();
- builder.append("\nRemember to checkin the changes to the following new or modified files:\n");
- for (String file : nonEquivExpectedFiles)
- {
- builder.append(" ");
- builder.append(file);
- builder.append("\n");
- }
-
- if (!foldersToBeBuilt.isEmpty())
- {
- builder.append("\nThe file modifications include service interface changes, you can build the the following projects "
- + "to re-generate the client APIs accordingly:\n");
- for (String folder : foldersToBeBuilt)
- {
- builder.append(" ");
- builder.append(folder);
- builder.append("\n");
- }
- }
-
- return builder.toString();
- }
-
- private static String createPossibleMissingFilesMessage(Collection<String> missingFiles)
- {
- StringBuilder builder = new StringBuilder();
- builder.append("If this is the result of an automated build, then you may have forgotten to check in some snapshot or idl files:\n");
- for (String file : missingFiles)
- {
- builder.append(" ");
- builder.append(file);
- builder.append("\n");
- }
-
- return builder.toString();
- }
-
- private static String findProperty(FileCompatibilityType type)
- {
- String property;
- switch (type)
- {
- case SNAPSHOT:
- property = SNAPSHOT_COMPAT_REQUIREMENT;
- break;
- case IDL:
- property = IDL_COMPAT_REQUIREMENT;
- break;
- case PEGASUS_SCHEMA_SNAPSHOT:
- property = PEGASUS_SCHEMA_SNAPSHOT_REQUIREMENT;
- break;
- case PEGASUS_EXTENSION_SCHEMA_SNAPSHOT:
- property = PEGASUS_EXTENSION_SCHEMA_SNAPSHOT_REQUIREMENT;
- break;
- default:
- throw new GradleException("No property defined for compatibility type " + type);
- }
- return property;
- }
-
- private static Set<File> buildWatchedRestModelInputDirs(Project project, SourceSet sourceSet) {
- @SuppressWarnings("unchecked")
- Map<String, PegasusOptions> pegasusOptions = (Map<String, PegasusOptions>) project
- .getExtensions().getExtraProperties().get("pegasus");
-
- File rootPath = new File(project.getProjectDir(),
- pegasusOptions.get(sourceSet.getName()).restModelOptions.getRestResourcesRootPath());
-
- IdlOptions idlOptions = pegasusOptions.get(sourceSet.getName()).idlOptions;
-
- // if idlItems exist, only watch the smaller subset
- return idlOptions.getIdlItems().stream()
- .flatMap(idlItem -> Arrays.stream(idlItem.packageNames))
- .map(packageName -> new File(rootPath, packageName.replace('.', '/')))
- .collect(Collectors.toCollection(TreeSet::new));
- }
-
- private static <T> Set<T> difference(Set<T> left, Set<T> right)
- {
- Set<T> result = new HashSet<>(left);
- result.removeAll(right);
- return result;
- }
-
- /**
- * Configures the given source set so that its data schema directory (usually 'pegasus') is marked as a resource root.
- * The purpose of this is to improve the IDE experience. Makes sure to exclude this directory from being packaged in
- * with the default Jar task.
- */
- private static void configureDataSchemaResourcesRoot(Project project, SourceSet sourceSet)
- {
- sourceSet.resources(sourceDirectorySet -> {
- final String dataSchemaPath = getDataSchemaPath(project, sourceSet);
- final File dataSchemaRoot = project.file(dataSchemaPath);
- sourceDirectorySet.srcDir(dataSchemaPath);
- project.getLogger().info("Adding resource root '{}'", dataSchemaPath);
-
- final String extensionsSchemaPath = getExtensionSchemaPath(project, sourceSet);
- final File extensionsSchemaRoot = project.file(extensionsSchemaPath);
- sourceDirectorySet.srcDir(extensionsSchemaPath);
- project.getLogger().info("Adding resource root '{}'", extensionsSchemaPath);
-
- // Exclude the data schema and extensions schema directory from being copied into the default Jar task
- sourceDirectorySet.getFilter().exclude(fileTreeElement -> {
- final File file = fileTreeElement.getFile();
- // Traversal starts with the children of a resource root, so checking the direct parent is sufficient
- final boolean underDataSchemaRoot = dataSchemaRoot.equals(file.getParentFile());
- final boolean underExtensionsSchemaRoot = extensionsSchemaRoot.equals(file.getParentFile());
- final boolean exclude = (underDataSchemaRoot || underExtensionsSchemaRoot);
- if (exclude)
- {
- project.getLogger().info("Excluding resource directory '{}'", file);
- }
- return exclude;
- });
- });
- }
-
- private Task generatePegasusSchemaSnapshot(Project project, SourceSet sourceSet, String taskName, File inputDir, File outputDir,
- boolean isExtensionSchema)
- {
- return project.getTasks().create(sourceSet.getTaskName("generate", taskName),
- GeneratePegasusSnapshotTask.class, task ->
- {
- task.setInputDir(inputDir);
- task.setResolverPath(getDataModelConfig(project, sourceSet).plus(project.files(getDataSchemaPath(project, sourceSet))));
- task.setClassPath(project.getConfigurations().getByName(PEGASUS_PLUGIN_CONFIGURATION));
- task.setPegasusSchemaSnapshotDestinationDir(outputDir);
- task.setExtensionSchema(isExtensionSchema);
- if (isPropertyTrue(project, ENABLE_ARG_FILE))
- {
- task.setEnableArgFile(true);
- }
- });
- }
-
- private Task publishPegasusSchemaSnapshot(Project project, SourceSet sourceSet, String taskName, Task checkPegasusSnapshotTask,
- File inputDir, File outputDir)
- {
- return project.getTasks().create(sourceSet.getTaskName("publish", taskName),
- Sync.class, task ->
- {
- task.dependsOn(checkPegasusSnapshotTask);
- task.from(inputDir);
- task.into(outputDir);
- task.onlyIf(t -> !SharedFileUtils.getSuffixedFiles(project, inputDir, PDL_FILE_SUFFIX).isEmpty());
- });
- }
-
- private void checkGradleVersion(Project project)
- {
- if (MIN_REQUIRED_VERSION.compareTo(GradleVersion.current()) > 0)
- {
- throw new GradleException(String.format("This plugin does not support %s. Please use %s or later.",
- GradleVersion.current(),
- MIN_REQUIRED_VERSION));
- }
- if (MIN_SUGGESTED_VERSION.compareTo(GradleVersion.current()) > 0)
- {
- project.getLogger().warn(String.format("Pegasus supports %s, but it may not be supported in the next major release. Please use %s or later.",
- GradleVersion.current(),
- MIN_SUGGESTED_VERSION));
- }
- }
-
- /**
- * Reflection is necessary to obscure types introduced in Gradle 5.3
- *
- * @param sourceSet the target sourceset upon which to create a new feature variant
- * @return an Action which modifies a org.gradle.api.plugins.FeatureSpec instance
- */
- private Action<?>/**/ createFeatureVariantFromSourceSet(SourceSet sourceSet)
- {
- return featureSpec -> {
- try
- {
- Class<?> clazz = Class.forName("org.gradle.api.plugins.FeatureSpec");
- Method usingSourceSet = clazz.getDeclaredMethod("usingSourceSet", SourceSet.class);
- usingSourceSet.invoke(featureSpec, sourceSet);
- }
- catch (ReflectiveOperationException e)
- {
- throw new GradleException("Unable to invoke FeatureSpec#usingSourceSet(SourceSet)", e);
- }
- };
- }
-
- protected static boolean isAtLeastGradle61()
- {
- return GradleVersion.current().getBaseVersion().compareTo(GradleVersion.version("6.1")) >= 0;
- }
-
- public static boolean isAtLeastGradle7() {
- return GradleVersion.current().getBaseVersion().compareTo(GradleVersion.version("7.0")) >= 0;
- }
-}
\ No newline at end of file
diff --git a/buildSrc/src/main/java/com/linkedin/pegasus/gradle/tasks/ChangedFileReportTask.java b/buildSrc/src/main/java/com/linkedin/pegasus/gradle/tasks/ChangedFileReportTask.java
deleted file mode 100644
index a2aafaf1be017..0000000000000
--- a/buildSrc/src/main/java/com/linkedin/pegasus/gradle/tasks/ChangedFileReportTask.java
+++ /dev/null
@@ -1,124 +0,0 @@
-package com.linkedin.pegasus.gradle.tasks;
-
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.HashSet;
-import java.util.Set;
-import java.util.function.Consumer;
-import java.util.stream.Collectors;
-import org.gradle.api.DefaultTask;
-import org.gradle.api.file.FileCollection;
-import org.gradle.api.specs.Specs;
-import org.gradle.api.tasks.InputFiles;
-import org.gradle.api.tasks.Internal;
-import org.gradle.api.tasks.SkipWhenEmpty;
-import org.gradle.api.tasks.TaskAction;
-import org.gradle.work.FileChange;
-import org.gradle.work.InputChanges;
-
-
-public class ChangedFileReportTask extends DefaultTask
-{
- private final Collection<String> _needCheckinFiles = new ArrayList<>();
-
- private FileCollection _idlFiles = getProject().files();
- private FileCollection _snapshotFiles = getProject().files();
-
- public ChangedFileReportTask()
- {
- //with Gradle 6.0, Declaring an incremental task without outputs is not allowed.
- getOutputs().upToDateWhen(Specs.satisfyNone());
- }
-
- // DataHub Note - updated for InputChanges
- @TaskAction
- public void checkFilesForChanges(InputChanges inputChanges)
- {
- getLogger().lifecycle("Checking idl and snapshot files for changes...");
- getLogger().info("idlFiles: " + _idlFiles.getAsPath());
- getLogger().info("snapshotFiles: " + _snapshotFiles.getAsPath());
-
- Set<String> filesRemoved = new HashSet<>();
- Set<String> filesAdded = new HashSet<>();
- Set<String> filesChanged = new HashSet<>();
-
- if (inputChanges.isIncremental())
- {
- Consumer<FileChange> handleChange = change ->
- {
- switch (change.getChangeType()) {
- case ADDED:
- filesAdded.add(change.getFile().getAbsolutePath());
- break;
- case REMOVED:
- filesRemoved.add(change.getFile().getAbsolutePath());
- break;
- case MODIFIED:
- filesChanged.add(change.getFile().getAbsolutePath());
- break;
- }
- };
-
- inputChanges.getFileChanges(_idlFiles).forEach(handleChange);
- inputChanges.getFileChanges(_snapshotFiles).forEach(handleChange);
-
- if (!filesRemoved.isEmpty())
- {
- String files = joinByComma(filesRemoved);
- _needCheckinFiles.add(files);
- getLogger().lifecycle(
- "The following files have been removed, be sure to remove them from source control: {}", files);
- }
-
- if (!filesAdded.isEmpty())
- {
- String files = joinByComma(filesAdded);
- _needCheckinFiles.add(files);
- getLogger().lifecycle("The following files have been added, be sure to add them to source control: {}", files);
- }
-
- if (!filesChanged.isEmpty())
- {
- String files = joinByComma(filesChanged);
- _needCheckinFiles.add(files);
- getLogger().lifecycle(
- "The following files have been changed, be sure to commit the changes to source control: {}", files);
- }
- }
- }
-
- private String joinByComma(Set<String> files)
- {
- return files.stream().collect(Collectors.joining(", "));
- }
-
- @InputFiles
- @SkipWhenEmpty
- public FileCollection getSnapshotFiles()
- {
- return _snapshotFiles;
- }
-
- public void setSnapshotFiles(FileCollection snapshotFiles)
- {
- _snapshotFiles = snapshotFiles;
- }
-
- @InputFiles
- @SkipWhenEmpty
- public FileCollection getIdlFiles()
- {
- return _idlFiles;
- }
-
- public void setIdlFiles(FileCollection idlFiles)
- {
- _idlFiles = idlFiles;
- }
-
- @Internal
- public Collection<String> getNeedCheckinFiles()
- {
- return _needCheckinFiles;
- }
-}
\ No newline at end of file
From 7ae310d3a761887d0ac7c8a3720be545f661a89f Mon Sep 17 00:00:00 2001
From: Mayuri Nehate <33225191+mayurinehate@users.noreply.github.com>
Date: Thu, 8 Feb 2024 06:32:23 +0530
Subject: [PATCH 6/7] feat(graphql): get raw aspects for assertions, allow
aspectNames filter (#9792)
---
.../linkedin/datahub/graphql/GmsGraphQLEngine.java | 4 +++-
.../datahub/graphql/WeaklyTypedAspectsResolver.java | 7 ++++++-
.../src/main/resources/entity.graphql | 12 ++++++++++++
3 files changed, 21 insertions(+), 2 deletions(-)
diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java
index e3eef0688c269..28b3a982c7b28 100644
--- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java
+++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java
@@ -2436,7 +2436,9 @@ private void configureAssertionResolvers(final RuntimeWiring.Builder builder) {
? assertion.getDataPlatformInstance().getUrn()
: null;
}))
- .dataFetcher("runEvents", new AssertionRunEventResolver(entityClient)));
+ .dataFetcher("runEvents", new AssertionRunEventResolver(entityClient))
+ .dataFetcher(
+ "aspects", new WeaklyTypedAspectsResolver(entityClient, entityRegistry)));
}
private void configurePolicyResolvers(final RuntimeWiring.Builder builder) {
diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/WeaklyTypedAspectsResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/WeaklyTypedAspectsResolver.java
index d8665ae784bd1..fd23cd5fdda45 100644
--- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/WeaklyTypedAspectsResolver.java
+++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/WeaklyTypedAspectsResolver.java
@@ -37,7 +37,12 @@ public class WeaklyTypedAspectsResolver implements DataFetcher<CompletableFuture<List<RawAspect>>>
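As a quick illustration of what this patch enables, the sketch below queries the new `aspects` field on an assertion from Python. It is a minimal, unverified sketch: `DataHubGraph` and `execute_graphql` come from metadata-ingestion, while the exact GraphQL shape (`aspects(input: { aspectNames: [...] })` returning `aspectName`/`payload`) is assumed from the commit subject and the resolver wiring above rather than taken from the entity.graphql hunk.

# Hedged sketch: fetch raw aspects for an assertion via the new "aspects" field.
# The aspectNames filter and the aspectName/payload result fields are assumptions
# based on this patch's description, not verified against the GraphQL schema.
from datahub.ingestion.graph.client import DataHubGraph, DatahubClientConfig

graph = DataHubGraph(DatahubClientConfig(server="http://localhost:8080"))

query = """
query assertionAspects($urn: String!) {
  assertion(urn: $urn) {
    urn
    aspects(input: { aspectNames: ["assertionInfo"] }) {
      aspectName
      payload
    }
  }
}
"""

# Placeholder URN; replace with a real assertion URN from your instance.
result = graph.execute_graphql(
    query=query,
    variables={"urn": "urn:li:assertion:example-assertion-id"},
)
print(result)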
Date: Thu, 8 Feb 2024 14:05:26 +0530
Subject: [PATCH 7/7] feat(ingest/slack): source to get user info from slack
(#9776)
---
metadata-ingestion/setup.py | 8 +
.../ingestion/source/slack/__init__.py | 0
.../datahub/ingestion/source/slack/slack.py | 181 ++++++++++++++++++
3 files changed, 189 insertions(+)
create mode 100644 metadata-ingestion/src/datahub/ingestion/source/slack/__init__.py
create mode 100644 metadata-ingestion/src/datahub/ingestion/source/slack/slack.py
diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py
index bbbab73fd1cf5..74dcde5e066b3 100644
--- a/metadata-ingestion/setup.py
+++ b/metadata-ingestion/setup.py
@@ -245,6 +245,10 @@
powerbi_report_server = {"requests", "requests_ntlm"}
+slack = {
+ "slack-sdk==3.18.1"
+}
+
databricks = {
# 0.1.11 appears to have authentication issues with azure databricks
"databricks-sdk>=0.9.0",
@@ -367,6 +371,7 @@
"snowflake": snowflake_common | usage_common | sqlglot_lib,
"sqlalchemy": sql_common,
"sql-queries": usage_common | sqlglot_lib,
+ "slack": slack,
"superset": {
"requests",
"sqlalchemy",
@@ -503,6 +508,7 @@
"redshift",
"s3",
"snowflake",
+ "slack",
"tableau",
"teradata",
"trino",
@@ -543,6 +549,7 @@
"kafka-connect",
"ldap",
"mongodb",
+ "slack",
"mssql",
"mysql",
"mariadb",
@@ -597,6 +604,7 @@
"postgres = datahub.ingestion.source.sql.postgres:PostgresSource",
"redash = datahub.ingestion.source.redash:RedashSource",
"redshift = datahub.ingestion.source.redshift.redshift:RedshiftSource",
+ "slack = datahub.ingestion.source.slack.slack:SlackSource",
"snowflake = datahub.ingestion.source.snowflake.snowflake_v2:SnowflakeV2Source",
"superset = datahub.ingestion.source.superset:SupersetSource",
"tableau = datahub.ingestion.source.tableau:TableauSource",
diff --git a/metadata-ingestion/src/datahub/ingestion/source/slack/__init__.py b/metadata-ingestion/src/datahub/ingestion/source/slack/__init__.py
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/metadata-ingestion/src/datahub/ingestion/source/slack/slack.py b/metadata-ingestion/src/datahub/ingestion/source/slack/slack.py
new file mode 100644
index 0000000000000..ed425cc25d98f
--- /dev/null
+++ b/metadata-ingestion/src/datahub/ingestion/source/slack/slack.py
@@ -0,0 +1,181 @@
+import logging
+import textwrap
+from dataclasses import dataclass
+from typing import Iterable, Optional
+
+from pydantic import Field, SecretStr
+from slack_sdk import WebClient
+
+from datahub.configuration.common import ConfigModel
+from datahub.emitter.mcp import MetadataChangeProposalWrapper
+from datahub.ingestion.api.common import PipelineContext
+from datahub.ingestion.api.decorators import (
+ SupportStatus,
+ config_class,
+ platform_name,
+ support_status,
+)
+from datahub.ingestion.api.source import (
+ SourceReport,
+ TestableSource,
+ TestConnectionReport,
+)
+from datahub.ingestion.api.workunit import MetadataWorkUnit
+from datahub.metadata.schema_classes import CorpUserEditableInfoClass
+from datahub.utilities.urns.urn import Urn
+
+logger: logging.Logger = logging.getLogger(__name__)
+
+
+@dataclass
+class CorpUser:
+ urn: Optional[str] = None
+ email: Optional[str] = None
+ slack_id: Optional[str] = None
+ title: Optional[str] = None
+ image_url: Optional[str] = None
+ phone: Optional[str] = None
+
+
+class SlackSourceConfig(ConfigModel):
+ bot_token: SecretStr = Field(
+ description="Bot token for the Slack workspace. Needs `users:read`, `users:read.email` and `users.profile:read` scopes.",
+ )
+
+
+@platform_name("Slack")
+@config_class(SlackSourceConfig)
+@support_status(SupportStatus.TESTING)
+class SlackSource(TestableSource):
+ def __init__(self, ctx: PipelineContext, config: SlackSourceConfig):
+ self.ctx = ctx
+ self.config = config
+ self.report = SourceReport()
+
+ @classmethod
+ def create(cls, config_dict, ctx):
+ config = SlackSourceConfig.parse_obj(config_dict)
+ return cls(ctx, config)
+
+ @staticmethod
+ def test_connection(config_dict: dict) -> TestConnectionReport:
+ raise NotImplementedError("This class does not implement this method")
+
+ def get_slack_client(self) -> WebClient:
+ return WebClient(token=self.config.bot_token.get_secret_value())
+
+ def get_workunits_internal(
+ self,
+ ) -> Iterable[MetadataWorkUnit]:
+ assert self.ctx.graph is not None
+ auth_resp = self.get_slack_client().auth_test()
+ logger.info("Successfully connected to Slack")
+ logger.info(auth_resp.data)
+ for user_obj in self.get_user_to_be_updated():
+ self.populate_slack_id_from_email(user_obj)
+ if user_obj.slack_id is None:
+ continue
+ self.populate_user_profile(user_obj)
+ if user_obj.urn is None:
+ continue
+ logger.info(f"User: {user_obj}")
+ corpuser_editable_info = (
+ self.ctx.graph.get_aspect(
+ entity_urn=user_obj.urn, aspect_type=CorpUserEditableInfoClass
+ )
+ or CorpUserEditableInfoClass()
+ )
+ corpuser_editable_info.email = user_obj.email
+ corpuser_editable_info.slack = user_obj.slack_id
+ corpuser_editable_info.title = user_obj.title
+ if user_obj.image_url:
+ corpuser_editable_info.pictureLink = user_obj.image_url
+ if user_obj.phone:
+ corpuser_editable_info.phone = user_obj.phone
+ yield MetadataWorkUnit(
+ id=f"{user_obj.urn}",
+ mcp=MetadataChangeProposalWrapper(
+ entityUrn=user_obj.urn,
+ aspect=corpuser_editable_info,
+ ),
+ )
+
+ def populate_user_profile(self, user_obj: CorpUser) -> None:
+ try:
+ # https://api.slack.com/methods/users.profile.get
+ user_profile_res = self.get_slack_client().users_profile_get(
+ user=user_obj.slack_id
+ )
+ user_profile = user_profile_res.get("profile", {})
+ user_obj.title = user_profile.get("title")
+ user_obj.image_url = user_profile.get("image_192")
+ user_obj.phone = user_profile.get("phone")
+ except Exception as e:
+ if "missing_scope" in str(e):
+ raise e
+ return
+
+ def populate_slack_id_from_email(self, user_obj: CorpUser) -> None:
+ if user_obj.email is None:
+ return
+ try:
+ # https://api.slack.com/methods/users.lookupByEmail
+ user_info_res = self.get_slack_client().users_lookupByEmail(
+ email=user_obj.email
+ )
+ user_info = user_info_res.get("user", {})
+ user_obj.slack_id = user_info.get("id")
+ except Exception as e:
+ if "users_not_found" in str(e):
+ return
+ raise e
+
+ def get_user_to_be_updated(self) -> Iterable[CorpUser]:
+ graphql_query = textwrap.dedent(
+ """
+ query listUsers($input: ListUsersInput!) {
+ listUsers(input: $input) {
+ total
+ users {
+ urn
+ editableProperties {
+ email
+ slack
+ }
+ }
+ }
+ }
+ """
+ )
+ start = 0
+ count = 10
+ total = count
+
+ assert self.ctx.graph is not None
+
+ while start < total:
+ variables = {"input": {"start": start, "count": count}}
+ response = self.ctx.graph.execute_graphql(
+ query=graphql_query, variables=variables
+ )
+ list_users = response.get("listUsers", {})
+ total = list_users.get("total", 0)
+ users = list_users.get("users", [])
+ for user in users:
+ user_obj = CorpUser()
+ editable_properties = user.get("editableProperties", {})
+ user_obj.urn = user.get("urn")
+ if user_obj.urn is None:
+ continue
+ if editable_properties is not None:
+ user_obj.email = editable_properties.get("email")
+ if user_obj.email is None:
+ urn_id = Urn.from_string(user_obj.urn).get_entity_id_as_string()
+ if "@" in urn_id:
+ user_obj.email = urn_id
+ if user_obj.email is not None:
+ yield user_obj
+ start += count
+
+ def get_report(self) -> SourceReport:
+ return self.report
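For context, a minimal, untested sketch of how this new source could be wired into an ingestion run using the standard Pipeline API; the bot token and server values are placeholders, and `datahub_api` is included because the source reads existing users through `ctx.graph`.

# Hedged sketch: run the new Slack source through the standard ingestion Pipeline.
# The "slack" source type mirrors the entry point registered in setup.py above.
from datahub.ingestion.run.pipeline import Pipeline

pipeline = Pipeline.create(
    {
        "source": {
            "type": "slack",
            # Bot token needs users:read, users:read.email and users.profile:read scopes.
            "config": {"bot_token": "xoxb-your-bot-token"},
        },
        "sink": {
            "type": "datahub-rest",
            "config": {"server": "http://localhost:8080"},
        },
        # Needed so the source can list existing users via ctx.graph.
        "datahub_api": {"server": "http://localhost:8080"},
    }
)
pipeline.run()
pipeline.raise_from_status()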