diff --git a/build.gradle b/build.gradle
index ba61d97f0ed6e..ea81d26355027 100644
--- a/build.gradle
+++ b/build.gradle
@@ -30,7 +30,7 @@ buildscript {
   ext.junitJupiterVersion = '5.6.1'
   // Releases: https://github.com/linkedin/rest.li/blob/master/CHANGELOG.md
-  ext.pegasusVersion = '29.48.4'
+  ext.pegasusVersion = '29.51.0'
   ext.mavenVersion = '3.6.3'
   ext.springVersion = '6.1.2'
   ext.springBootVersion = '3.2.1'
@@ -269,13 +269,14 @@ allprojects {
   apply plugin: 'eclipse'
   // apply plugin: 'org.gradlex.java-ecosystem-capabilities'
 
-  tasks.withType(Test).configureEach {
-    // https://docs.gradle.org/current/userguide/performance.html
-    maxParallelForks = Runtime.runtime.availableProcessors().intdiv(2) ?: 1
+  tasks.withType(Test).configureEach { task -> if (task.project.name != "metadata-io") {
+    // https://docs.gradle.org/current/userguide/performance.html
+    maxParallelForks = Runtime.runtime.availableProcessors().intdiv(2) ?: 1
 
-    if (project.configurations.getByName("testImplementation").getDependencies()
-        .any{ it.getName().contains("testng") }) {
-      useTestNG()
+    if (project.configurations.getByName("testImplementation").getDependencies()
+        .any { it.getName().contains("testng") }) {
+      useTestNG()
+    }
     }
   }
 
diff --git a/buildSrc/build.gradle b/buildSrc/build.gradle
index 0c2d91e1f7ac1..88900e06d4845 100644
--- a/buildSrc/build.gradle
+++ b/buildSrc/build.gradle
@@ -25,7 +25,4 @@ dependencies {
 
   compileOnly 'org.projectlombok:lombok:1.18.30'
   annotationProcessor 'org.projectlombok:lombok:1.18.30'
-
-  // pegasus dependency, overrides for tasks
-  implementation 'com.linkedin.pegasus:gradle-plugins:29.48.4'
 }
\ No newline at end of file
diff --git a/buildSrc/src/main/java/com/linkedin/pegasus/gradle/PegasusPlugin.java b/buildSrc/src/main/java/com/linkedin/pegasus/gradle/PegasusPlugin.java
deleted file mode 100644
index 2460abcad6f9e..0000000000000
--- a/buildSrc/src/main/java/com/linkedin/pegasus/gradle/PegasusPlugin.java
+++ /dev/null
@@ -1,2444 +0,0 @@
-/*
- * Copyright (c) 2019 LinkedIn Corp.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */ - -package com.linkedin.pegasus.gradle; - -import com.linkedin.pegasus.gradle.PegasusOptions.IdlOptions; -import com.linkedin.pegasus.gradle.internal.CompatibilityLogChecker; -import com.linkedin.pegasus.gradle.tasks.ChangedFileReportTask; -import com.linkedin.pegasus.gradle.tasks.CheckIdlTask; -import com.linkedin.pegasus.gradle.tasks.CheckPegasusSnapshotTask; -import com.linkedin.pegasus.gradle.tasks.CheckRestModelTask; -import com.linkedin.pegasus.gradle.tasks.CheckSnapshotTask; -import com.linkedin.pegasus.gradle.tasks.GenerateAvroSchemaTask; -import com.linkedin.pegasus.gradle.tasks.GenerateDataTemplateTask; -import com.linkedin.pegasus.gradle.tasks.GeneratePegasusSnapshotTask; -import com.linkedin.pegasus.gradle.tasks.GenerateRestClientTask; -import com.linkedin.pegasus.gradle.tasks.GenerateRestModelTask; -import com.linkedin.pegasus.gradle.tasks.PublishRestModelTask; -import com.linkedin.pegasus.gradle.tasks.TranslateSchemasTask; -import com.linkedin.pegasus.gradle.tasks.ValidateExtensionSchemaTask; -import com.linkedin.pegasus.gradle.tasks.ValidateSchemaAnnotationTask; -import java.io.File; -import java.io.IOException; -import java.io.InputStream; -import java.lang.reflect.Method; -import java.nio.file.Files; -import java.nio.file.Path; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collection; -import java.util.Collections; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Locale; -import java.util.Map; -import java.util.Properties; -import java.util.Set; -import java.util.TreeSet; -import java.util.function.Function; -import java.util.regex.Pattern; -import java.util.stream.Collectors; - -import org.gradle.api.Action; -import org.gradle.api.GradleException; -import org.gradle.api.Plugin; -import org.gradle.api.Project; -import org.gradle.api.Task; -import org.gradle.api.artifacts.Configuration; -import org.gradle.api.artifacts.ConfigurationContainer; -import org.gradle.api.file.FileCollection; -import org.gradle.api.plugins.JavaBasePlugin; -import org.gradle.api.plugins.JavaPlugin; -import org.gradle.api.plugins.JavaPluginConvention; -import org.gradle.api.plugins.JavaPluginExtension; -import org.gradle.api.publish.PublishingExtension; -import org.gradle.api.publish.ivy.IvyPublication; -import org.gradle.api.publish.ivy.plugins.IvyPublishPlugin; -import org.gradle.api.tasks.Copy; -import org.gradle.api.tasks.Delete; -import org.gradle.api.tasks.SourceSet; -import org.gradle.api.tasks.SourceSetContainer; -import org.gradle.api.tasks.Sync; -import org.gradle.api.tasks.TaskProvider; -import org.gradle.api.tasks.bundling.Jar; -import org.gradle.api.tasks.compile.JavaCompile; -import org.gradle.api.tasks.javadoc.Javadoc; -import org.gradle.language.base.plugins.LifecycleBasePlugin; -import org.gradle.language.jvm.tasks.ProcessResources; -import org.gradle.plugins.ide.eclipse.EclipsePlugin; -import org.gradle.plugins.ide.eclipse.model.EclipseModel; -import org.gradle.plugins.ide.idea.IdeaPlugin; -import org.gradle.plugins.ide.idea.model.IdeaModule; -import org.gradle.util.GradleVersion; - - -/** - * Pegasus code generation plugin. - * The supported project layout for this plugin is as follows: - * - *
- *   --- api/
- *   |   --- build.gradle
- *   |   --- src/
- *   |       --- <sourceSet>/
- *   |       |   --- idl/
- *   |       |   |   --- <published idl (.restspec.json) files>
- *   |       |   --- java/
- *   |       |   |   --- <packageName>/
- *   |       |   |       --- <common java files>
- *   |       |   --- pegasus/
- *   |       |       --- <packageName>/
- *   |       |           --- <data schema (.pdsc) files>
- *   |       --- <sourceSet>GeneratedDataTemplate/
- *   |       |   --- java/
- *   |       |       --- <packageName>/
- *   |       |           --- <data template source files generated from data schema (.pdsc) files>
- *   |       --- <sourceSet>GeneratedAvroSchema/
- *   |       |   --- avro/
- *   |       |       --- <packageName>/
- *   |       |           --- <avsc avro schema files (.avsc) generated from pegasus schema files>
- *   |       --- <sourceSet>GeneratedRest/
- *   |           --- java/
- *   |               --- <packageName>/
- *   |                   --- <rest client source (.java) files generated from published idl>
- *   --- impl/
- *   |   --- build.gradle
- *   |   --- src/
- *   |       --- <sourceSet>/
- *   |       |   --- java/
- *   |       |       --- <packageName>/
- *   |       |           --- <resource class source (.java) files>
- *   |       --- <sourceSet>GeneratedRest/
- *   |           --- idl/
- *   |               --- <generated idl (.restspec.json) files>
- *   --- <other projects>/
- * 
- *
- * Performs the following functions:
- *
- * Generate data model and data template jars for each source set.
- *
- * Overview:
- *
- * In the api project, the plugin generates the data template source (.java) files from the - * data schema (.pdsc) files, and furthermore compiles the source files and packages them - * to jar files. Details of jar contents will be explained in following paragraphs. - * In general, data schema files should exist only in api projects. - *

- * - *

- * Configure the server and client implementation projects to depend on the - * api project's dataTemplate configuration to get access to the generated data templates - * from within these projects. This allows api classes to be built first so that implementation - * projects can consume them. We recommend this structure to avoid circular dependencies - * (directly or indirectly) among implementation projects. - *

- *
- * Detail:
- *
- * Generates data template source (.java) files from data schema (.pdsc) files, - * compiles the data template source (.java) files into class (.class) files, - * creates a data model jar file and a data template jar file. - * The data model jar file contains the source data schema (.pdsc) files. - * The data template jar file contains both the source data schema (.pdsc) files - * and the generated data template class (.class) files. - *

- * - *

- * In the data template generation phase, the plugin creates a new target source set - * for the generated files. The new target source set's name is the input source set name's - * suffixed with "GeneratedDataTemplate", e.g. "mainGeneratedDataTemplate". - * The plugin invokes PegasusDataTemplateGenerator to generate data template source (.java) files - * for all data schema (.pdsc) files present in the input source set's pegasus - * directory, e.g. "src/main/pegasus". The generated data template source (.java) files - * will be in the new target source set's java source directory, e.g. - * "src/mainGeneratedDataTemplate/java". In addition to - * the data schema (.pdsc) files in the pegasus directory, the dataModel configuration - * specifies resolver path for the PegasusDataTemplateGenerator. The resolver path - * provides the data schemas and previously generated data template classes that - * may be referenced by the input source set's data schemas. In most cases, the dataModel - * configuration should contain data template jars. - *

- * - *

- * The next phase is the data template compilation phase, the plugin compiles the generated - * data template source (.java) files into class files. The dataTemplateCompile configuration - * specifies the pegasus jars needed to compile these classes. The compileClasspath of the - * target source set is a composite of the dataModel configuration which includes the data template - * classes that were previously generated and included in the dependent data template jars, - * and the dataTemplateCompile configuration. - * This configuration should specify a dependency on the Pegasus data jar. - *

- * - *

- * The following phase is creating the the data model jar and the data template jar. - * This plugin creates the data model jar that includes the contents of the - * input source set's pegasus directory, and sets the jar file's classification to - * "data-model". Hence, the resulting jar file's name should end with "-data-model.jar". - * It adds the data model jar as an artifact to the dataModel configuration. - * This jar file should only contain data schema (.pdsc) files. - *

- * - *

- * This plugin also create the data template jar that includes the contents of the input - * source set's pegasus directory and the java class output directory of the - * target source set. It sets the jar file's classification to "data-template". - * Hence, the resulting jar file's name should end with "-data-template.jar". - * It adds the data template jar file as an artifact to the dataTemplate configuration. - * This jar file contains both data schema (.pdsc) files and generated data template - * class (.class) files. - *

- * - *

- * This plugin will ensure that data template source files are generated before - * compiling the input source set and before the idea and eclipse tasks. It - * also adds the generated classes to the compileClasspath of the input source set. - *

- * - *

- * The configurations that apply to generating the data model and data template jars - * are as follow: - *

- *

- *
- * Performs the following functions:
- *
- * Generate avro schema jars for each source set.
- *
- * Overview:
- *
- * In the api project, the task 'generateAvroSchema' generates the avro schema (.avsc) - * files from pegasus schema (.pdsc) files. In general, data schema files should exist - * only in api projects. - *

- * - *

- * Configure the server and client implementation projects to depend on the - * api project's avroSchema configuration to get access to the generated avro schemas - * from within these projects. - *

- * - *

- * This plugin also create the avro schema jar that includes the contents of the input - * source set's avro directory and the avsc schema files. - * The resulting jar file's name should end with "-avro-schema.jar". - *

- *
- * Generate rest model and rest client jars for each source set.
- *
- * Overview:
- *
- * In the api project, generates rest client source (.java) files from the idl, - * compiles the rest client source (.java) files to rest client class (.class) files - * and puts them in jar files. In general, the api project should be only place that - * contains the publishable idl files. If the published idl changes an existing idl - * in the api project, the plugin will emit message indicating this has occurred and - * suggest that the entire project be rebuilt if it is desirable for clients of the - * idl to pick up the newly published changes. - *

- * - *

- * In the impl project, generates the idl (.restspec.json) files from the input - * source set's resource class files, then compares them against the existing idl - * files in the api project for compatibility checking. If incompatible changes are - * found, the build fails (unless certain flag is specified, see below). If the - * generated idl passes compatibility checks (see compatibility check levels below), - * publishes the generated idl (.restspec.json) to the api project. - *

- *
- * Detail:
- *
- * rest client generation phase: in api project
- *
- * In this phase, the rest client source (.java) files are generated from the - * api project idl (.restspec.json) files using RestRequestBuilderGenerator. - * The generated rest client source files will be in the new target source set's - * java source directory, e.g. "src/mainGeneratedRest/java". - *

- * - *

- * RestRequestBuilderGenerator requires access to the data schemas referenced - * by the idl. The dataModel configuration specifies the resolver path needed - * by RestRequestBuilderGenerator to access the data schemas referenced by - * the idl that is not in the source set's pegasus directory. - * This plugin automatically includes the data schema (.pdsc) files in the - * source set's pegasus directory in the resolver path. - * In most cases, the dataModel configuration should contain data template jars. - * The data template jars contains both data schema (.pdsc) files and generated - * data template class (.class) files. By specifying data template jars instead - * of data model jars, redundant generation of data template classes is avoided - * as classes that can be found in the resolver path are not generated. - *

- *
- * rest client compilation phase: in api project
- *
- * In this phase, the plugin compiles the generated rest client source (.java) - * files into class files. The restClientCompile configuration specifies the - * pegasus jars needed to compile these classes. The compile classpath is a - * composite of the dataModel configuration which includes the data template - * classes that were previously generated and included in the dependent data template - * jars, and the restClientCompile configuration. - * This configuration should specify a dependency on the Pegasus restli-client jar. - *

- * - *

- * The following stage is creating the the rest model jar and the rest client jar. - * This plugin creates the rest model jar that includes the - * generated idl (.restspec.json) files, and sets the jar file's classification to - * "rest-model". Hence, the resulting jar file's name should end with "-rest-model.jar". - * It adds the rest model jar as an artifact to the restModel configuration. - * This jar file should only contain idl (.restspec.json) files. - *

- * - *

- * This plugin also create the rest client jar that includes the generated - * idl (.restspec.json) files and the java class output directory of the - * target source set. It sets the jar file's classification to "rest-client". - * Hence, the resulting jar file's name should end with "-rest-client.jar". - * It adds the rest client jar file as an artifact to the restClient configuration. - * This jar file contains both idl (.restspec.json) files and generated rest client - * class (.class) files. - *

- *
- * idl generation phase: in server implementation project
- *
- * Before entering this phase, the plugin will ensure that generating idl will - * occur after compiling the input source set. It will also ensure that IDEA - * and Eclipse tasks runs after rest client source (.java) files are generated. - *

- * - *

- * In this phase, the plugin creates a new target source set for the generated files. - * The new target source set's name is the input source set name's* suffixed with - * "GeneratedRest", e.g. "mainGeneratedRest". The plugin invokes - * RestLiResourceModelExporter to generate idl (.restspec.json) files for each - * IdlItem in the input source set's pegasus IdlOptions. The generated idl files - * will be in target source set's idl directory, e.g. "src/mainGeneratedRest/idl". - * For example, the following adds an IdlItem to the source set's pegasus IdlOptions. - * This line should appear in the impl project's build.gradle. If no IdlItem is added, - * this source set will be excluded from generating idl and checking idl compatibility, - * even there are existing idl files. - *

- *   pegasus.main.idlOptions.addIdlItem(["com.linkedin.restli.examples.groups.server"])
- *
- * After the idl generation phase, each included idl file is checked for compatibility against - * those in the api project. In case the current interface breaks compatibility, - * by default the build fails and reports all compatibility errors and warnings. Otherwise, - * the build tasks in the api project later will package the resource classes into jar files. - * User can change the compatibility requirement between the current and published idl by - * setting the "rest.model.compatibility" project property, i.e. - * "gradle -Prest.model.compatibility= ..." The following levels are supported: - *

- * The plugin needs to know where the api project is. It searches the api project in the
- * following steps. If all searches fail, the build fails.
- *
- *   1. Use the specified project from the impl project build.gradle file. The ext.apiProject
- *      property explicitly assigns the api project. E.g.
- *
- *        ext.apiProject = project(':groups:groups-server-api')
- *
- *      If multiple such statements exist, the last will be used. Wrong project path causes Gradle
- *      evaluation error.
- *
- *   2. If no ext.apiProject property is defined, the plugin will try to guess the
- *      api project name with the following conventions. The search stops at the first successful match.
- *
- *      1. If the impl project name ends with the following suffixes, substitute the suffix with "-api".
- *
- *         1. -impl
- *         2. -service
- *         3. -server
- *         4. -server-impl
- *
- *         This list can be overridden by inserting the following line to the project build.gradle:
- *
- *           ext.apiProjectSubstitutionSuffixes = ['-new-suffix-1', '-new-suffix-2']
- *
- *         Alternatively, this setting could be applied globally to all projects by putting it in
- *         the subprojects section of the root build.gradle.
- *
- *      2. Append "-api" to the impl project name.
- *
- * The plugin invokes RestLiResourceModelCompatibilityChecker to check compatibility.
- *

- * - *

- * The idl files in the api project are not generated by the plugin, but rather - * "published" from the impl project. The publishRestModel task is used to copy the - * idl files to the api project. This task is invoked automatically if the idls are - * verified to be "safe". "Safe" is determined by the "rest.model.compatibility" - * property. Because this task is skipped if the idls are functionally equivalent - * (not necessarily identical, e.g. differ in doc fields), if the default "equivalent" - * compatibility level is used, no file will be copied. If such automatic publishing - * is intended to be skip, set the "rest.model.skipPublish" property to true. - * Note that all the properties are per-project and can be overridden in each project's - * build.gradle file. - *

- * - *

- * Please always keep in mind that if idl publishing is happened, a subsequent whole-project - * rebuild is necessary to pick up the changes. Otherwise, the Hudson job will fail and - * the source code commit will fail. - *

- * - *

- * The configurations that apply to generating the rest model and rest client jars - * are as follow: - *

- *

- * - *

- * This plugin considers test source sets whose names begin with 'test' or 'integTest' to be - * test source sets. - *

- */ -public class PegasusPlugin implements Plugin -{ - public static boolean debug = false; - - private static final GradleVersion MIN_REQUIRED_VERSION = GradleVersion.version("1.0"); // Next: 5.2.1 - private static final GradleVersion MIN_SUGGESTED_VERSION = GradleVersion.version("5.2.1"); // Next: 5.3 - - // - // Constants for generating sourceSet names and corresponding directory names - // for generated code - // - private static final String DATA_TEMPLATE_GEN_TYPE = "DataTemplate"; - private static final String REST_GEN_TYPE = "Rest"; - private static final String AVRO_SCHEMA_GEN_TYPE = "AvroSchema"; - - public static final String DATA_TEMPLATE_FILE_SUFFIX = ".pdsc"; - public static final String PDL_FILE_SUFFIX = ".pdl"; - // gradle property to opt OUT schema annotation validation, by default this feature is enabled. - private static final String DISABLE_SCHEMA_ANNOTATION_VALIDATION = "schema.annotation.validation.disable"; - // gradle property to opt in for destroying stale files from the build directory, - // by default it is disabled, because it triggers hot-reload (even if it results in a no-op) - private static final String DESTROY_STALE_FILES_ENABLE = "enableDestroyStaleFiles"; - public static final Collection DATA_TEMPLATE_FILE_SUFFIXES = new ArrayList<>(); - - public static final String IDL_FILE_SUFFIX = ".restspec.json"; - public static final String SNAPSHOT_FILE_SUFFIX = ".snapshot.json"; - public static final String SNAPSHOT_COMPAT_REQUIREMENT = "rest.model.compatibility"; - public static final String IDL_COMPAT_REQUIREMENT = "rest.idl.compatibility"; - // Pegasus schema compatibility level configuration, which is used to define the {@link CompatibilityLevel}. - public static final String PEGASUS_SCHEMA_SNAPSHOT_REQUIREMENT = "pegasusPlugin.pegasusSchema.compatibility"; - // Pegasus extension schema compatibility level configuration, which is used to define the {@link CompatibilityLevel} - public static final String PEGASUS_EXTENSION_SCHEMA_SNAPSHOT_REQUIREMENT = "pegasusPlugin.extensionSchema.compatibility"; - // CompatibilityOptions Mode configuration, which is used to define the {@link CompatibilityOptions#Mode} in the compatibility checker. - private static final String PEGASUS_COMPATIBILITY_MODE = "pegasusPlugin.pegasusSchemaCompatibilityCheckMode"; - - private static final Pattern TEST_DIR_REGEX = Pattern.compile("^(integ)?[Tt]est"); - private static final String SNAPSHOT_NO_PUBLISH = "rest.model.noPublish"; - private static final String SNAPSHOT_FORCE_PUBLISH = "rest.model.forcePublish"; - private static final String PROCESS_EMPTY_IDL_DIR = "rest.idl.processEmptyIdlDir"; - private static final String IDL_NO_PUBLISH = "rest.idl.noPublish"; - private static final String IDL_FORCE_PUBLISH = "rest.idl.forcePublish"; - private static final String SKIP_IDL_CHECK = "rest.idl.skipCheck"; - // gradle property to skip running GenerateRestModel task. - // Note it affects GenerateRestModel task only, and does not skip tasks depends on GenerateRestModel. 
- private static final String SKIP_GENERATE_REST_MODEL= "rest.model.skipGenerateRestModel"; - private static final String SUPPRESS_REST_CLIENT_RESTLI_2 = "rest.client.restli2.suppress"; - private static final String SUPPRESS_REST_CLIENT_RESTLI_1 = "rest.client.restli1.suppress"; - - private static final String GENERATOR_CLASSLOADER_NAME = "pegasusGeneratorClassLoader"; - - private static final String CONVERT_TO_PDL_REVERSE = "convertToPdl.reverse"; - private static final String CONVERT_TO_PDL_KEEP_ORIGINAL = "convertToPdl.keepOriginal"; - private static final String CONVERT_TO_PDL_SKIP_VERIFICATION = "convertToPdl.skipVerification"; - private static final String CONVERT_TO_PDL_PRESERVE_SOURCE_CMD = "convertToPdl.preserveSourceCmd"; - - // Below variables are used to collect data across all pegasus projects (sub-projects) and then print information - // to the user at the end after build is finished. - private static StringBuffer _restModelCompatMessage = new StringBuffer(); - private static final Collection _needCheckinFiles = new ArrayList<>(); - private static final Collection _needBuildFolders = new ArrayList<>(); - private static final Collection _possibleMissingFilesInEarlierCommit = new ArrayList<>(); - - private static final String RUN_ONCE = "runOnce"; - private static final Object STATIC_PROJECT_EVALUATED_LOCK = new Object(); - - private static final List UNUSED_CONFIGURATIONS = Arrays.asList( - "dataTemplateGenerator", "restTools", "avroSchemaGenerator"); - // Directory in the dataTemplate jar that holds schemas translated from PDL to PDSC. - private static final String TRANSLATED_SCHEMAS_DIR = "legacyPegasusSchemas"; - // Enable the use of argFiles for the tasks that support them - private static final String ENABLE_ARG_FILE = "pegasusPlugin.enableArgFile"; - // Enable the generation of fluent APIs - private static final String ENABLE_FLUENT_API = "pegasusPlugin.enableFluentApi"; - - // This config impacts GenerateDataTemplateTask and GenerateRestClientTask; - // If not set, by default all paths generated in these two tasks will be lower-case. 
- // This default behavior is needed because Linux, MacOS, Windows treat case sensitive paths differently, - // and we want to be consistent, so we choose lower-case as default case for path generated - private static final String CODE_GEN_PATH_CASE_SENSITIVE = "pegasusPlugin.generateCaseSensitivePath"; - - private static final String PEGASUS_PLUGIN_CONFIGURATION = "pegasusPlugin"; - - // Enable the use of generic pegasus schema compatibility checker - private static final String ENABLE_PEGASUS_SCHEMA_COMPATIBILITY_CHECK = "pegasusPlugin.enablePegasusSchemaCompatibilityCheck"; - - private static final String PEGASUS_SCHEMA_SNAPSHOT = "PegasusSchemaSnapshot"; - - private static final String PEGASUS_EXTENSION_SCHEMA_SNAPSHOT = "PegasusExtensionSchemaSnapshot"; - - private static final String PEGASUS_SCHEMA_SNAPSHOT_DIR = "pegasusSchemaSnapshot"; - - private static final String PEGASUS_EXTENSION_SCHEMA_SNAPSHOT_DIR = "pegasusExtensionSchemaSnapshot"; - - private static final String PEGASUS_SCHEMA_SNAPSHOT_DIR_OVERRIDE = "overridePegasusSchemaSnapshotDir"; - - private static final String PEGASUS_EXTENSION_SCHEMA_SNAPSHOT_DIR_OVERRIDE = "overridePegasusExtensionSchemaSnapshotDir"; - - private static final String SRC = "src"; - - private static final String SCHEMA_ANNOTATION_HANDLER_CONFIGURATION = "schemaAnnotationHandler"; - - private static final String COMPATIBILITY_OPTIONS_MODE_EXTENSION = "EXTENSION"; - - - @SuppressWarnings("unchecked") - private Class> _thisPluginType = (Class>) - getClass().asSubclass(Plugin.class); - - private Task _generateSourcesJarTask; - private Javadoc _generateJavadocTask; - private Task _generateJavadocJarTask; - private boolean _configureIvyPublications = true; - - public void setPluginType(Class> pluginType) - { - _thisPluginType = pluginType; - } - - public void setSourcesJarTask(Task sourcesJarTask) - { - _generateSourcesJarTask = sourcesJarTask; - } - - public void setJavadocJarTask(Task javadocJarTask) - { - _generateJavadocJarTask = javadocJarTask; - } - - public void setConfigureIvyPublications(boolean configureIvyPublications) { - _configureIvyPublications = configureIvyPublications; - } - - @Override - public void apply(Project project) - { - checkGradleVersion(project); - - project.getPlugins().apply(JavaPlugin.class); - - // this HashMap will have a PegasusOptions per sourceSet - project.getExtensions().getExtraProperties().set("pegasus", new HashMap<>()); - // this map will extract PegasusOptions.GenerationMode to project property - project.getExtensions().getExtraProperties().set("PegasusGenerationMode", - Arrays.stream(PegasusOptions.GenerationMode.values()) - .collect(Collectors.toMap(PegasusOptions.GenerationMode::name, Function.identity()))); - - synchronized (STATIC_PROJECT_EVALUATED_LOCK) - { - // Check if this is the first time the block will run. Pegasus plugin can run multiple times in a build if - // multiple sub-projects applied the plugin. - if (!project.getRootProject().hasProperty(RUN_ONCE) - || !Boolean.parseBoolean(String.valueOf(project.getRootProject().property(RUN_ONCE)))) - { - project.getGradle().projectsEvaluated(gradle -> - gradle.getRootProject().subprojects(subproject -> - UNUSED_CONFIGURATIONS.forEach(configurationName -> { - Configuration conf = subproject.getConfigurations().findByName(configurationName); - if (conf != null && !conf.getDependencies().isEmpty()) { - subproject.getLogger().warn("*** Project {} declares dependency to unused configuration \"{}\". 
" - + "This configuration is deprecated and you can safely remove the dependency. ***", - subproject.getPath(), configurationName); - } - }) - ) - ); - - // Re-initialize the static variables as they might have stale values from previous run. With Gradle 3.0 and - // gradle daemon enabled, the plugin class might not be loaded for every run. - DATA_TEMPLATE_FILE_SUFFIXES.clear(); - DATA_TEMPLATE_FILE_SUFFIXES.add(DATA_TEMPLATE_FILE_SUFFIX); - DATA_TEMPLATE_FILE_SUFFIXES.add(PDL_FILE_SUFFIX); - - _restModelCompatMessage = new StringBuffer(); - _needCheckinFiles.clear(); - _needBuildFolders.clear(); - _possibleMissingFilesInEarlierCommit.clear(); - - project.getGradle().buildFinished(result -> - { - StringBuilder endOfBuildMessage = new StringBuilder(); - if (_restModelCompatMessage.length() > 0) - { - endOfBuildMessage.append(_restModelCompatMessage); - } - - if (!_needCheckinFiles.isEmpty()) - { - endOfBuildMessage.append(createModifiedFilesMessage(_needCheckinFiles, _needBuildFolders)); - } - - if (!_possibleMissingFilesInEarlierCommit.isEmpty()) - { - endOfBuildMessage.append(createPossibleMissingFilesMessage(_possibleMissingFilesInEarlierCommit)); - } - - if (endOfBuildMessage.length() > 0) - { - result.getGradle().getRootProject().getLogger().quiet(endOfBuildMessage.toString()); - } - }); - - // Set an extra property on the root project to indicate the initialization is complete for the current build. - project.getRootProject().getExtensions().getExtraProperties().set(RUN_ONCE, true); - } - } - - ConfigurationContainer configurations = project.getConfigurations(); - - // configuration for getting the required classes to make pegasus call main methods - configurations.maybeCreate(PEGASUS_PLUGIN_CONFIGURATION); - - // configuration for compiling generated data templates - Configuration dataTemplateCompile = configurations.maybeCreate("dataTemplateCompile"); - dataTemplateCompile.setVisible(false); - - // configuration for running rest client generator - Configuration restClientCompile = configurations.maybeCreate("restClientCompile"); - restClientCompile.setVisible(false); - - // configuration for running data template generator - // DEPRECATED! This configuration is no longer used. Please stop using it. - Configuration dataTemplateGenerator = configurations.maybeCreate("dataTemplateGenerator"); - dataTemplateGenerator.setVisible(false); - - // configuration for running rest client generator - // DEPRECATED! This configuration is no longer used. Please stop using it. - Configuration restTools = configurations.maybeCreate("restTools"); - restTools.setVisible(false); - - // configuration for running Avro schema generator - // DEPRECATED! 
To skip avro schema generation, use PegasusOptions.generationModes - Configuration avroSchemaGenerator = configurations.maybeCreate("avroSchemaGenerator"); - avroSchemaGenerator.setVisible(false); - - // configuration for depending on data schemas and potentially generated data templates - // and for publishing jars containing data schemas to the project artifacts for including in the ivy.xml - Configuration dataModel = configurations.maybeCreate("dataModel"); - Configuration testDataModel = configurations.maybeCreate("testDataModel"); - testDataModel.extendsFrom(dataModel); - - // configuration for depending on data schemas and potentially generated data templates - // and for publishing jars containing data schemas to the project artifacts for including in the ivy.xml - Configuration avroSchema = configurations.maybeCreate("avroSchema"); - Configuration testAvroSchema = configurations.maybeCreate("testAvroSchema"); - testAvroSchema.extendsFrom(avroSchema); - - // configuration for depending on rest idl and potentially generated client builders - // and for publishing jars containing rest idl to the project artifacts for including in the ivy.xml - Configuration restModel = configurations.maybeCreate("restModel"); - Configuration testRestModel = configurations.maybeCreate("testRestModel"); - testRestModel.extendsFrom(restModel); - - // configuration for publishing jars containing data schemas and generated data templates - // to the project artifacts for including in the ivy.xml - // - // published data template jars depends on the configurations used to compile the classes - // in the jar, this includes the data models/templates used by the data template generator - // and the classes used to compile the generated classes. - Configuration dataTemplate = configurations.maybeCreate("dataTemplate"); - dataTemplate.extendsFrom(dataTemplateCompile, dataModel); - Configuration testDataTemplate = configurations.maybeCreate("testDataTemplate"); - testDataTemplate.extendsFrom(dataTemplate, testDataModel); - - // configuration for processing and validating schema annotation during build time. - // - // The configuration contains dependencies to schema annotation handlers which would process schema annotations - // and validate. - Configuration schemaAnnotationHandler = configurations.maybeCreate(SCHEMA_ANNOTATION_HANDLER_CONFIGURATION); - - // configuration for publishing jars containing rest idl and generated client builders - // to the project artifacts for including in the ivy.xml - // - // published client builder jars depends on the configurations used to compile the classes - // in the jar, this includes the data models/templates (potentially generated by this - // project and) used by the data template generator and the classes used to compile - // the generated classes. 
- Configuration restClient = configurations.maybeCreate("restClient"); - restClient.extendsFrom(restClientCompile, dataTemplate); - Configuration testRestClient = configurations.maybeCreate("testRestClient"); - testRestClient.extendsFrom(restClient, testDataTemplate); - - Properties properties = new Properties(); - InputStream inputStream = getClass().getResourceAsStream("/pegasus-version.properties"); - if (inputStream != null) - { - try - { - properties.load(inputStream); - } - catch (IOException e) - { - throw new GradleException("Unable to read pegasus-version.properties file.", e); - } - - String version = properties.getProperty("pegasus.version"); - - project.getDependencies().add(PEGASUS_PLUGIN_CONFIGURATION, "com.linkedin.pegasus:data:" + version); - project.getDependencies().add(PEGASUS_PLUGIN_CONFIGURATION, "com.linkedin.pegasus:data-avro-generator:" + version); - project.getDependencies().add(PEGASUS_PLUGIN_CONFIGURATION, "com.linkedin.pegasus:generator:" + version); - project.getDependencies().add(PEGASUS_PLUGIN_CONFIGURATION, "com.linkedin.pegasus:restli-tools:" + version); - } - else - { - project.getLogger().lifecycle("Unable to add pegasus dependencies to {}. Please be sure that " - + "'com.linkedin.pegasus:data', 'com.linkedin.pegasus:data-avro-generator', 'com.linkedin.pegasus:generator', 'com.linkedin.pegasus:restli-tools'" - + " are available on the configuration pegasusPlugin", - project.getPath()); - } - project.getDependencies().add(PEGASUS_PLUGIN_CONFIGURATION, "org.slf4j:slf4j-simple:1.7.2"); - project.getDependencies().add(PEGASUS_PLUGIN_CONFIGURATION, project.files(System.getProperty("java.home") + "/../lib/tools.jar")); - - // this call has to be here because: - // 1) artifact cannot be published once projects has been evaluated, so we need to first - // create the tasks and artifact handler, then progressively append sources - // 2) in order to append sources progressively, the source and documentation tasks and artifacts must be - // configured/created before configuring and creating the code generation tasks. - - configureGeneratedSourcesAndJavadoc(project); - - ChangedFileReportTask changedFileReportTask = project.getTasks() - .create("changedFilesReport", ChangedFileReportTask.class); - - project.getTasks().getByName("check").dependsOn(changedFileReportTask); - - SourceSetContainer sourceSets = project.getConvention() - .getPlugin(JavaPluginConvention.class).getSourceSets(); - - sourceSets.all(sourceSet -> - { - if (sourceSet.getName().toLowerCase(Locale.US).contains("generated")) - { - return; - } - - checkAvroSchemaExist(project, sourceSet); - - // the idl Generator input options will be inside the PegasusOptions class. Users of the - // plugin can set the inputOptions in their build.gradle - @SuppressWarnings("unchecked") - Map pegasusOptions = (Map) project - .getExtensions().getExtraProperties().get("pegasus"); - - pegasusOptions.put(sourceSet.getName(), new PegasusOptions()); - - // rest model generation could fail on incompatibility - // if it can fail, fail it early - configureRestModelGeneration(project, sourceSet); - - // Do compatibility check for schemas under "pegasus" directory if the configuration property is provided. 
- if (isPropertyTrue(project, ENABLE_PEGASUS_SCHEMA_COMPATIBILITY_CHECK)) - { - configurePegasusSchemaSnapshotGeneration(project, sourceSet, false); - } - - configurePegasusSchemaSnapshotGeneration(project, sourceSet, true); - - configureConversionUtilities(project, sourceSet); - - GenerateDataTemplateTask generateDataTemplateTask = configureDataTemplateGeneration(project, sourceSet); - - configureAvroSchemaGeneration(project, sourceSet); - - configureRestClientGeneration(project, sourceSet); - - if (!isPropertyTrue(project, DISABLE_SCHEMA_ANNOTATION_VALIDATION)) - { - configureSchemaAnnotationValidation(project, sourceSet, generateDataTemplateTask); - } - - Task cleanGeneratedDirTask = project.task(sourceSet.getTaskName("clean", "GeneratedDir")); - cleanGeneratedDirTask.doLast(new CacheableAction<>(task -> - { - deleteGeneratedDir(project, sourceSet, REST_GEN_TYPE); - deleteGeneratedDir(project, sourceSet, AVRO_SCHEMA_GEN_TYPE); - deleteGeneratedDir(project, sourceSet, DATA_TEMPLATE_GEN_TYPE); - })); - - // make clean depends on deleting the generated directories - project.getTasks().getByName("clean").dependsOn(cleanGeneratedDirTask); - - // Set data schema directories as resource roots - configureDataSchemaResourcesRoot(project, sourceSet); - }); - - project.getExtensions().getExtraProperties().set(GENERATOR_CLASSLOADER_NAME, getClass().getClassLoader()); - } - - protected void configureSchemaAnnotationValidation(Project project, - SourceSet sourceSet, - GenerateDataTemplateTask generateDataTemplatesTask) - { - // Task would execute based on the following order. - // generateDataTemplatesTask -> validateSchemaAnnotationTask - - // Create ValidateSchemaAnnotation task - ValidateSchemaAnnotationTask validateSchemaAnnotationTask = project.getTasks() - .create(sourceSet.getTaskName("validate", "schemaAnnotation"), ValidateSchemaAnnotationTask.class, task -> - { - task.setInputDir(generateDataTemplatesTask.getInputDir()); - task.setResolverPath(getDataModelConfig(project, sourceSet)); // same resolver path as generateDataTemplatesTask - task.setClassPath(project.getConfigurations() .getByName(SCHEMA_ANNOTATION_HANDLER_CONFIGURATION) - .plus(project.getConfigurations().getByName(PEGASUS_PLUGIN_CONFIGURATION)) - .plus(project.getConfigurations().getByName(JavaPlugin.RUNTIME_CLASSPATH_CONFIGURATION_NAME))); - task.setHandlerJarPath(project.getConfigurations() .getByName(SCHEMA_ANNOTATION_HANDLER_CONFIGURATION)); - if (isPropertyTrue(project, ENABLE_ARG_FILE)) - { - task.setEnableArgFile(true); - } - } - ); - - // validateSchemaAnnotationTask depend on generateDataTemplatesTask - validateSchemaAnnotationTask.dependsOn(generateDataTemplatesTask); - - // Check depends on validateSchemaAnnotationTask. 
- project.getTasks().getByName("check").dependsOn(validateSchemaAnnotationTask); - } - - - - @SuppressWarnings("deprecation") - protected void configureGeneratedSourcesAndJavadoc(Project project) - { - _generateJavadocTask = project.getTasks().create("generateJavadoc", Javadoc.class); - - if (_generateSourcesJarTask == null) - { - // - // configuration for publishing jars containing sources for generated classes - // to the project artifacts for including in the ivy.xml - // - ConfigurationContainer configurations = project.getConfigurations(); - Configuration generatedSources = configurations.maybeCreate("generatedSources"); - Configuration testGeneratedSources = configurations.maybeCreate("testGeneratedSources"); - testGeneratedSources.extendsFrom(generatedSources); - - _generateSourcesJarTask = project.getTasks().create("generateSourcesJar", Jar.class, jarTask -> { - jarTask.setGroup(JavaBasePlugin.DOCUMENTATION_GROUP); - jarTask.setDescription("Generates a jar file containing the sources for the generated Java classes."); - // FIXME change to #getArchiveClassifier().set("sources"); breaks backwards-compatibility before 5.1 - // DataHub Note - applied FIXME - jarTask.getArchiveClassifier().set("sources"); - }); - - project.getArtifacts().add("generatedSources", _generateSourcesJarTask); - } - - if (_generateJavadocJarTask == null) - { - // - // configuration for publishing jars containing Javadoc for generated classes - // to the project artifacts for including in the ivy.xml - // - ConfigurationContainer configurations = project.getConfigurations(); - Configuration generatedJavadoc = configurations.maybeCreate("generatedJavadoc"); - Configuration testGeneratedJavadoc = configurations.maybeCreate("testGeneratedJavadoc"); - testGeneratedJavadoc.extendsFrom(generatedJavadoc); - - _generateJavadocJarTask = project.getTasks().create("generateJavadocJar", Jar.class, jarTask -> { - jarTask.dependsOn(_generateJavadocTask); - jarTask.setGroup(JavaBasePlugin.DOCUMENTATION_GROUP); - jarTask.setDescription("Generates a jar file containing the Javadoc for the generated Java classes."); - // FIXME change to #getArchiveClassifier().set("sources"); breaks backwards-compatibility before 5.1 - // DataHub Note - applied FIXME - jarTask.getArchiveClassifier().set("javadoc"); - jarTask.from(_generateJavadocTask.getDestinationDir()); - }); - - project.getArtifacts().add("generatedJavadoc", _generateJavadocJarTask); - } - else - { - // TODO: Tighten the types so that _generateJavadocJarTask must be of type Jar. 
- ((Jar) _generateJavadocJarTask).from(_generateJavadocTask.getDestinationDir()); - _generateJavadocJarTask.dependsOn(_generateJavadocTask); - } - } - - private static void deleteGeneratedDir(Project project, SourceSet sourceSet, String dirType) - { - String generatedDirPath = getGeneratedDirPath(project, sourceSet, dirType); - project.getLogger().info("Delete generated directory {}", generatedDirPath); - project.delete(generatedDirPath); - } - - private static > Class getCompatibilityLevelClass(Project project) - { - ClassLoader generatorClassLoader = (ClassLoader) project.property(GENERATOR_CLASSLOADER_NAME); - - String className = "com.linkedin.restli.tools.idlcheck.CompatibilityLevel"; - try - { - @SuppressWarnings("unchecked") - Class enumClass = (Class) generatorClassLoader.loadClass(className).asSubclass(Enum.class); - return enumClass; - } - catch (ClassNotFoundException e) - { - throw new RuntimeException("Could not load class " + className); - } - } - - private static void addGeneratedDir(Project project, SourceSet sourceSet, Collection configurations) - { - project.getPlugins().withType(IdeaPlugin.class, ideaPlugin -> { - IdeaModule ideaModule = ideaPlugin.getModel().getModule(); - // stupid if block needed because of stupid assignment required to update source dirs - if (isTestSourceSet(sourceSet)) - { - Set sourceDirs = ideaModule.getTestSourceDirs(); - sourceDirs.addAll(sourceSet.getJava().getSrcDirs()); - // this is stupid but assignment is required - ideaModule.setTestSourceDirs(sourceDirs); - if (debug) - { - System.out.println("Added " + sourceSet.getJava().getSrcDirs() + " to IdeaModule testSourceDirs " - + ideaModule.getTestSourceDirs()); - } - } - else - { - Set sourceDirs = ideaModule.getSourceDirs(); - sourceDirs.addAll(sourceSet.getJava().getSrcDirs()); - // this is stupid but assignment is required - ideaModule.setSourceDirs(sourceDirs); - if (debug) - { - System.out.println("Added " + sourceSet.getJava().getSrcDirs() + " to IdeaModule sourceDirs " - + ideaModule.getSourceDirs()); - } - } - Collection compilePlus = ideaModule.getScopes().get("COMPILE").get("plus"); - compilePlus.addAll(configurations); - ideaModule.getScopes().get("COMPILE").put("plus", compilePlus); - }); - } - - private static void checkAvroSchemaExist(Project project, SourceSet sourceSet) - { - String sourceDir = "src" + File.separatorChar + sourceSet.getName(); - File avroSourceDir = project.file(sourceDir + File.separatorChar + "avro"); - if (avroSourceDir.exists()) - { - project.getLogger().lifecycle("{}'s {} has non-empty avro directory. pegasus plugin does not process avro directory", - project.getName(), sourceDir); - } - } - - // Compute the name of the source set that will contain a type of an input generated code. - // e.g. genType may be 'DataTemplate' or 'Rest' - private static String getGeneratedSourceSetName(SourceSet sourceSet, String genType) - { - return sourceSet.getName() + "Generated" + genType; - } - - // Compute the directory name that will contain a type generated code of an input source set. - // e.g. genType may be 'DataTemplate' or 'Rest' - public static String getGeneratedDirPath(Project project, SourceSet sourceSet, String genType) - { - String override = getOverridePath(project, sourceSet, "overrideGeneratedDir"); - String sourceSetName = getGeneratedSourceSetName(sourceSet, genType); - String base = override == null ? 
"src" : override; - - return base + File.separatorChar + sourceSetName; - } - - public static String getDataSchemaPath(Project project, SourceSet sourceSet) - { - String override = getOverridePath(project, sourceSet, "overridePegasusDir"); - if (override == null) - { - return "src" + File.separatorChar + sourceSet.getName() + File.separatorChar + "pegasus"; - } - else - { - return override; - } - } - - private static String getExtensionSchemaPath(Project project, SourceSet sourceSet) - { - String override = getOverridePath(project, sourceSet, "overrideExtensionSchemaDir"); - if(override == null) - { - return "src" + File.separatorChar + sourceSet.getName() + File.separatorChar + "extensions"; - } - else - { - return override; - } - } - - private static String getSnapshotPath(Project project, SourceSet sourceSet) - { - String override = getOverridePath(project, sourceSet, "overrideSnapshotDir"); - if (override == null) - { - return "src" + File.separatorChar + sourceSet.getName() + File.separatorChar + "snapshot"; - } - else - { - return override; - } - } - - private static String getIdlPath(Project project, SourceSet sourceSet) - { - String override = getOverridePath(project, sourceSet, "overrideIdlDir"); - if (override == null) - { - return "src" + File.separatorChar + sourceSet.getName() + File.separatorChar + "idl"; - } - else - { - return override; - } - } - - private static String getPegasusSchemaSnapshotPath(Project project, SourceSet sourceSet) - { - String override = getOverridePath(project, sourceSet, PEGASUS_SCHEMA_SNAPSHOT_DIR_OVERRIDE); - if (override == null) - { - return SRC + File.separatorChar + sourceSet.getName() + File.separatorChar + PEGASUS_SCHEMA_SNAPSHOT_DIR; - } - else - { - return override; - } - } - - private static String getPegasusExtensionSchemaSnapshotPath(Project project, SourceSet sourceSet) - { - String override = getOverridePath(project, sourceSet, PEGASUS_EXTENSION_SCHEMA_SNAPSHOT_DIR_OVERRIDE); - if (override == null) - { - return SRC + File.separatorChar + sourceSet.getName() + File.separatorChar + PEGASUS_EXTENSION_SCHEMA_SNAPSHOT_DIR; - } - else - { - return override; - } - } - - private static String getOverridePath(Project project, SourceSet sourceSet, String overridePropertyName) - { - String sourceSetPropertyName = sourceSet.getName() + '.' + overridePropertyName; - String override = getNonEmptyProperty(project, sourceSetPropertyName); - - if (override == null && sourceSet.getName().equals("main")) - { - override = getNonEmptyProperty(project, overridePropertyName); - } - - return override; - } - - private static boolean isTestSourceSet(SourceSet sourceSet) - { - return TEST_DIR_REGEX.matcher(sourceSet.getName()).find(); - } - - private static Configuration getDataModelConfig(Project project, SourceSet sourceSet) - { - return isTestSourceSet(sourceSet) - ? project.getConfigurations().getByName("testDataModel") - : project.getConfigurations().getByName("dataModel"); - } - - private static boolean isTaskSuccessful(Task task) - { - return task.getState().getExecuted() - // Task is not successful if it is not upto date and is skipped. 
- && !(task.getState().getSkipped() && !task.getState().getUpToDate()) - && task.getState().getFailure() == null; - } - - private static boolean isResultEquivalent(File compatibilityLogFile) - { - return isResultEquivalent(compatibilityLogFile, false); - } - - private static boolean isResultEquivalent(File compatibilityLogFile, boolean restSpecOnly) - { - CompatibilityLogChecker logChecker = new CompatibilityLogChecker(); - try - { - logChecker.write(Files.readAllBytes(compatibilityLogFile.toPath())); - } - catch (IOException e) - { - throw new GradleException("Error while processing compatibility report: " + e.getMessage()); - } - return logChecker.getRestSpecCompatibility().isEmpty() && - (restSpecOnly || logChecker.getModelCompatibility().isEmpty()); - } - - protected void configureRestModelGeneration(Project project, SourceSet sourceSet) - { - if (sourceSet.getAllSource().isEmpty()) - { - project.getLogger().info("No source files found for sourceSet {}. Skipping idl generation.", sourceSet.getName()); - return; - } - - // afterEvaluate needed so that api project can be overridden via ext.apiProject - project.afterEvaluate(p -> - { - // find api project here instead of in each project's plugin configuration - // this allows api project relation options (ext.api*) to be specified anywhere in the build.gradle file - // alternatively, pass closures to task configuration, and evaluate the closures when task is executed - Project apiProject = getCheckedApiProject(project); - - // make sure the api project is evaluated. Important for configure-on-demand mode. - if (apiProject != null) - { - project.evaluationDependsOn(apiProject.getPath()); - - if (!apiProject.getPlugins().hasPlugin(_thisPluginType)) - { - apiProject = null; - } - } - - if (apiProject == null) - { - return; - } - - Task untypedJarTask = project.getTasks().findByName(sourceSet.getJarTaskName()); - if (!(untypedJarTask instanceof Jar)) - { - return; - } - Jar jarTask = (Jar) untypedJarTask; - - String snapshotCompatPropertyName = findProperty(FileCompatibilityType.SNAPSHOT); - if (project.hasProperty(snapshotCompatPropertyName) && "off".equalsIgnoreCase((String) project.property(snapshotCompatPropertyName))) - { - project.getLogger().lifecycle("Project {} snapshot compatibility level \"OFF\" is deprecated. 
Default to \"IGNORE\".", - project.getPath()); - } - - // generate the rest model - FileCollection restModelCodegenClasspath = project.getConfigurations().getByName(PEGASUS_PLUGIN_CONFIGURATION) - .plus(project.getConfigurations().getByName(JavaPlugin.RUNTIME_CLASSPATH_CONFIGURATION_NAME)) - .plus(sourceSet.getRuntimeClasspath()); - String destinationDirPrefix = getGeneratedDirPath(project, sourceSet, REST_GEN_TYPE) + File.separatorChar; - FileCollection restModelResolverPath = apiProject.files(getDataSchemaPath(project, sourceSet)) - .plus(getDataModelConfig(apiProject, sourceSet)); - Set watchedRestModelInputDirs = buildWatchedRestModelInputDirs(project, sourceSet); - Set restModelInputDirs = difference(sourceSet.getAllSource().getSrcDirs(), - sourceSet.getResources().getSrcDirs()); - - Task generateRestModelTask = project.getTasks() - .create(sourceSet.getTaskName("generate", "restModel"), GenerateRestModelTask.class, task -> - { - task.dependsOn(project.getTasks().getByName(sourceSet.getClassesTaskName())); - task.setCodegenClasspath(restModelCodegenClasspath); - task.setWatchedCodegenClasspath(restModelCodegenClasspath - .filter(file -> !"main".equals(file.getName()) && !"classes".equals(file.getName()))); - task.setInputDirs(restModelInputDirs); - task.setWatchedInputDirs(watchedRestModelInputDirs.isEmpty() - ? restModelInputDirs : watchedRestModelInputDirs); - // we need all the artifacts from runtime for any private implementation classes the server code might need. - task.setSnapshotDestinationDir(project.file(destinationDirPrefix + "snapshot")); - task.setIdlDestinationDir(project.file(destinationDirPrefix + "idl")); - - @SuppressWarnings("unchecked") - Map pegasusOptions = (Map) project - .getExtensions().getExtraProperties().get("pegasus"); - task.setIdlOptions(pegasusOptions.get(sourceSet.getName()).idlOptions); - - task.setResolverPath(restModelResolverPath); - if (isPropertyTrue(project, ENABLE_ARG_FILE)) - { - task.setEnableArgFile(true); - } - - task.onlyIf(t -> !isPropertyTrue(project, SKIP_GENERATE_REST_MODEL)); - - task.doFirst(new CacheableAction<>(t -> deleteGeneratedDir(project, sourceSet, REST_GEN_TYPE))); - }); - - File apiSnapshotDir = apiProject.file(getSnapshotPath(apiProject, sourceSet)); - File apiIdlDir = apiProject.file(getIdlPath(apiProject, sourceSet)); - apiSnapshotDir.mkdirs(); - - if (!isPropertyTrue(project, SKIP_IDL_CHECK)) - { - apiIdlDir.mkdirs(); - } - - CheckRestModelTask checkRestModelTask = project.getTasks() - .create(sourceSet.getTaskName("check", "RestModel"), CheckRestModelTask.class, task -> - { - task.dependsOn(generateRestModelTask); - task.setCurrentSnapshotFiles(SharedFileUtils.getSnapshotFiles(project, destinationDirPrefix)); - task.setPreviousSnapshotDirectory(apiSnapshotDir); - task.setCurrentIdlFiles(SharedFileUtils.getIdlFiles(project, destinationDirPrefix)); - task.setPreviousIdlDirectory(apiIdlDir); - task.setCodegenClasspath(project.getConfigurations().getByName(PEGASUS_PLUGIN_CONFIGURATION)); - task.setModelCompatLevel(PropertyUtil.findCompatLevel(project, FileCompatibilityType.SNAPSHOT)); - task.onlyIf(t -> !isPropertyTrue(project, SKIP_IDL_CHECK)); - - task.doLast(new CacheableAction<>(t -> - { - if (!task.isEquivalent()) - { - _restModelCompatMessage.append(task.getWholeMessage()); - } - })); - }); - - CheckSnapshotTask checkSnapshotTask = project.getTasks() - .create(sourceSet.getTaskName("check", "Snapshot"), CheckSnapshotTask.class, task -> { - task.dependsOn(generateRestModelTask); - 
task.setCurrentSnapshotFiles(SharedFileUtils.getSnapshotFiles(project, destinationDirPrefix)); - task.setPreviousSnapshotDirectory(apiSnapshotDir); - task.setCodegenClasspath(project.getConfigurations().getByName(PEGASUS_PLUGIN_CONFIGURATION)); - task.setSnapshotCompatLevel(PropertyUtil.findCompatLevel(project, FileCompatibilityType.SNAPSHOT)); - - task.onlyIf(t -> isPropertyTrue(project, SKIP_IDL_CHECK)); - }); - - CheckIdlTask checkIdlTask = project.getTasks() - .create(sourceSet.getTaskName("check", "Idl"), CheckIdlTask.class, task -> - { - task.dependsOn(generateRestModelTask); - task.setCurrentIdlFiles(SharedFileUtils.getIdlFiles(project, destinationDirPrefix)); - task.setPreviousIdlDirectory(apiIdlDir); - task.setResolverPath(restModelResolverPath); - task.setCodegenClasspath(project.getConfigurations().getByName(PEGASUS_PLUGIN_CONFIGURATION)); - task.setIdlCompatLevel(PropertyUtil.findCompatLevel(project, FileCompatibilityType.IDL)); - if (isPropertyTrue(project, ENABLE_ARG_FILE)) - { - task.setEnableArgFile(true); - } - - - task.onlyIf(t -> !isPropertyTrue(project, SKIP_IDL_CHECK) - && !"OFF".equals(PropertyUtil.findCompatLevel(project, FileCompatibilityType.IDL))); - }); - - // rest model publishing involves cross-project reference - // configure after all projects have been evaluated - // the file copy can be turned off by "rest.model.noPublish" flag - Task publishRestliSnapshotTask = project.getTasks() - .create(sourceSet.getTaskName("publish", "RestliSnapshot"), PublishRestModelTask.class, task -> - { - task.dependsOn(checkRestModelTask, checkSnapshotTask, checkIdlTask); - task.from(SharedFileUtils.getSnapshotFiles(project, destinationDirPrefix)); - task.into(apiSnapshotDir); - task.setSuffix(SNAPSHOT_FILE_SUFFIX); - - task.onlyIf(t -> - isPropertyTrue(project, SNAPSHOT_FORCE_PUBLISH) || - ( - !isPropertyTrue(project, SNAPSHOT_NO_PUBLISH) && - ( - ( - isPropertyTrue(project, SKIP_IDL_CHECK) && - isTaskSuccessful(checkSnapshotTask) && - checkSnapshotTask.getSummaryTarget().exists() && - !isResultEquivalent(checkSnapshotTask.getSummaryTarget()) - ) || - ( - !isPropertyTrue(project, SKIP_IDL_CHECK) && - isTaskSuccessful(checkRestModelTask) && - checkRestModelTask.getSummaryTarget().exists() && - !isResultEquivalent(checkRestModelTask.getSummaryTarget()) - ) - )) - ); - }); - - Task publishRestliIdlTask = project.getTasks() - .create(sourceSet.getTaskName("publish", "RestliIdl"), PublishRestModelTask.class, task -> { - task.dependsOn(checkRestModelTask, checkIdlTask, checkSnapshotTask); - task.from(SharedFileUtils.getIdlFiles(project, destinationDirPrefix)); - task.into(apiIdlDir); - task.setSuffix(IDL_FILE_SUFFIX); - - task.onlyIf(t -> - isPropertyTrue(project, IDL_FORCE_PUBLISH) || - ( - !isPropertyTrue(project, IDL_NO_PUBLISH) && - ( - ( - isPropertyTrue(project, SKIP_IDL_CHECK) && - isTaskSuccessful(checkSnapshotTask) && - checkSnapshotTask.getSummaryTarget().exists() && - !isResultEquivalent(checkSnapshotTask.getSummaryTarget(), true) - ) || - ( - !isPropertyTrue(project, SKIP_IDL_CHECK) && - ( - (isTaskSuccessful(checkRestModelTask) && - checkRestModelTask.getSummaryTarget().exists() && - !isResultEquivalent(checkRestModelTask.getSummaryTarget(), true)) || - (isTaskSuccessful(checkIdlTask) && - checkIdlTask.getSummaryTarget().exists() && - !isResultEquivalent(checkIdlTask.getSummaryTarget())) - ) - ) - )) - ); - }); - - project.getLogger().info("API project selected for {} is {}", - publishRestliIdlTask.getPath(), apiProject.getPath()); - - 
jarTask.from(SharedFileUtils.getIdlFiles(project, destinationDirPrefix)); - // add generated .restspec.json files as resources to the jar - jarTask.dependsOn(publishRestliSnapshotTask, publishRestliIdlTask); - - ChangedFileReportTask changedFileReportTask = (ChangedFileReportTask) project.getTasks() - .getByName("changedFilesReport"); - - // Use the files from apiDir for generating the changed files report as we need to notify user only when - // source system files are modified. - changedFileReportTask.setIdlFiles(SharedFileUtils.getSuffixedFiles(project, apiIdlDir, IDL_FILE_SUFFIX)); - changedFileReportTask.setSnapshotFiles(SharedFileUtils.getSuffixedFiles(project, apiSnapshotDir, - SNAPSHOT_FILE_SUFFIX)); - changedFileReportTask.mustRunAfter(publishRestliSnapshotTask, publishRestliIdlTask); - changedFileReportTask.doLast(new CacheableAction<>(t -> - { - if (!changedFileReportTask.getNeedCheckinFiles().isEmpty()) - { - project.getLogger().info("Adding modified files to need checkin list..."); - _needCheckinFiles.addAll(changedFileReportTask.getNeedCheckinFiles()); - _needBuildFolders.add(getCheckedApiProject(project).getPath()); - } - })); - }); - } - - protected void configurePegasusSchemaSnapshotGeneration(Project project, SourceSet sourceSet, boolean isExtensionSchema) - { - File schemaDir = isExtensionSchema? project.file(getExtensionSchemaPath(project, sourceSet)) - : project.file(getDataSchemaPath(project, sourceSet)); - - if ((isExtensionSchema && SharedFileUtils.getSuffixedFiles(project, schemaDir, PDL_FILE_SUFFIX).isEmpty()) || - (!isExtensionSchema && SharedFileUtils.getSuffixedFiles(project, schemaDir, DATA_TEMPLATE_FILE_SUFFIXES).isEmpty())) - { - return; - } - - Path publishablePegasusSchemaSnapshotDir = project.getBuildDir().toPath().resolve(sourceSet.getName() + - (isExtensionSchema ? PEGASUS_EXTENSION_SCHEMA_SNAPSHOT: PEGASUS_SCHEMA_SNAPSHOT)); - - Task generatePegasusSchemaSnapshot = generatePegasusSchemaSnapshot(project, sourceSet, - isExtensionSchema ? PEGASUS_EXTENSION_SCHEMA_SNAPSHOT: PEGASUS_SCHEMA_SNAPSHOT, schemaDir, - publishablePegasusSchemaSnapshotDir.toFile(), isExtensionSchema); - - File pegasusSchemaSnapshotDir = project.file(isExtensionSchema ? getPegasusExtensionSchemaSnapshotPath(project, sourceSet) - : getPegasusSchemaSnapshotPath(project, sourceSet)); - pegasusSchemaSnapshotDir.mkdirs(); - - Task checkSchemaSnapshot = project.getTasks().create(sourceSet.getTaskName("check", - isExtensionSchema ? PEGASUS_EXTENSION_SCHEMA_SNAPSHOT: PEGASUS_SCHEMA_SNAPSHOT), - CheckPegasusSnapshotTask.class, task -> - { - task.dependsOn(generatePegasusSchemaSnapshot); - task.setCurrentSnapshotDirectory(publishablePegasusSchemaSnapshotDir.toFile()); - task.setPreviousSnapshotDirectory(pegasusSchemaSnapshotDir); - task.setCodegenClasspath(project.getConfigurations().getByName(PEGASUS_PLUGIN_CONFIGURATION) - .plus(project.getConfigurations().getByName(SCHEMA_ANNOTATION_HANDLER_CONFIGURATION)) - .plus(project.getConfigurations().getByName(JavaPlugin.RUNTIME_CLASSPATH_CONFIGURATION_NAME))); - task.setCompatibilityLevel(isExtensionSchema ? - PropertyUtil.findCompatLevel(project, FileCompatibilityType.PEGASUS_EXTENSION_SCHEMA_SNAPSHOT) - :PropertyUtil.findCompatLevel(project, FileCompatibilityType.PEGASUS_SCHEMA_SNAPSHOT)); - task.setCompatibilityMode(isExtensionSchema ? 
COMPATIBILITY_OPTIONS_MODE_EXTENSION : - PropertyUtil.findCompatMode(project, PEGASUS_COMPATIBILITY_MODE)); - task.setExtensionSchema(isExtensionSchema); - task.setHandlerJarPath(project.getConfigurations() .getByName(SCHEMA_ANNOTATION_HANDLER_CONFIGURATION)); - - task.onlyIf(t -> - { - String pegasusSnapshotCompatPropertyName = isExtensionSchema ? - findProperty(FileCompatibilityType.PEGASUS_EXTENSION_SCHEMA_SNAPSHOT) - : findProperty(FileCompatibilityType.PEGASUS_SCHEMA_SNAPSHOT); - return !project.hasProperty(pegasusSnapshotCompatPropertyName) || - !"off".equalsIgnoreCase((String) project.property(pegasusSnapshotCompatPropertyName)); - }); - }); - - Task publishPegasusSchemaSnapshot = publishPegasusSchemaSnapshot(project, sourceSet, - isExtensionSchema ? PEGASUS_EXTENSION_SCHEMA_SNAPSHOT: PEGASUS_SCHEMA_SNAPSHOT, checkSchemaSnapshot, - publishablePegasusSchemaSnapshotDir.toFile(), pegasusSchemaSnapshotDir); - - project.getTasks().getByName(LifecycleBasePlugin.ASSEMBLE_TASK_NAME).dependsOn(publishPegasusSchemaSnapshot); - } - - @SuppressWarnings("deprecation") - protected void configureAvroSchemaGeneration(Project project, SourceSet sourceSet) - { - File dataSchemaDir = project.file(getDataSchemaPath(project, sourceSet)); - File avroDir = project.file(getGeneratedDirPath(project, sourceSet, AVRO_SCHEMA_GEN_TYPE) - + File.separatorChar + "avro"); - - // generate avro schema files from data schema - Task generateAvroSchemaTask = project.getTasks() - .create(sourceSet.getTaskName("generate", "avroSchema"), GenerateAvroSchemaTask.class, task -> { - task.setInputDir(dataSchemaDir); - task.setDestinationDir(avroDir); - task.setResolverPath(getDataModelConfig(project, sourceSet)); - task.setCodegenClasspath(project.getConfigurations().getByName(PEGASUS_PLUGIN_CONFIGURATION)); - if (isPropertyTrue(project, ENABLE_ARG_FILE)) - { - task.setEnableArgFile(true); - } - - task.onlyIf(t -> - { - if (task.getInputDir().exists()) - { - @SuppressWarnings("unchecked") - Map pegasusOptions = (Map) project - .getExtensions().getExtraProperties().get("pegasus"); - - if (pegasusOptions.get(sourceSet.getName()).hasGenerationMode(PegasusOptions.GenerationMode.AVRO)) - { - return true; - } - } - - return !project.getConfigurations().getByName("avroSchemaGenerator").isEmpty(); - }); - - task.doFirst(new CacheableAction<>(t -> deleteGeneratedDir(project, sourceSet, AVRO_SCHEMA_GEN_TYPE))); - }); - - project.getTasks().getByName(sourceSet.getCompileJavaTaskName()).dependsOn(generateAvroSchemaTask); - - // create avro schema jar file - - Task avroSchemaJarTask = project.getTasks().create(sourceSet.getName() + "AvroSchemaJar", Jar.class, task -> - { - // add path prefix to each file in the data schema directory - task.from(avroDir, copySpec -> - copySpec.eachFile(fileCopyDetails -> - fileCopyDetails.setPath("avro" + File.separatorChar + fileCopyDetails.getPath()))); - - // FIXME change to #getArchiveAppendix().set(...); breaks backwards-compatibility before 5.1 - // DataHub Note - applied FIXME - task.getArchiveAppendix().set(getAppendix(sourceSet, "avro-schema")); - task.setDescription("Generate an avro schema jar"); - }); - - if (!isTestSourceSet(sourceSet)) - { - project.getArtifacts().add("avroSchema", avroSchemaJarTask); - } - else - { - project.getArtifacts().add("testAvroSchema", avroSchemaJarTask); - } - } - - protected void configureConversionUtilities(Project project, SourceSet sourceSet) - { - File dataSchemaDir = project.file(getDataSchemaPath(project, sourceSet)); - boolean reverse = 
isPropertyTrue(project, CONVERT_TO_PDL_REVERSE); - boolean keepOriginal = isPropertyTrue(project, CONVERT_TO_PDL_KEEP_ORIGINAL); - boolean skipVerification = isPropertyTrue(project, CONVERT_TO_PDL_SKIP_VERIFICATION); - String preserveSourceCmd = getNonEmptyProperty(project, CONVERT_TO_PDL_PRESERVE_SOURCE_CMD); - - // Utility task for migrating between PDSC and PDL. - project.getTasks().create(sourceSet.getTaskName("convert", "ToPdl"), TranslateSchemasTask.class, task -> - { - task.setInputDir(dataSchemaDir); - task.setDestinationDir(dataSchemaDir); - task.setResolverPath(getDataModelConfig(project, sourceSet)); - task.setCodegenClasspath(project.getConfigurations().getByName(PEGASUS_PLUGIN_CONFIGURATION)); - task.setPreserveSourceCmd(preserveSourceCmd); - if (reverse) - { - task.setSourceFormat(SchemaFileType.PDL); - task.setDestinationFormat(SchemaFileType.PDSC); - } - else - { - task.setSourceFormat(SchemaFileType.PDSC); - task.setDestinationFormat(SchemaFileType.PDL); - } - task.setKeepOriginal(keepOriginal); - task.setSkipVerification(skipVerification); - if (isPropertyTrue(project, ENABLE_ARG_FILE)) - { - task.setEnableArgFile(true); - } - - task.onlyIf(t -> task.getInputDir().exists()); - task.doLast(new CacheableAction<>(t -> - { - project.getLogger().lifecycle("Pegasus schema conversion complete."); - project.getLogger().lifecycle("All pegasus schema files in " + dataSchemaDir + " have been converted"); - project.getLogger().lifecycle("You can use '-PconvertToPdl.reverse=true|false' to change the direction of conversion."); - })); - }); - - // Helper task for reformatting existing PDL schemas by generating them again. - project.getTasks().create(sourceSet.getTaskName("reformat", "Pdl"), TranslateSchemasTask.class, task -> - { - task.setInputDir(dataSchemaDir); - task.setDestinationDir(dataSchemaDir); - task.setResolverPath(getDataModelConfig(project, sourceSet)); - task.setCodegenClasspath(project.getConfigurations().getByName(PEGASUS_PLUGIN_CONFIGURATION)); - task.setSourceFormat(SchemaFileType.PDL); - task.setDestinationFormat(SchemaFileType.PDL); - task.setKeepOriginal(true); - task.setSkipVerification(true); - if (isPropertyTrue(project, ENABLE_ARG_FILE)) - { - task.setEnableArgFile(true); - } - - task.onlyIf(t -> task.getInputDir().exists()); - task.doLast(new CacheableAction<>(t -> project.getLogger().lifecycle("PDL reformat complete."))); - }); - } - - @SuppressWarnings("deprecation") - protected GenerateDataTemplateTask configureDataTemplateGeneration(Project project, SourceSet sourceSet) - { - File dataSchemaDir = project.file(getDataSchemaPath(project, sourceSet)); - File generatedDataTemplateDir = project.file(getGeneratedDirPath(project, sourceSet, DATA_TEMPLATE_GEN_TYPE) - + File.separatorChar + "java"); - File publishableSchemasBuildDir = project.file(project.getBuildDir().getAbsolutePath() - + File.separatorChar + sourceSet.getName() + "Schemas"); - File publishableLegacySchemasBuildDir = project.file(project.getBuildDir().getAbsolutePath() - + File.separatorChar + sourceSet.getName() + "LegacySchemas"); - File publishableExtensionSchemasBuildDir = project.file(project.getBuildDir().getAbsolutePath() - + File.separatorChar + sourceSet.getName() + "ExtensionSchemas"); - - // generate data template source files from data schema - GenerateDataTemplateTask generateDataTemplatesTask = project.getTasks() - .create(sourceSet.getTaskName("generate", "dataTemplate"), GenerateDataTemplateTask.class, task -> - { - task.setInputDir(dataSchemaDir); - 
task.setDestinationDir(generatedDataTemplateDir); - task.setResolverPath(getDataModelConfig(project, sourceSet)); - task.setCodegenClasspath(project.getConfigurations().getByName(PEGASUS_PLUGIN_CONFIGURATION)); - if (isPropertyTrue(project, ENABLE_ARG_FILE)) - { - task.setEnableArgFile(true); - } - if (isPropertyTrue(project, CODE_GEN_PATH_CASE_SENSITIVE)) - { - task.setGenerateLowercasePath(false); - } - - task.onlyIf(t -> - { - if (task.getInputDir().exists()) - { - @SuppressWarnings("unchecked") - Map pegasusOptions = (Map) project - .getExtensions().getExtraProperties().get("pegasus"); - - return pegasusOptions.get(sourceSet.getName()).hasGenerationMode(PegasusOptions.GenerationMode.PEGASUS); - } - - return false; - }); - - task.doFirst(new CacheableAction<>(t -> deleteGeneratedDir(project, sourceSet, DATA_TEMPLATE_GEN_TYPE))); - }); - - // TODO: Tighten the types so that _generateSourcesJarTask must be of type Jar. - ((Jar) _generateSourcesJarTask).from(generateDataTemplatesTask.getDestinationDir()); - _generateSourcesJarTask.dependsOn(generateDataTemplatesTask); - - _generateJavadocTask.source(generateDataTemplatesTask.getDestinationDir()); - _generateJavadocTask.setClasspath(_generateJavadocTask.getClasspath() - .plus(project.getConfigurations().getByName("dataTemplateCompile")) - .plus(generateDataTemplatesTask.getResolverPath())); - _generateJavadocTask.dependsOn(generateDataTemplatesTask); - - // Add extra dependencies for data model compilation - project.getDependencies().add("dataTemplateCompile", "com.google.code.findbugs:jsr305:3.0.2"); - - // create new source set for generated java source and class files - String targetSourceSetName = getGeneratedSourceSetName(sourceSet, DATA_TEMPLATE_GEN_TYPE); - - SourceSetContainer sourceSets = project.getConvention() - .getPlugin(JavaPluginConvention.class).getSourceSets(); - - SourceSet targetSourceSet = sourceSets.create(targetSourceSetName, ss -> - { - ss.java(sourceDirectorySet -> sourceDirectorySet.srcDir(generatedDataTemplateDir)); - ss.setCompileClasspath(getDataModelConfig(project, sourceSet) - .plus(project.getConfigurations().getByName("dataTemplateCompile"))); - }); - - // idea plugin needs to know about new generated java source directory and its dependencies - addGeneratedDir(project, targetSourceSet, Arrays.asList( - getDataModelConfig(project, sourceSet), - project.getConfigurations().getByName("dataTemplateCompile"))); - - // Set source compatibility to 1.8 as the data-templates now generate code with Java 8 features. 
- JavaCompile compileTask = project.getTasks() - .withType(JavaCompile.class).getByName(targetSourceSet.getCompileJavaTaskName()); - compileTask.doFirst(new CacheableAction<>(task -> { - ((JavaCompile) task).setSourceCompatibility("1.8"); - ((JavaCompile) task).setTargetCompatibility("1.8"); - })); - // make sure that java source files have been generated before compiling them - compileTask.dependsOn(generateDataTemplatesTask); - - // Dummy task to maintain backward compatibility - // TODO: Delete this task once use cases have had time to reference the new task - Task destroyStaleFiles = project.getTasks().create(sourceSet.getName() + "DestroyStaleFiles", Delete.class); - destroyStaleFiles.onlyIf(task -> { - project.getLogger().lifecycle("{} task is a NO-OP task.", task.getPath()); - return false; - }); - - // Dummy task to maintain backward compatibility, as this task was replaced by CopySchemas - // TODO: Delete this task once use cases have had time to reference the new task - Task copyPdscSchemasTask = project.getTasks().create(sourceSet.getName() + "CopyPdscSchemas", Copy.class); - copyPdscSchemasTask.dependsOn(destroyStaleFiles); - copyPdscSchemasTask.onlyIf(task -> { - project.getLogger().lifecycle("{} task is a NO-OP task.", task.getPath()); - return false; - }); - - // Prepare schema files for publication by syncing schema folders. - Task prepareSchemasForPublishTask = project.getTasks() - .create(sourceSet.getName() + "CopySchemas", Sync.class, task -> - { - task.from(dataSchemaDir, syncSpec -> DATA_TEMPLATE_FILE_SUFFIXES.forEach(suffix -> syncSpec.include("**/*" + suffix))); - task.into(publishableSchemasBuildDir); - }); - prepareSchemasForPublishTask.dependsOn(copyPdscSchemasTask); - - Collection dataTemplateJarDepends = new ArrayList<>(); - dataTemplateJarDepends.add(compileTask); - dataTemplateJarDepends.add(prepareSchemasForPublishTask); - - // Convert all PDL files back to PDSC for publication - // TODO: Remove this conversion permanently once translated PDSCs are no longer needed. - Task prepareLegacySchemasForPublishTask = project.getTasks() - .create(sourceSet.getName() + "TranslateSchemas", TranslateSchemasTask.class, task -> - { - task.setInputDir(dataSchemaDir); - task.setDestinationDir(publishableLegacySchemasBuildDir); - task.setResolverPath(getDataModelConfig(project, sourceSet)); - task.setCodegenClasspath(project.getConfigurations().getByName(PEGASUS_PLUGIN_CONFIGURATION)); - task.setSourceFormat(SchemaFileType.PDL); - task.setDestinationFormat(SchemaFileType.PDSC); - task.setKeepOriginal(true); - task.setSkipVerification(true); - if (isPropertyTrue(project, ENABLE_ARG_FILE)) - { - task.setEnableArgFile(true); - } - }); - - prepareLegacySchemasForPublishTask.dependsOn(destroyStaleFiles); - dataTemplateJarDepends.add(prepareLegacySchemasForPublishTask); - - // extension schema directory - File extensionSchemaDir = project.file(getExtensionSchemaPath(project, sourceSet)); - - if (!SharedFileUtils.getSuffixedFiles(project, extensionSchemaDir, PDL_FILE_SUFFIX).isEmpty()) - { - // Validate extension schemas if extension schemas are provided. 
- ValidateExtensionSchemaTask validateExtensionSchemaTask = project.getTasks() - .create(sourceSet.getTaskName("validate", "ExtensionSchemas"), ValidateExtensionSchemaTask.class, task -> - { - task.setInputDir(extensionSchemaDir); - task.setResolverPath( - getDataModelConfig(project, sourceSet).plus(project.files(getDataSchemaPath(project, sourceSet)))); - task.setClassPath(project.getConfigurations().getByName(PEGASUS_PLUGIN_CONFIGURATION)); - if (isPropertyTrue(project, ENABLE_ARG_FILE)) - { - task.setEnableArgFile(true); - } - }); - - Task prepareExtensionSchemasForPublishTask = project.getTasks() - .create(sourceSet.getName() + "CopyExtensionSchemas", Sync.class, task -> - { - task.from(extensionSchemaDir, syncSpec -> syncSpec.include("**/*" + PDL_FILE_SUFFIX)); - task.into(publishableExtensionSchemasBuildDir); - }); - - prepareExtensionSchemasForPublishTask.dependsOn(validateExtensionSchemaTask); - prepareExtensionSchemasForPublishTask.dependsOn(copyPdscSchemasTask); - dataTemplateJarDepends.add(prepareExtensionSchemasForPublishTask); - } - - // include pegasus files in the output of this SourceSet - project.getTasks().withType(ProcessResources.class).getByName(targetSourceSet.getProcessResourcesTaskName(), it -> - { - it.from(prepareSchemasForPublishTask, copy -> copy.into("pegasus")); - // TODO: Remove this permanently once translated PDSCs are no longer needed. - it.from(prepareLegacySchemasForPublishTask, copy -> copy.into(TRANSLATED_SCHEMAS_DIR)); - Sync copyExtensionSchemasTask = project.getTasks().withType(Sync.class).findByName(sourceSet.getName() + "CopyExtensionSchemas"); - if (copyExtensionSchemasTask != null) - { - it.from(copyExtensionSchemasTask, copy -> copy.into("extensions")); - } - }); - - // create data template jar file - Jar dataTemplateJarTask = project.getTasks() - .create(sourceSet.getName() + "DataTemplateJar", Jar.class, task -> - { - task.dependsOn(dataTemplateJarDepends); - task.from(targetSourceSet.getOutput()); - - // FIXME change to #getArchiveAppendix().set(...); breaks backwards-compatibility before 5.1 - // DataHub Note - applied FIXME - task.getArchiveAppendix().set(getAppendix(sourceSet, "data-template")); - task.setDescription("Generate a data template jar"); - }); - - // add the data model and date template jars to the list of project artifacts. - if (!isTestSourceSet(sourceSet)) - { - project.getArtifacts().add("dataTemplate", dataTemplateJarTask); - } - else - { - project.getArtifacts().add("testDataTemplate", dataTemplateJarTask); - } - - // include additional dependencies into the appropriate configuration used to compile the input source set - // must include the generated data template classes and their dependencies the configuration. - // "compile" and "testCompile" configurations have been removed in Gradle 7, - // but to keep the maximum backward compatibility, here we handle Gradle 7 and earlier version differently - // Once MIN_REQUIRED_VERSION reaches 7.0, we can remove the check of isAtLeastGradle7() - String compileConfigName; - if (isAtLeastGradle7()) { - compileConfigName = isTestSourceSet(sourceSet) ? "testImplementation" : project.getConfigurations().findByName("api") != null ? "api" : "implementation"; - } - else - { - compileConfigName = isTestSourceSet(sourceSet) ? 
"testCompile" : "compile"; - } - - Configuration compileConfig = project.getConfigurations().maybeCreate(compileConfigName); - compileConfig.extendsFrom( - getDataModelConfig(project, sourceSet), - project.getConfigurations().getByName("dataTemplateCompile")); - - // The getArchivePath() API doesn’t carry any task dependency and has been deprecated. - // Replace it with getArchiveFile() on Gradle 7, - // but keep getArchivePath() to be backwards-compatibility with Gradle version older than 5.1 - // DataHub Note - applied FIXME - project.getDependencies().add(compileConfigName, project.files( - isAtLeastGradle7() ? dataTemplateJarTask.getArchiveFile() : dataTemplateJarTask.getArchivePath())); - - if (_configureIvyPublications) { - // The below Action is only applied when the 'ivy-publish' is applied by the consumer. - // If the consumer does not use ivy-publish, this is a noop. - // this Action prepares the project applying the pegasus plugin to publish artifacts using these steps: - // 1. Registers "feature variants" for pegasus-specific artifacts; - // see https://docs.gradle.org/6.1/userguide/feature_variants.html - // 2. Wires legacy configurations like `dataTemplateCompile` to auto-generated feature variant *Api and - // *Implementation configurations for backwards compatibility. - // 3. Configures the Ivy Publication to include auto-generated feature variant *Api and *Implementation - // configurations and their dependencies. - project.getPlugins().withType(IvyPublishPlugin.class, ivyPublish -> { - if (!isAtLeastGradle61()) - { - throw new GradleException("Using the ivy-publish plugin with the pegasus plugin requires Gradle 6.1 or higher " + - "at build time. Please upgrade."); - } - - JavaPluginExtension java = project.getExtensions().getByType(JavaPluginExtension.class); - // create new capabilities per source set; automatically creates api and implementation configurations - String featureName = mapSourceSetToFeatureName(targetSourceSet); - try - { - /* - reflection is required to preserve compatibility with Gradle 5.2.1 and below - TODO once Gradle 5.3+ is required, remove reflection and replace with: - java.registerFeature(featureName, featureSpec -> { - featureSpec.usingSourceSet(targetSourceSet); - }); - */ - Method registerFeature = JavaPluginExtension.class.getDeclaredMethod("registerFeature", String.class, Action.class); - Action/**/ featureSpecAction = createFeatureVariantFromSourceSet(targetSourceSet); - registerFeature.invoke(java, featureName, featureSpecAction); - } - catch (ReflectiveOperationException e) - { - throw new GradleException("Unable to register new feature variant", e); - } - - // expose transitive dependencies to consumers via variant configurations - Configuration featureConfiguration = project.getConfigurations().getByName(featureName); - Configuration mainGeneratedDataTemplateApi = project.getConfigurations().getByName(targetSourceSet.getApiConfigurationName()); - featureConfiguration.extendsFrom(mainGeneratedDataTemplateApi); - mainGeneratedDataTemplateApi.extendsFrom( - getDataModelConfig(project, targetSourceSet), - project.getConfigurations().getByName("dataTemplateCompile")); - - // Configure the existing IvyPublication - // For backwards-compatibility, make the legacy dataTemplate/testDataTemplate configurations extend - // their replacements, auto-created when we registered the new feature variant - project.afterEvaluate(p -> { - PublishingExtension publishing = p.getExtensions().getByType(PublishingExtension.class); - // When configuring a 
Gradle Publication, use this value to find the name of the publication to configure. Defaults to "ivy". - String publicationName = p.getExtensions().getExtraProperties().getProperties().getOrDefault("PegasusPublicationName", "ivy").toString(); - IvyPublication ivyPublication = publishing.getPublications().withType(IvyPublication.class).getByName(publicationName); - ivyPublication.configurations(configurations -> configurations.create(featureName, legacyConfiguration -> { - legacyConfiguration.extend(p.getConfigurations().getByName(targetSourceSet.getApiElementsConfigurationName()).getName()); - legacyConfiguration.extend(p.getConfigurations().getByName(targetSourceSet.getRuntimeElementsConfigurationName()).getName()); - })); - }); - }); - } - - if (debug) - { - System.out.println("configureDataTemplateGeneration sourceSet " + sourceSet.getName()); - System.out.println(compileConfigName + ".allDependencies : " - + project.getConfigurations().getByName(compileConfigName).getAllDependencies()); - System.out.println(compileConfigName + ".extendsFrom: " - + project.getConfigurations().getByName(compileConfigName).getExtendsFrom()); - System.out.println(compileConfigName + ".transitive: " - + project.getConfigurations().getByName(compileConfigName).isTransitive()); - } - - project.getTasks().getByName(sourceSet.getCompileJavaTaskName()).dependsOn(dataTemplateJarTask); - return generateDataTemplatesTask; - } - - private String mapSourceSetToFeatureName(SourceSet sourceSet) { - String featureName = ""; - switch (sourceSet.getName()) { - case "mainGeneratedDataTemplate": - featureName = "dataTemplate"; - break; - case "testGeneratedDataTemplate": - featureName = "testDataTemplate"; - break; - case "mainGeneratedRest": - featureName = "restClient"; - break; - case "testGeneratedRest": - featureName = "testRestClient"; - break; - case "mainGeneratedAvroSchema": - featureName = "avroSchema"; - break; - case "testGeneratedAvroSchema": - featureName = "testAvroSchema"; - break; - default: - String msg = String.format("Unable to map %s to an appropriate feature name", sourceSet); - throw new GradleException(msg); - } - return featureName; - } - - // Generate rest client from idl files generated from java source files in the specified source set. - // - // This generates rest client source files from idl file generated from java source files - // in the source set. The generated rest client source files will be in a new source set. - // It also compiles the rest client source files into classes, and creates both the - // rest model and rest client jar files. - // - @SuppressWarnings("deprecation") - protected void configureRestClientGeneration(Project project, SourceSet sourceSet) - { - // idl directory for api project - File idlDir = project.file(getIdlPath(project, sourceSet)); - if (SharedFileUtils.getSuffixedFiles(project, idlDir, IDL_FILE_SUFFIX).isEmpty() && !isPropertyTrue(project, - PROCESS_EMPTY_IDL_DIR)) - { - return; - } - File generatedRestClientDir = project.file(getGeneratedDirPath(project, sourceSet, REST_GEN_TYPE) - + File.separatorChar + "java"); - - // always include imported data template jars in compileClasspath of rest client - FileCollection dataModelConfig = getDataModelConfig(project, sourceSet); - - // if data templates generated from this source set, add the generated data template jar to compileClasspath - // of rest client. 
- String dataTemplateSourceSetName = getGeneratedSourceSetName(sourceSet, DATA_TEMPLATE_GEN_TYPE); - - Jar dataTemplateJarTask = null; - - SourceSetContainer sourceSets = project.getConvention() - .getPlugin(JavaPluginConvention.class).getSourceSets(); - - FileCollection dataModels; - if (sourceSets.findByName(dataTemplateSourceSetName) != null) - { - if (debug) - { - System.out.println("sourceSet " + sourceSet.getName() + " has generated sourceSet " + dataTemplateSourceSetName); - } - dataTemplateJarTask = (Jar) project.getTasks().getByName(sourceSet.getName() + "DataTemplateJar"); - // The getArchivePath() API doesn’t carry any task dependency and has been deprecated. - // Replace it with getArchiveFile() on Gradle 7, - // but keep getArchivePath() to be backwards-compatibility with Gradle version older than 5.1 - // DataHub Note - applied FIXME - dataModels = dataModelConfig.plus(project.files( - isAtLeastGradle7() ? dataTemplateJarTask.getArchiveFile() : dataTemplateJarTask.getArchivePath())); - } - else - { - dataModels = dataModelConfig; - } - - // create source set for generated rest model, rest client source and class files. - String targetSourceSetName = getGeneratedSourceSetName(sourceSet, REST_GEN_TYPE); - SourceSet targetSourceSet = sourceSets.create(targetSourceSetName, ss -> - { - ss.java(sourceDirectorySet -> sourceDirectorySet.srcDir(generatedRestClientDir)); - ss.setCompileClasspath(dataModels.plus(project.getConfigurations().getByName("restClientCompile"))); - }); - - project.getPlugins().withType(EclipsePlugin.class, eclipsePlugin -> { - EclipseModel eclipseModel = (EclipseModel) project.getExtensions().findByName("eclipse"); - eclipseModel.getClasspath().getPlusConfigurations() - .add(project.getConfigurations().getByName("restClientCompile")); - }); - - // idea plugin needs to know about new rest client source directory and its dependencies - addGeneratedDir(project, targetSourceSet, Arrays.asList( - getDataModelConfig(project, sourceSet), - project.getConfigurations().getByName("restClientCompile"))); - - // generate the rest client source files - GenerateRestClientTask generateRestClientTask = project.getTasks() - .create(targetSourceSet.getTaskName("generate", "restClient"), GenerateRestClientTask.class, task -> - { - task.dependsOn(project.getConfigurations().getByName("dataTemplate")); - task.setInputDir(idlDir); - task.setResolverPath(dataModels.plus(project.getConfigurations().getByName("restClientCompile"))); - task.setRuntimeClasspath(project.getConfigurations().getByName("dataModel") - .plus(project.getConfigurations().getByName("dataTemplate").getArtifacts().getFiles())); - task.setCodegenClasspath(project.getConfigurations().getByName(PEGASUS_PLUGIN_CONFIGURATION)); - task.setDestinationDir(generatedRestClientDir); - task.setRestli2FormatSuppressed(project.hasProperty(SUPPRESS_REST_CLIENT_RESTLI_2)); - task.setRestli1FormatSuppressed(project.hasProperty(SUPPRESS_REST_CLIENT_RESTLI_1)); - if (isPropertyTrue(project, ENABLE_ARG_FILE)) - { - task.setEnableArgFile(true); - } - if (isPropertyTrue(project, CODE_GEN_PATH_CASE_SENSITIVE)) - { - task.setGenerateLowercasePath(false); - } - if (isPropertyTrue(project, ENABLE_FLUENT_API)) - { - task.setGenerateFluentApi(true); - } - task.doFirst(new CacheableAction<>(t -> project.delete(generatedRestClientDir))); - }); - - if (dataTemplateJarTask != null) - { - generateRestClientTask.dependsOn(dataTemplateJarTask); - } - - // TODO: Tighten the types so that _generateSourcesJarTask must be of type Jar. 
- ((Jar) _generateSourcesJarTask).from(generateRestClientTask.getDestinationDir()); - _generateSourcesJarTask.dependsOn(generateRestClientTask); - - _generateJavadocTask.source(generateRestClientTask.getDestinationDir()); - _generateJavadocTask.setClasspath(_generateJavadocTask.getClasspath() - .plus(project.getConfigurations().getByName("restClientCompile")) - .plus(generateRestClientTask.getResolverPath())); - _generateJavadocTask.dependsOn(generateRestClientTask); - - // make sure rest client source files have been generated before compiling them - JavaCompile compileGeneratedRestClientTask = (JavaCompile) project.getTasks() - .getByName(targetSourceSet.getCompileJavaTaskName()); - compileGeneratedRestClientTask.dependsOn(generateRestClientTask); - compileGeneratedRestClientTask.getOptions().getCompilerArgs().add("-Xlint:-deprecation"); - - // create the rest model jar file - Task restModelJarTask = project.getTasks().create(sourceSet.getName() + "RestModelJar", Jar.class, task -> - { - task.from(idlDir, copySpec -> - { - copySpec.eachFile(fileCopyDetails -> project.getLogger() - .info("Add idl file: {}", fileCopyDetails)); - copySpec.setIncludes(Collections.singletonList('*' + IDL_FILE_SUFFIX)); - }); - // FIXME change to #getArchiveAppendix().set(...); breaks backwards-compatibility before 5.1 - // DataHub Note - applied FIXME - task.getArchiveAppendix().set(getAppendix(sourceSet, "rest-model")); - task.setDescription("Generate rest model jar"); - }); - - // create the rest client jar file - Task restClientJarTask = project.getTasks() - .create(sourceSet.getName() + "RestClientJar", Jar.class, task -> - { - task.dependsOn(compileGeneratedRestClientTask); - task.from(idlDir, copySpec -> { - copySpec.eachFile(fileCopyDetails -> { - project.getLogger().info("Add interface file: {}", fileCopyDetails); - fileCopyDetails.setPath("idl" + File.separatorChar + fileCopyDetails.getPath()); - }); - copySpec.setIncludes(Collections.singletonList('*' + IDL_FILE_SUFFIX)); - }); - task.from(targetSourceSet.getOutput()); - // FIXME change to #getArchiveAppendix().set(...); breaks backwards-compatibility before 5.1 - // DataHub Note - applied FIXME - task.getArchiveAppendix().set(getAppendix(sourceSet, "rest-client")); - task.setDescription("Generate rest client jar"); - }); - - // add the rest model jar and the rest client jar to the list of project artifacts. - if (!isTestSourceSet(sourceSet)) - { - project.getArtifacts().add("restModel", restModelJarTask); - project.getArtifacts().add("restClient", restClientJarTask); - } - else - { - project.getArtifacts().add("testRestModel", restModelJarTask); - project.getArtifacts().add("testRestClient", restClientJarTask); - } - } - - // Return the appendix for generated jar files. - // The source set name is not included for the main source set. - private static String getAppendix(SourceSet sourceSet, String suffix) - { - return sourceSet.getName().equals("main") ? 
suffix : sourceSet.getName() + '-' + suffix; - } - - private static Project getApiProject(Project project) - { - if (project.getExtensions().getExtraProperties().has("apiProject")) - { - return (Project) project.getExtensions().getExtraProperties().get("apiProject"); - } - - List subsSuffixes; - if (project.getExtensions().getExtraProperties().has("apiProjectSubstitutionSuffixes")) - { - @SuppressWarnings("unchecked") - List suffixValue = (List) project.getExtensions() - .getExtraProperties().get("apiProjectSubstitutionSuffixes"); - - subsSuffixes = suffixValue; - } - else - { - subsSuffixes = Arrays.asList("-impl", "-service", "-server", "-server-impl"); - } - - for (String suffix : subsSuffixes) - { - if (project.getPath().endsWith(suffix)) - { - String searchPath = project.getPath().substring(0, project.getPath().length() - suffix.length()) + "-api"; - Project apiProject = project.findProject(searchPath); - if (apiProject != null) - { - return apiProject; - } - } - } - - return project.findProject(project.getPath() + "-api"); - } - - private static Project getCheckedApiProject(Project project) - { - Project apiProject = getApiProject(project); - - if (apiProject == project) - { - throw new GradleException("The API project of ${project.path} must not be itself."); - } - - return apiProject; - } - - /** - * return the property value if the property exists and is not empty (-Pname=value) - * return null if property does not exist or the property is empty (-Pname) - * - * @param project the project where to look for the property - * @param propertyName the name of the property - */ - public static String getNonEmptyProperty(Project project, String propertyName) - { - if (!project.hasProperty(propertyName)) - { - return null; - } - - String propertyValue = project.property(propertyName).toString(); - if (propertyValue.isEmpty()) - { - return null; - } - - return propertyValue; - } - - /** - * Return true if the given property exists and its value is true - * - * @param project the project where to look for the property - * @param propertyName the name of the property - */ - public static boolean isPropertyTrue(Project project, String propertyName) - { - return project.hasProperty(propertyName) && Boolean.valueOf(project.property(propertyName).toString()); - } - - private static String createModifiedFilesMessage(Collection nonEquivExpectedFiles, - Collection foldersToBeBuilt) - { - StringBuilder builder = new StringBuilder(); - builder.append("\nRemember to checkin the changes to the following new or modified files:\n"); - for (String file : nonEquivExpectedFiles) - { - builder.append(" "); - builder.append(file); - builder.append("\n"); - } - - if (!foldersToBeBuilt.isEmpty()) - { - builder.append("\nThe file modifications include service interface changes, you can build the the following projects " - + "to re-generate the client APIs accordingly:\n"); - for (String folder : foldersToBeBuilt) - { - builder.append(" "); - builder.append(folder); - builder.append("\n"); - } - } - - return builder.toString(); - } - - private static String createPossibleMissingFilesMessage(Collection missingFiles) - { - StringBuilder builder = new StringBuilder(); - builder.append("If this is the result of an automated build, then you may have forgotten to check in some snapshot or idl files:\n"); - for (String file : missingFiles) - { - builder.append(" "); - builder.append(file); - builder.append("\n"); - } - - return builder.toString(); - } - - private static String findProperty(FileCompatibilityType type) - 
{ - String property; - switch (type) - { - case SNAPSHOT: - property = SNAPSHOT_COMPAT_REQUIREMENT; - break; - case IDL: - property = IDL_COMPAT_REQUIREMENT; - break; - case PEGASUS_SCHEMA_SNAPSHOT: - property = PEGASUS_SCHEMA_SNAPSHOT_REQUIREMENT; - break; - case PEGASUS_EXTENSION_SCHEMA_SNAPSHOT: - property = PEGASUS_EXTENSION_SCHEMA_SNAPSHOT_REQUIREMENT; - break; - default: - throw new GradleException("No property defined for compatibility type " + type); - } - return property; - } - - private static Set buildWatchedRestModelInputDirs(Project project, SourceSet sourceSet) { - @SuppressWarnings("unchecked") - Map pegasusOptions = (Map) project - .getExtensions().getExtraProperties().get("pegasus"); - - File rootPath = new File(project.getProjectDir(), - pegasusOptions.get(sourceSet.getName()).restModelOptions.getRestResourcesRootPath()); - - IdlOptions idlOptions = pegasusOptions.get(sourceSet.getName()).idlOptions; - - // if idlItems exist, only watch the smaller subset - return idlOptions.getIdlItems().stream() - .flatMap(idlItem -> Arrays.stream(idlItem.packageNames)) - .map(packageName -> new File(rootPath, packageName.replace('.', '/'))) - .collect(Collectors.toCollection(TreeSet::new)); - } - - private static Set difference(Set left, Set right) - { - Set result = new HashSet<>(left); - result.removeAll(right); - return result; - } - - /** - * Configures the given source set so that its data schema directory (usually 'pegasus') is marked as a resource root. - * The purpose of this is to improve the IDE experience. Makes sure to exclude this directory from being packaged in - * with the default Jar task. - */ - private static void configureDataSchemaResourcesRoot(Project project, SourceSet sourceSet) - { - sourceSet.resources(sourceDirectorySet -> { - final String dataSchemaPath = getDataSchemaPath(project, sourceSet); - final File dataSchemaRoot = project.file(dataSchemaPath); - sourceDirectorySet.srcDir(dataSchemaPath); - project.getLogger().info("Adding resource root '{}'", dataSchemaPath); - - final String extensionsSchemaPath = getExtensionSchemaPath(project, sourceSet); - final File extensionsSchemaRoot = project.file(extensionsSchemaPath); - sourceDirectorySet.srcDir(extensionsSchemaPath); - project.getLogger().info("Adding resource root '{}'", extensionsSchemaPath); - - // Exclude the data schema and extensions schema directory from being copied into the default Jar task - sourceDirectorySet.getFilter().exclude(fileTreeElement -> { - final File file = fileTreeElement.getFile(); - // Traversal starts with the children of a resource root, so checking the direct parent is sufficient - final boolean underDataSchemaRoot = dataSchemaRoot.equals(file.getParentFile()); - final boolean underExtensionsSchemaRoot = extensionsSchemaRoot.equals(file.getParentFile()); - final boolean exclude = (underDataSchemaRoot || underExtensionsSchemaRoot); - if (exclude) - { - project.getLogger().info("Excluding resource directory '{}'", file); - } - return exclude; - }); - }); - } - - private Task generatePegasusSchemaSnapshot(Project project, SourceSet sourceSet, String taskName, File inputDir, File outputDir, - boolean isExtensionSchema) - { - return project.getTasks().create(sourceSet.getTaskName("generate", taskName), - GeneratePegasusSnapshotTask.class, task -> - { - task.setInputDir(inputDir); - task.setResolverPath(getDataModelConfig(project, sourceSet).plus(project.files(getDataSchemaPath(project, sourceSet)))); - 
task.setClassPath(project.getConfigurations().getByName(PEGASUS_PLUGIN_CONFIGURATION)); - task.setPegasusSchemaSnapshotDestinationDir(outputDir); - task.setExtensionSchema(isExtensionSchema); - if (isPropertyTrue(project, ENABLE_ARG_FILE)) - { - task.setEnableArgFile(true); - } - }); - } - - private Task publishPegasusSchemaSnapshot(Project project, SourceSet sourceSet, String taskName, Task checkPegasusSnapshotTask, - File inputDir, File outputDir) - { - return project.getTasks().create(sourceSet.getTaskName("publish", taskName), - Sync.class, task -> - { - task.dependsOn(checkPegasusSnapshotTask); - task.from(inputDir); - task.into(outputDir); - task.onlyIf(t -> !SharedFileUtils.getSuffixedFiles(project, inputDir, PDL_FILE_SUFFIX).isEmpty()); - }); - } - - private void checkGradleVersion(Project project) - { - if (MIN_REQUIRED_VERSION.compareTo(GradleVersion.current()) > 0) - { - throw new GradleException(String.format("This plugin does not support %s. Please use %s or later.", - GradleVersion.current(), - MIN_REQUIRED_VERSION)); - } - if (MIN_SUGGESTED_VERSION.compareTo(GradleVersion.current()) > 0) - { - project.getLogger().warn(String.format("Pegasus supports %s, but it may not be supported in the next major release. Please use %s or later.", - GradleVersion.current(), - MIN_SUGGESTED_VERSION)); - } - } - - /** - * Reflection is necessary to obscure types introduced in Gradle 5.3 - * - * @param sourceSet the target sourceset upon which to create a new feature variant - * @return an Action which modifies a org.gradle.api.plugins.FeatureSpec instance - */ - private Action/**/ createFeatureVariantFromSourceSet(SourceSet sourceSet) - { - return featureSpec -> { - try - { - Class clazz = Class.forName("org.gradle.api.plugins.FeatureSpec"); - Method usingSourceSet = clazz.getDeclaredMethod("usingSourceSet", SourceSet.class); - usingSourceSet.invoke(featureSpec, sourceSet); - } - catch (ReflectiveOperationException e) - { - throw new GradleException("Unable to invoke FeatureSpec#usingSourceSet(SourceSet)", e); - } - }; - } - - protected static boolean isAtLeastGradle61() - { - return GradleVersion.current().getBaseVersion().compareTo(GradleVersion.version("6.1")) >= 0; - } - - public static boolean isAtLeastGradle7() { - return GradleVersion.current().getBaseVersion().compareTo(GradleVersion.version("7.0")) >= 0; - } -} \ No newline at end of file diff --git a/buildSrc/src/main/java/com/linkedin/pegasus/gradle/tasks/ChangedFileReportTask.java b/buildSrc/src/main/java/com/linkedin/pegasus/gradle/tasks/ChangedFileReportTask.java deleted file mode 100644 index a2aafaf1be017..0000000000000 --- a/buildSrc/src/main/java/com/linkedin/pegasus/gradle/tasks/ChangedFileReportTask.java +++ /dev/null @@ -1,124 +0,0 @@ -package com.linkedin.pegasus.gradle.tasks; - -import java.util.ArrayList; -import java.util.Collection; -import java.util.HashSet; -import java.util.Set; -import java.util.function.Consumer; -import java.util.stream.Collectors; -import org.gradle.api.DefaultTask; -import org.gradle.api.file.FileCollection; -import org.gradle.api.specs.Specs; -import org.gradle.api.tasks.InputFiles; -import org.gradle.api.tasks.Internal; -import org.gradle.api.tasks.SkipWhenEmpty; -import org.gradle.api.tasks.TaskAction; -import org.gradle.work.FileChange; -import org.gradle.work.InputChanges; - - -public class ChangedFileReportTask extends DefaultTask -{ - private final Collection _needCheckinFiles = new ArrayList<>(); - - private FileCollection _idlFiles = getProject().files(); - private FileCollection 
_snapshotFiles = getProject().files(); - - public ChangedFileReportTask() - { - //with Gradle 6.0, Declaring an incremental task without outputs is not allowed. - getOutputs().upToDateWhen(Specs.satisfyNone()); - } - - // DataHub Note - updated for InputChanges - @TaskAction - public void checkFilesForChanges(InputChanges inputChanges) - { - getLogger().lifecycle("Checking idl and snapshot files for changes..."); - getLogger().info("idlFiles: " + _idlFiles.getAsPath()); - getLogger().info("snapshotFiles: " + _snapshotFiles.getAsPath()); - - Set filesRemoved = new HashSet<>(); - Set filesAdded = new HashSet<>(); - Set filesChanged = new HashSet<>(); - - if (inputChanges.isIncremental()) - { - Consumer handleChange = change -> - { - switch (change.getChangeType()) { - case ADDED: - filesAdded.add(change.getFile().getAbsolutePath()); - break; - case REMOVED: - filesRemoved.add(change.getFile().getAbsolutePath()); - break; - case MODIFIED: - filesChanged.add(change.getFile().getAbsolutePath()); - break; - } - }; - - inputChanges.getFileChanges(_idlFiles).forEach(handleChange); - inputChanges.getFileChanges(_snapshotFiles).forEach(handleChange); - - if (!filesRemoved.isEmpty()) - { - String files = joinByComma(filesRemoved); - _needCheckinFiles.add(files); - getLogger().lifecycle( - "The following files have been removed, be sure to remove them from source control: {}", files); - } - - if (!filesAdded.isEmpty()) - { - String files = joinByComma(filesAdded); - _needCheckinFiles.add(files); - getLogger().lifecycle("The following files have been added, be sure to add them to source control: {}", files); - } - - if (!filesChanged.isEmpty()) - { - String files = joinByComma(filesChanged); - _needCheckinFiles.add(files); - getLogger().lifecycle( - "The following files have been changed, be sure to commit the changes to source control: {}", files); - } - } - } - - private String joinByComma(Set files) - { - return files.stream().collect(Collectors.joining(", ")); - } - - @InputFiles - @SkipWhenEmpty - public FileCollection getSnapshotFiles() - { - return _snapshotFiles; - } - - public void setSnapshotFiles(FileCollection snapshotFiles) - { - _snapshotFiles = snapshotFiles; - } - - @InputFiles - @SkipWhenEmpty - public FileCollection getIdlFiles() - { - return _idlFiles; - } - - public void setIdlFiles(FileCollection idlFiles) - { - _idlFiles = idlFiles; - } - - @Internal - public Collection getNeedCheckinFiles() - { - return _needCheckinFiles; - } -} \ No newline at end of file diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java index e3eef0688c269..28b3a982c7b28 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java @@ -2436,7 +2436,9 @@ private void configureAssertionResolvers(final RuntimeWiring.Builder builder) { ? 
assertion.getDataPlatformInstance().getUrn() : null; })) - .dataFetcher("runEvents", new AssertionRunEventResolver(entityClient))); + .dataFetcher("runEvents", new AssertionRunEventResolver(entityClient)) + .dataFetcher( + "aspects", new WeaklyTypedAspectsResolver(entityClient, entityRegistry))); } private void configurePolicyResolvers(final RuntimeWiring.Builder builder) { diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/WeaklyTypedAspectsResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/WeaklyTypedAspectsResolver.java index d8665ae784bd1..fd23cd5fdda45 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/WeaklyTypedAspectsResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/WeaklyTypedAspectsResolver.java @@ -37,7 +37,12 @@ public class WeaklyTypedAspectsResolver implements DataFetcher get(DataFetchingEnvironment environmen final int start = input.getStart() != null ? input.getStart() : DEFAULT_START; final int count = input.getCount() != null ? input.getCount() : DEFAULT_COUNT; final String query = input.getQuery() != null ? input.getQuery() : "*"; + final SearchFlags searchFlags = mapInputFlags(input.getSearchFlags()); // escape forward slash since it is a reserved character in Elasticsearch final String sanitizedQuery = ResolverUtils.escapeForwardSlash(query); @@ -83,7 +85,8 @@ public CompletableFuture get(DataFetchingEnvironment environmen sanitizedQuery, start, count, - context.getAuthentication()); + context.getAuthentication(), + searchFlags); return mapBrowseResults(browseResults); } catch (Exception e) { throw new RuntimeException("Failed to execute browse V2", e); diff --git a/datahub-graphql-core/src/main/resources/entity.graphql b/datahub-graphql-core/src/main/resources/entity.graphql index 0074dc3fcb44c..762514b480fca 100644 --- a/datahub-graphql-core/src/main/resources/entity.graphql +++ b/datahub-graphql-core/src/main/resources/entity.graphql @@ -1763,6 +1763,12 @@ input AspectParams { Only fetch auto render aspects """ autoRenderOnly: Boolean + + """ + Fetch using aspect names + If absent, returns all aspects matching other inputs + """ + aspectNames: [String!] } @@ -6788,6 +6794,12 @@ type Assertion implements EntityWithRelationships & Entity { Edges extending from this entity grouped by direction in the lineage graph """ lineage(input: LineageInput!): EntityLineageResult + + """ + Experimental API. + For fetching extra aspects that do not have custom UI code yet + """ + aspects(input: AspectParams): [RawAspect!] 
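Editor's note: the new `aspects(input: AspectParams)` field and the `aspectNames` filter are easiest to see from the caller's side. Below is a minimal sketch of the query shape, written as a plain Java string so it can be dropped into a test or script; the root `assertion` field, the `aspectName`/`payload` selections on `RawAspect`, and the aspect name `assertionInfo` are assumptions about the existing schema used for illustration, not part of this change.

```java
// Hypothetical usage sketch: the GraphQL document a client might send to read raw aspects
// from an Assertion via the new experimental field. Selected sub-fields and the aspect name
// "assertionInfo" are illustrative assumptions.
public final class AssertionAspectsQuery {

  static final String QUERY = String.join("\n",
      "query assertionAspects($urn: String!) {",
      "  assertion(urn: $urn) {",
      "    aspects(input: { aspectNames: [\"assertionInfo\"] }) {",
      "      aspectName",
      "      payload",
      "    }",
      "  }",
      "}");

  public static void main(String[] args) {
    System.out.println(QUERY);
  }
}
```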
} """ diff --git a/datahub-graphql-core/src/main/resources/search.graphql b/datahub-graphql-core/src/main/resources/search.graphql index 2b921601058fb..a906362cee185 100644 --- a/datahub-graphql-core/src/main/resources/search.graphql +++ b/datahub-graphql-core/src/main/resources/search.graphql @@ -1230,6 +1230,11 @@ input BrowseV2Input { The search query string """ query: String + + """ + Flags controlling search options + """ + searchFlags: SearchFlags } """ diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/browse/BrowseV2ResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/browse/BrowseV2ResolverTest.java index c565e771a0475..41797fac636f1 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/browse/BrowseV2ResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/browse/BrowseV2ResolverTest.java @@ -21,6 +21,7 @@ import com.linkedin.metadata.browse.BrowseResultGroupV2Array; import com.linkedin.metadata.browse.BrowseResultMetadata; import com.linkedin.metadata.browse.BrowseResultV2; +import com.linkedin.metadata.query.SearchFlags; import com.linkedin.metadata.query.filter.ConjunctiveCriterion; import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray; import com.linkedin.metadata.query.filter.Criterion; @@ -262,7 +263,8 @@ private static EntityClient initMockEntityClient( Mockito.eq(query), Mockito.eq(start), Mockito.eq(limit), - Mockito.any(Authentication.class))) + Mockito.any(Authentication.class), + Mockito.nullable(SearchFlags.class))) .thenReturn(result); return client; } diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/UpgradeCliApplication.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/UpgradeCliApplication.java index ff8bd542fbdff..50847da07be73 100644 --- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/UpgradeCliApplication.java +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/UpgradeCliApplication.java @@ -2,6 +2,10 @@ import com.linkedin.gms.factory.auth.AuthorizerChainFactory; import com.linkedin.gms.factory.auth.DataHubAuthorizerFactory; +import com.linkedin.gms.factory.graphql.GraphQLEngineFactory; +import com.linkedin.gms.factory.kafka.KafkaEventConsumerFactory; +import com.linkedin.gms.factory.kafka.SimpleKafkaConsumerFactory; +import com.linkedin.gms.factory.kafka.schemaregistry.InternalSchemaRegistryFactory; import com.linkedin.gms.factory.telemetry.ScheduledAnalyticsFactory; import org.springframework.boot.WebApplicationType; import org.springframework.boot.autoconfigure.SpringBootApplication; @@ -24,7 +28,11 @@ classes = { ScheduledAnalyticsFactory.class, AuthorizerChainFactory.class, - DataHubAuthorizerFactory.class + DataHubAuthorizerFactory.class, + SimpleKafkaConsumerFactory.class, + KafkaEventConsumerFactory.class, + InternalSchemaRegistryFactory.class, + GraphQLEngineFactory.class }) }) public class UpgradeCliApplication { diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/BackfillBrowsePathsV2Config.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/BackfillBrowsePathsV2Config.java index 406963c58fd71..2b2f4648f76e7 100644 --- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/BackfillBrowsePathsV2Config.java +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/BackfillBrowsePathsV2Config.java @@ -3,6 +3,7 @@ import 
com.linkedin.datahub.upgrade.system.entity.steps.BackfillBrowsePathsV2; import com.linkedin.metadata.entity.EntityService; import com.linkedin.metadata.search.SearchService; +import org.springframework.beans.factory.annotation.Value; import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Configuration; @@ -11,7 +12,12 @@ public class BackfillBrowsePathsV2Config { @Bean public BackfillBrowsePathsV2 backfillBrowsePathsV2( - EntityService entityService, SearchService searchService) { - return new BackfillBrowsePathsV2(entityService, searchService); + EntityService entityService, + SearchService searchService, + @Value("${systemUpdate.browsePathsV2.enabled}") final boolean enabled, + @Value("${systemUpdate.browsePathsV2.reprocess.enabled}") final boolean reprocessEnabled, + @Value("${systemUpdate.browsePathsV2.batchSize}") final Integer batchSize) { + return new BackfillBrowsePathsV2( + entityService, searchService, enabled, reprocessEnabled, batchSize); } } diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/ReindexDataJobViaNodesCLLConfig.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/ReindexDataJobViaNodesCLLConfig.java index 06311e1853874..83dad80944f5f 100644 --- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/ReindexDataJobViaNodesCLLConfig.java +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/ReindexDataJobViaNodesCLLConfig.java @@ -2,6 +2,7 @@ import com.linkedin.datahub.upgrade.system.via.ReindexDataJobViaNodesCLL; import com.linkedin.metadata.entity.EntityService; +import org.springframework.beans.factory.annotation.Value; import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Configuration; @@ -9,7 +10,10 @@ public class ReindexDataJobViaNodesCLLConfig { @Bean - public ReindexDataJobViaNodesCLL _reindexDataJobViaNodesCLL(EntityService entityService) { - return new ReindexDataJobViaNodesCLL(entityService); + public ReindexDataJobViaNodesCLL _reindexDataJobViaNodesCLL( + EntityService entityService, + @Value("${systemUpdate.dataJobNodeCLL.enabled}") final boolean enabled, + @Value("${systemUpdate.dataJobNodeCLL.batchSize}") final Integer batchSize) { + return new ReindexDataJobViaNodesCLL(entityService, enabled, batchSize); } } diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/SystemUpdateCondition.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/SystemUpdateCondition.java new file mode 100644 index 0000000000000..ea432dfa9f7df --- /dev/null +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/SystemUpdateCondition.java @@ -0,0 +1,14 @@ +package com.linkedin.datahub.upgrade.config; + +import org.springframework.boot.ApplicationArguments; +import org.springframework.context.annotation.Condition; +import org.springframework.context.annotation.ConditionContext; +import org.springframework.core.type.AnnotatedTypeMetadata; + +public class SystemUpdateCondition implements Condition { + @Override + public boolean matches(ConditionContext context, AnnotatedTypeMetadata metadata) { + return context.getBeanFactory().getBean(ApplicationArguments.class).getNonOptionArgs().stream() + .anyMatch("SystemUpdate"::equals); + } +} diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/SystemUpdateConfig.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/SystemUpdateConfig.java index 
177d4b531ba86..cde3a29248fb5 100644 --- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/SystemUpdateConfig.java +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/SystemUpdateConfig.java @@ -8,6 +8,7 @@ import com.linkedin.gms.factory.common.TopicConventionFactory; import com.linkedin.gms.factory.config.ConfigurationProvider; import com.linkedin.gms.factory.kafka.DataHubKafkaProducerFactory; +import com.linkedin.gms.factory.kafka.schemaregistry.InternalSchemaRegistryFactory; import com.linkedin.gms.factory.kafka.schemaregistry.SchemaRegistryConfig; import com.linkedin.metadata.config.kafka.KafkaConfiguration; import com.linkedin.metadata.dao.producer.KafkaEventProducer; @@ -21,9 +22,12 @@ import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Qualifier; import org.springframework.beans.factory.annotation.Value; +import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty; import org.springframework.boot.autoconfigure.kafka.KafkaProperties; import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Conditional; import org.springframework.context.annotation.Configuration; +import org.springframework.context.annotation.Primary; @Slf4j @Configuration @@ -74,4 +78,23 @@ protected KafkaEventProducer duheKafkaEventProducer( duheSchemaRegistryConfig, kafkaConfiguration, properties)); return new KafkaEventProducer(producer, topicConvention, kafkaHealthChecker); } + + /** + * The ReindexDataJobViaNodesCLLConfig step requires publishing to MCL. Overriding the default + * producer with this special producer which doesn't require an active registry. + * + *
Use when INTERNAL registry and is SYSTEM_UPDATE + *
This forces this producer into the EntityService + */ + @Primary + @Bean(name = "kafkaEventProducer") + @Conditional(SystemUpdateCondition.class) + @ConditionalOnProperty( + name = "kafka.schemaRegistry.type", + havingValue = InternalSchemaRegistryFactory.TYPE) + protected KafkaEventProducer kafkaEventProducer( + @Qualifier("duheKafkaEventProducer") KafkaEventProducer kafkaEventProducer) { + return kafkaEventProducer; + } } diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/entity/steps/BackfillBrowsePathsV2.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/entity/steps/BackfillBrowsePathsV2.java index 4b9fc5bba0204..9b023e1e239a2 100644 --- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/entity/steps/BackfillBrowsePathsV2.java +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/entity/steps/BackfillBrowsePathsV2.java @@ -11,8 +11,20 @@ public class BackfillBrowsePathsV2 implements Upgrade { private final List _steps; - public BackfillBrowsePathsV2(EntityService entityService, SearchService searchService) { - _steps = ImmutableList.of(new BackfillBrowsePathsV2Step(entityService, searchService)); + public BackfillBrowsePathsV2( + EntityService entityService, + SearchService searchService, + boolean enabled, + boolean reprocessEnabled, + Integer batchSize) { + if (enabled) { + _steps = + ImmutableList.of( + new BackfillBrowsePathsV2Step( + entityService, searchService, reprocessEnabled, batchSize)); + } else { + _steps = ImmutableList.of(); + } } @Override diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/entity/steps/BackfillBrowsePathsV2Step.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/entity/steps/BackfillBrowsePathsV2Step.java index 601ce4d25493c..2d64e0052ae82 100644 --- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/entity/steps/BackfillBrowsePathsV2Step.java +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/entity/steps/BackfillBrowsePathsV2Step.java @@ -16,6 +16,7 @@ import com.linkedin.events.metadata.ChangeType; import com.linkedin.metadata.Constants; import com.linkedin.metadata.aspect.utils.DefaultAspectsUtil; +import com.linkedin.metadata.boot.BootstrapStep; import com.linkedin.metadata.entity.EntityService; import com.linkedin.metadata.query.SearchFlags; import com.linkedin.metadata.query.filter.Condition; @@ -37,9 +38,8 @@ @Slf4j public class BackfillBrowsePathsV2Step implements UpgradeStep { - public static final String BACKFILL_BROWSE_PATHS_V2 = "BACKFILL_BROWSE_PATHS_V2"; - public static final String REPROCESS_DEFAULT_BROWSE_PATHS_V2 = - "REPROCESS_DEFAULT_BROWSE_PATHS_V2"; + private static final String UPGRADE_ID = "BackfillBrowsePathsV2Step"; + private static final Urn UPGRADE_ID_URN = BootstrapStep.getUpgradeUrn(UPGRADE_ID); public static final String DEFAULT_BROWSE_PATH_V2 = "␟Default"; private static final Set ENTITY_TYPES_TO_MIGRATE = @@ -53,14 +53,22 @@ public class BackfillBrowsePathsV2Step implements UpgradeStep { Constants.ML_MODEL_GROUP_ENTITY_NAME, Constants.ML_FEATURE_TABLE_ENTITY_NAME, Constants.ML_FEATURE_ENTITY_NAME); - private static final Integer BATCH_SIZE = 5000; - private final EntityService _entityService; - private final SearchService _searchService; - - public BackfillBrowsePathsV2Step(EntityService entityService, SearchService searchService) { - _searchService = searchService; - _entityService = entityService; + private final EntityService entityService; + 
private final SearchService searchService; + + private final boolean reprocessEnabled; + private final Integer batchSize; + + public BackfillBrowsePathsV2Step( + EntityService entityService, + SearchService searchService, + boolean reprocessEnabled, + Integer batchSize) { + this.searchService = searchService; + this.entityService = entityService; + this.reprocessEnabled = reprocessEnabled; + this.batchSize = batchSize; } @Override @@ -78,11 +86,14 @@ public Function executable() { log.info( String.format( "Upgrading batch %s-%s of browse paths for entity type %s", - migratedCount, migratedCount + BATCH_SIZE, entityType)); + migratedCount, migratedCount + batchSize, entityType)); scrollId = backfillBrowsePathsV2(entityType, auditStamp, scrollId); - migratedCount += BATCH_SIZE; + migratedCount += batchSize; } while (scrollId != null); } + + BootstrapStep.setUpgradeResult(UPGRADE_ID_URN, entityService); + return new DefaultUpgradeStepResult(id(), UpgradeStepResult.Result.SUCCEEDED); }; } @@ -91,27 +102,27 @@ private String backfillBrowsePathsV2(String entityType, AuditStamp auditStamp, S final Filter filter; - if (System.getenv().containsKey(REPROCESS_DEFAULT_BROWSE_PATHS_V2) - && Boolean.parseBoolean(System.getenv(REPROCESS_DEFAULT_BROWSE_PATHS_V2))) { + if (reprocessEnabled) { filter = backfillDefaultBrowsePathsV2Filter(); } else { filter = backfillBrowsePathsV2Filter(); } final ScrollResult scrollResult = - _searchService.scrollAcrossEntities( + searchService.scrollAcrossEntities( ImmutableList.of(entityType), "*", filter, null, scrollId, null, - BATCH_SIZE, + batchSize, new SearchFlags() .setFulltext(true) .setSkipCache(true) .setSkipHighlighting(true) .setSkipAggregates(true)); + if (scrollResult.getNumEntities() == 0 || scrollResult.getEntities().size() == 0) { return null; } @@ -183,7 +194,7 @@ private Filter backfillDefaultBrowsePathsV2Filter() { private void ingestBrowsePathsV2(Urn urn, AuditStamp auditStamp) throws Exception { BrowsePathsV2 browsePathsV2 = - DefaultAspectsUtil.buildDefaultBrowsePathV2(urn, true, _entityService); + DefaultAspectsUtil.buildDefaultBrowsePathV2(urn, true, entityService); log.debug(String.format("Adding browse path v2 for urn %s with value %s", urn, browsePathsV2)); MetadataChangeProposal proposal = new MetadataChangeProposal(); proposal.setEntityUrn(urn); @@ -193,12 +204,12 @@ private void ingestBrowsePathsV2(Urn urn, AuditStamp auditStamp) throws Exceptio proposal.setSystemMetadata( new SystemMetadata().setRunId(DEFAULT_RUN_ID).setLastObserved(System.currentTimeMillis())); proposal.setAspect(GenericRecordUtils.serializeAspect(browsePathsV2)); - _entityService.ingestProposal(proposal, auditStamp, true); + entityService.ingestProposal(proposal, auditStamp, true); } @Override public String id() { - return "BackfillBrowsePathsV2Step"; + return UPGRADE_ID; } /** @@ -211,7 +222,22 @@ public boolean isOptional() { } @Override + /** + * Returns whether the upgrade should be skipped. Uses previous run history or the environment + * variables REPROCESS_DEFAULT_BROWSE_PATHS_V2 & BACKFILL_BROWSE_PATHS_V2 to determine whether to + * skip. + */ public boolean skip(UpgradeContext context) { - return !Boolean.parseBoolean(System.getenv(BACKFILL_BROWSE_PATHS_V2)); + boolean envEnabled = Boolean.parseBoolean(System.getenv("BACKFILL_BROWSE_PATHS_V2")); + + if (reprocessEnabled && envEnabled) { + return false; + } + + boolean previouslyRun = entityService.exists(UPGRADE_ID_URN, true); + if (previouslyRun) { + log.info("{} was already run. 
Skipping.", id()); + } + return (previouslyRun || !envEnabled); } } diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/via/ReindexDataJobViaNodesCLL.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/via/ReindexDataJobViaNodesCLL.java index 41179a50c4b54..59975693322d1 100644 --- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/via/ReindexDataJobViaNodesCLL.java +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/via/ReindexDataJobViaNodesCLL.java @@ -18,8 +18,13 @@ public class ReindexDataJobViaNodesCLL implements Upgrade { private final List _steps; - public ReindexDataJobViaNodesCLL(EntityService entityService) { - _steps = ImmutableList.of(new ReindexDataJobViaNodesCLLStep(entityService)); + public ReindexDataJobViaNodesCLL( + EntityService entityService, boolean enabled, Integer batchSize) { + if (enabled) { + _steps = ImmutableList.of(new ReindexDataJobViaNodesCLLStep(entityService, batchSize)); + } else { + _steps = ImmutableList.of(); + } } @Override diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/via/ReindexDataJobViaNodesCLLStep.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/via/ReindexDataJobViaNodesCLLStep.java index 70afbc3d205b2..56166caf5b57e 100644 --- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/via/ReindexDataJobViaNodesCLLStep.java +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/via/ReindexDataJobViaNodesCLLStep.java @@ -11,7 +11,6 @@ import com.linkedin.metadata.entity.EntityService; import com.linkedin.metadata.entity.restoreindices.RestoreIndicesArgs; import com.linkedin.metadata.entity.restoreindices.RestoreIndicesResult; -import java.net.URISyntaxException; import java.util.function.Function; import lombok.extern.slf4j.Slf4j; @@ -21,12 +20,12 @@ public class ReindexDataJobViaNodesCLLStep implements UpgradeStep { private static final String UPGRADE_ID = "via-node-cll-reindex-datajob"; private static final Urn UPGRADE_ID_URN = BootstrapStep.getUpgradeUrn(UPGRADE_ID); - private static final Integer BATCH_SIZE = 5000; + private final EntityService entityService; + private final Integer batchSize; - private final EntityService _entityService; - - public ReindexDataJobViaNodesCLLStep(EntityService entityService) { - _entityService = entityService; + public ReindexDataJobViaNodesCLLStep(EntityService entityService, Integer batchSize) { + this.entityService = entityService; + this.batchSize = batchSize; } @Override @@ -35,17 +34,16 @@ public Function executable() { RestoreIndicesArgs args = new RestoreIndicesArgs() .setAspectName(DATA_JOB_INPUT_OUTPUT_ASPECT_NAME) - .setUrnLike("urn:li:" + DATA_JOB_ENTITY_NAME + ":%"); + .setUrnLike("urn:li:" + DATA_JOB_ENTITY_NAME + ":%") + .setBatchSize(batchSize); RestoreIndicesResult result = - _entityService.restoreIndices(args, x -> context.report().addLine((String) x)); + entityService.restoreIndices(args, x -> context.report().addLine((String) x)); context.report().addLine("Rows migrated: " + result.rowsMigrated); context.report().addLine("Rows ignored: " + result.ignored); - try { - BootstrapStep.setUpgradeResult(UPGRADE_ID_URN, _entityService); - context.report().addLine("State updated: " + UPGRADE_ID_URN); - } catch (URISyntaxException e) { - throw new RuntimeException(e); - } + + BootstrapStep.setUpgradeResult(UPGRADE_ID_URN, entityService); + context.report().addLine("State updated: " + UPGRADE_ID_URN); + return new 
DefaultUpgradeStepResult(id(), UpgradeStepResult.Result.SUCCEEDED); }; } @@ -70,7 +68,7 @@ public boolean isOptional() { * variable SKIP_REINDEX_DATA_JOB_INPUT_OUTPUT to determine whether to skip. */ public boolean skip(UpgradeContext context) { - boolean previouslyRun = _entityService.exists(UPGRADE_ID_URN, true); + boolean previouslyRun = entityService.exists(UPGRADE_ID_URN, true); boolean envFlagRecommendsSkip = Boolean.parseBoolean(System.getenv("SKIP_REINDEX_DATA_JOB_INPUT_OUTPUT")); if (previouslyRun) { diff --git a/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/DatahubUpgradeNoSchemaRegistryTest.java b/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/DatahubUpgradeNoSchemaRegistryTest.java index 83b8e028727ce..4c9e12c0ed151 100644 --- a/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/DatahubUpgradeNoSchemaRegistryTest.java +++ b/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/DatahubUpgradeNoSchemaRegistryTest.java @@ -4,6 +4,8 @@ import static org.testng.AssertJUnit.assertNotNull; import com.linkedin.datahub.upgrade.system.SystemUpdate; +import com.linkedin.metadata.dao.producer.KafkaEventProducer; +import com.linkedin.metadata.entity.EntityServiceImpl; import java.util.List; import java.util.Map; import java.util.Optional; @@ -19,19 +21,37 @@ classes = {UpgradeCliApplication.class, UpgradeCliApplicationTestConfiguration.class}, properties = { "kafka.schemaRegistry.type=INTERNAL", - "DATAHUB_UPGRADE_HISTORY_TOPIC_NAME=test_due_topic" - }) + "DATAHUB_UPGRADE_HISTORY_TOPIC_NAME=test_due_topic", + "METADATA_CHANGE_LOG_VERSIONED_TOPIC_NAME=test_mcl_versioned_topic" + }, + args = {"-u", "SystemUpdate"}) public class DatahubUpgradeNoSchemaRegistryTest extends AbstractTestNGSpringContextTests { @Autowired @Named("systemUpdate") private SystemUpdate systemUpdate; + @Autowired + @Named("kafkaEventProducer") + private KafkaEventProducer kafkaEventProducer; + + @Autowired + @Named("duheKafkaEventProducer") + private KafkaEventProducer duheKafkaEventProducer; + + @Autowired private EntityServiceImpl entityService; + @Test public void testSystemUpdateInit() { assertNotNull(systemUpdate); } + @Test + public void testSystemUpdateKafkaProducerOverride() { + assertEquals(kafkaEventProducer, duheKafkaEventProducer); + assertEquals(entityService.get_producer(), duheKafkaEventProducer); + } + @Test public void testSystemUpdateSend() { UpgradeStepResult.Result result = diff --git a/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/UpgradeCliApplicationTestConfiguration.java b/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/UpgradeCliApplicationTestConfiguration.java index be28b7f739cf5..5c2d6fff0f07c 100644 --- a/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/UpgradeCliApplicationTestConfiguration.java +++ b/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/UpgradeCliApplicationTestConfiguration.java @@ -1,15 +1,21 @@ package com.linkedin.datahub.upgrade; +import static org.mockito.ArgumentMatchers.anyString; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + import com.linkedin.gms.factory.auth.SystemAuthenticationFactory; -import com.linkedin.metadata.entity.EntityService; import com.linkedin.metadata.graph.GraphService; import com.linkedin.metadata.models.registry.ConfigEntityRegistry; import com.linkedin.metadata.models.registry.EntityRegistry; +import com.linkedin.metadata.registry.SchemaRegistryService; import com.linkedin.metadata.search.SearchService; import 
com.linkedin.metadata.search.elasticsearch.indexbuilder.EntityIndexBuilders; import io.ebean.Database; +import java.util.Optional; import org.springframework.boot.test.context.TestConfiguration; import org.springframework.boot.test.mock.mockito.MockBean; +import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Import; @TestConfiguration @@ -20,8 +26,6 @@ public class UpgradeCliApplicationTestConfiguration { @MockBean private Database ebeanServer; - @MockBean private EntityService _entityService; - @MockBean private SearchService searchService; @MockBean private GraphService graphService; @@ -31,4 +35,11 @@ public class UpgradeCliApplicationTestConfiguration { @MockBean ConfigEntityRegistry configEntityRegistry; @MockBean public EntityIndexBuilders entityIndexBuilders; + + @Bean + public SchemaRegistryService schemaRegistryService() { + SchemaRegistryService mockService = mock(SchemaRegistryService.class); + when(mockService.getSchemaIdForTopic(anyString())).thenReturn(Optional.of(0)); + return mockService; + } } diff --git a/docs-website/src/pages/slack/index.js b/docs-website/src/pages/slack/index.js index c85a1eefe5545..5989224191112 100644 --- a/docs-website/src/pages/slack/index.js +++ b/docs-website/src/pages/slack/index.js @@ -36,7 +36,7 @@ function SlackSurvey() {

Join the DataHub Slack Community!
- We will send the link to join our Slack community to your email.
+ We'd love to find out a little more about you!
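The DatahubUpgradeNoSchemaRegistryTest above launches the CLI with the SystemUpdate argument and checks that the @Primary, @Conditional(SystemUpdateCondition.class) producer ends up injected into EntityServiceImpl. A minimal, self-contained sketch of that Spring idiom follows; it is an illustration only, not code from this change, and every class name in it (RunModeCondition, DefaultProducer, BootstrapProducer) is invented.

```java
package sketch;

import java.util.List;
import org.springframework.boot.ApplicationArguments;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Condition;
import org.springframework.context.annotation.ConditionContext;
import org.springframework.context.annotation.Conditional;
import org.springframework.context.annotation.Configuration;
import org.springframework.context.annotation.Primary;
import org.springframework.core.type.AnnotatedTypeMetadata;

/** Matches only when the app was started with the "SystemUpdate" non-option argument. */
class RunModeCondition implements Condition {
  @Override
  public boolean matches(ConditionContext context, AnnotatedTypeMetadata metadata) {
    List<String> nonOptionArgs =
        context.getBeanFactory().getBean(ApplicationArguments.class).getNonOptionArgs();
    return nonOptionArgs.contains("SystemUpdate");
  }
}

interface Producer {}

class DefaultProducer implements Producer {}

class BootstrapProducer implements Producer {}

@Configuration
class ProducerConfig {

  @Bean
  Producer defaultProducer() {
    return new DefaultProducer();
  }

  /**
   * Registered only for a SystemUpdate run; being @Primary, it wins by-type injection over
   * defaultProducer, which is how the upgrade swaps in a producer that needs no live registry.
   */
  @Primary
  @Bean
  @Conditional(RunModeCondition.class)
  Producer bootstrapProducer() {
    return new BootstrapProducer();
  }
}
```

Injecting Producer resolves to BootstrapProducer only when the non-option argument is present; any other launch sees just DefaultProducer, which is the behaviour the test above verifies against the real kafkaEventProducer and duheKafkaEventProducer beans.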
diff --git a/docs/lineage/airflow.md b/docs/lineage/airflow.md index da3a36bc87be5..94eb69a2ed827 100644 --- a/docs/lineage/airflow.md +++ b/docs/lineage/airflow.md @@ -135,6 +135,8 @@ conn_id = datahub_rest_default # or datahub_kafka_default | capture_ownership_info | true | If true, the owners field of the DAG will be capture as a DataHub corpuser. | | capture_tags_info | true | If true, the tags field of the DAG will be captured as DataHub tags. | | capture_executions | true | If true, we'll capture task runs in DataHub in addition to DAG definitions. | +| datajob_url_link | taskinstance | If taskinstance, the datajob url will be taskinstance link on airflow. It can also be grid. + | | graceful_exceptions | true | If set to true, most runtime errors in the lineage backend will be suppressed and will not cause the overall task to fail. Note that configuration issues will still throw exceptions. | #### Validate that the plugin is working diff --git a/metadata-events/mxe-utils-avro/src/main/java/com/linkedin/metadata/EventUtils.java b/metadata-events/mxe-utils-avro/src/main/java/com/linkedin/metadata/EventUtils.java index 645c2fe210e09..adff32d5d336d 100644 --- a/metadata-events/mxe-utils-avro/src/main/java/com/linkedin/metadata/EventUtils.java +++ b/metadata-events/mxe-utils-avro/src/main/java/com/linkedin/metadata/EventUtils.java @@ -57,7 +57,7 @@ public class EventUtils { private static final Schema ORIGINAL_MCP_AVRO_SCHEMA = getAvroSchemaFromResource("avro/com/linkedin/mxe/MetadataChangeProposal.avsc"); - private static final Schema ORIGINAL_MCL_AVRO_SCHEMA = + public static final Schema ORIGINAL_MCL_AVRO_SCHEMA = getAvroSchemaFromResource("avro/com/linkedin/mxe/MetadataChangeLog.avsc"); private static final Schema ORIGINAL_FMCL_AVRO_SCHEMA = diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_config.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_config.py index 67843da2ba995..48d462b85702a 100644 --- a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_config.py +++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_config.py @@ -1,3 +1,4 @@ +from enum import Enum from typing import TYPE_CHECKING, Optional import datahub.emitter.mce_builder as builder @@ -8,6 +9,11 @@ from datahub_airflow_plugin.hooks.datahub import DatahubGenericHook +class DatajobUrl(Enum): + GRID = "grid" + TASKINSTANCE = "taskinstance" + + class DatahubLineageConfig(ConfigModel): # This class is shared between the lineage backend and the Airflow plugin. # The defaults listed here are only relevant for the lineage backend. @@ -41,6 +47,8 @@ class DatahubLineageConfig(ConfigModel): # The Airflow plugin behaves as if it were set to True. graceful_exceptions: bool = True + datajob_url_link: DatajobUrl = DatajobUrl.TASKINSTANCE + def make_emitter_hook(self) -> "DatahubGenericHook": # This is necessary to avoid issues with circular imports. 
from datahub_airflow_plugin.hooks.datahub import DatahubGenericHook @@ -65,6 +73,9 @@ def get_lineage_config() -> DatahubLineageConfig: disable_openlineage_plugin = conf.get( "datahub", "disable_openlineage_plugin", fallback=True ) + datajob_url_link = conf.get( + "datahub", "datajob_url_link", fallback=DatajobUrl.TASKINSTANCE.value + ) return DatahubLineageConfig( enabled=enabled, @@ -77,4 +88,5 @@ def get_lineage_config() -> DatahubLineageConfig: log_level=log_level, debug_emitter=debug_emitter, disable_openlineage_plugin=disable_openlineage_plugin, + datajob_url_link=datajob_url_link, ) diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/client/airflow_generator.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/client/airflow_generator.py index e1d53be7bae6b..2fa15f13e848b 100644 --- a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/client/airflow_generator.py +++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/client/airflow_generator.py @@ -13,6 +13,7 @@ from datahub.utilities.urns.data_job_urn import DataJobUrn from datahub_airflow_plugin._airflow_compat import AIRFLOW_PATCHED +from datahub_airflow_plugin._config import DatahubLineageConfig, DatajobUrl assert AIRFLOW_PATCHED @@ -208,6 +209,7 @@ def generate_datajob( set_dependencies: bool = True, capture_owner: bool = True, capture_tags: bool = True, + config: Optional[DatahubLineageConfig] = None, ) -> DataJob: """ @@ -217,6 +219,7 @@ def generate_datajob( :param set_dependencies: bool - whether to extract dependencies from airflow task :param capture_owner: bool - whether to extract owner from airflow task :param capture_tags: bool - whether to set tags automatically from airflow task + :param config: DatahubLineageConfig :return: DataJob - returns the generated DataJob object """ dataflow_urn = DataFlowUrn.create_from_ids( @@ -267,7 +270,11 @@ def generate_datajob( datajob.properties = job_property_bag base_url = conf.get("webserver", "base_url") - datajob.url = f"{base_url}/taskinstance/list/?flt1_dag_id_equals={datajob.flow_urn.get_flow_id()}&_flt_3_task_id={task.task_id}" + + if config and config.datajob_url_link == DatajobUrl.GRID: + datajob.url = f"{base_url}/dags/{datajob.flow_urn.get_flow_id()}/grid?task_id={task.task_id}" + else: + datajob.url = f"{base_url}/taskinstance/list/?flt1_dag_id_equals={datajob.flow_urn.get_flow_id()}&_flt_3_task_id={task.task_id}" if capture_owner and dag.owner: datajob.owners.add(dag.owner) @@ -290,9 +297,12 @@ def create_datajob_instance( task: "Operator", dag: "DAG", data_job: Optional[DataJob] = None, + config: Optional[DatahubLineageConfig] = None, ) -> DataProcessInstance: if data_job is None: - data_job = AirflowGenerator.generate_datajob(cluster, task=task, dag=dag) + data_job = AirflowGenerator.generate_datajob( + cluster, task=task, dag=dag, config=config + ) dpi = DataProcessInstance.from_datajob( datajob=data_job, id=task.task_id, clone_inlets=True, clone_outlets=True ) @@ -407,9 +417,12 @@ def run_datajob( datajob: Optional[DataJob] = None, attempt: Optional[int] = None, emit_templates: bool = True, + config: Optional[DatahubLineageConfig] = None, ) -> DataProcessInstance: if datajob is None: - datajob = AirflowGenerator.generate_datajob(cluster, ti.task, dag) + datajob = AirflowGenerator.generate_datajob( + cluster, ti.task, dag, config=config + ) assert dag_run.run_id dpi = DataProcessInstance.from_datajob( @@ -480,6 +493,7 @@ def complete_datajob( end_timestamp_millis: Optional[int] 
= None, result: Optional[InstanceRunResult] = None, datajob: Optional[DataJob] = None, + config: Optional[DatahubLineageConfig] = None, ) -> DataProcessInstance: """ @@ -491,10 +505,13 @@ def complete_datajob( :param end_timestamp_millis: Optional[int] :param result: Optional[str] One of the result from datahub.metadata.schema_class.RunResultTypeClass :param datajob: Optional[DataJob] + :param config: Optional[DatahubLineageConfig] :return: DataProcessInstance """ if datajob is None: - datajob = AirflowGenerator.generate_datajob(cluster, ti.task, dag) + datajob = AirflowGenerator.generate_datajob( + cluster, ti.task, dag, config=config + ) if end_timestamp_millis is None: if ti.end_date: diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_listener.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_listener.py index a7f588a166dde..475f3791bc0c8 100644 --- a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_listener.py +++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_listener.py @@ -376,6 +376,7 @@ def on_task_instance_running( dag=dag, capture_tags=self.config.capture_tags_info, capture_owner=self.config.capture_ownership_info, + config=self.config, ) # TODO: Make use of get_task_location to extract github urls. @@ -397,6 +398,7 @@ def on_task_instance_running( dag_run=dagrun, datajob=datajob, emit_templates=False, + config=self.config, ) logger.debug(f"Emitted DataHub DataProcess Instance start: {dpi}") @@ -419,6 +421,7 @@ def on_task_instance_finish( dag=dag, capture_tags=self.config.capture_tags_info, capture_owner=self.config.capture_ownership_info, + config=self.config, ) # Add lineage info. @@ -436,6 +439,7 @@ def on_task_instance_finish( dag_run=dagrun, datajob=datajob, result=status, + config=self.config, ) logger.debug( f"Emitted DataHub DataProcess Instance with status {status}: {dpi}" diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_plugin_v22.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_plugin_v22.py index 51a4151bc8207..7b8d719712d10 100644 --- a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_plugin_v22.py +++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_plugin_v22.py @@ -120,6 +120,7 @@ def datahub_task_status_callback(context, status): dag=dag, capture_tags=config.capture_tags_info, capture_owner=config.capture_ownership_info, + config=config, ) datajob.inlets.extend( entities_to_dataset_urn_list([let.urn for let in task_inlets]) @@ -143,6 +144,7 @@ def datahub_task_status_callback(context, status): dag_run=context["dag_run"], datajob=datajob, start_timestamp_millis=int(ti.start_date.timestamp() * 1000), + config=config, ) task.log.info(f"Emitted Start Datahub Dataprocess Instance: {dpi}") @@ -185,6 +187,7 @@ def datahub_pre_execution(context): dag=dag, capture_tags=config.capture_tags_info, capture_owner=config.capture_ownership_info, + config=config, ) datajob.inlets.extend( entities_to_dataset_urn_list([let.urn for let in task_inlets]) @@ -208,6 +211,7 @@ def datahub_pre_execution(context): dag_run=context["dag_run"], datajob=datajob, start_timestamp_millis=int(ti.start_date.timestamp() * 1000), + config=config, ) task.log.info(f"Emitting Datahub Dataprocess Instance: {dpi}") diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/lineage/_lineage_core.py 
b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/lineage/_lineage_core.py index 75fc79443e49e..daf45e1cd83f8 100644 --- a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/lineage/_lineage_core.py +++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/lineage/_lineage_core.py @@ -51,6 +51,7 @@ def send_lineage_to_datahub( dag=dag, capture_tags=config.capture_tags_info, capture_owner=config.capture_ownership_info, + config=config, ) datajob.inlets.extend(entities_to_dataset_urn_list([let.urn for let in inlets])) datajob.outlets.extend(entities_to_dataset_urn_list([let.urn for let in outlets])) diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/lineage/datahub.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/lineage/datahub.py index 3ebe7831d08f9..6f81812ea766e 100644 --- a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/lineage/datahub.py +++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/lineage/datahub.py @@ -71,7 +71,11 @@ def send_lineage( try: context = context or {} # ensure not None to satisfy mypy send_lineage_to_datahub( - config, operator, operator.inlets, operator.outlets, context + config, + operator, + operator.inlets, + operator.outlets, + context, ) except Exception as e: operator.log.error(e) diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py index bbbab73fd1cf5..74dcde5e066b3 100644 --- a/metadata-ingestion/setup.py +++ b/metadata-ingestion/setup.py @@ -245,6 +245,10 @@ powerbi_report_server = {"requests", "requests_ntlm"} +slack = { + "slack-sdk==3.18.1" +} + databricks = { # 0.1.11 appears to have authentication issues with azure databricks "databricks-sdk>=0.9.0", @@ -367,6 +371,7 @@ "snowflake": snowflake_common | usage_common | sqlglot_lib, "sqlalchemy": sql_common, "sql-queries": usage_common | sqlglot_lib, + "slack": slack, "superset": { "requests", "sqlalchemy", @@ -503,6 +508,7 @@ "redshift", "s3", "snowflake", + "slack", "tableau", "teradata", "trino", @@ -543,6 +549,7 @@ "kafka-connect", "ldap", "mongodb", + "slack", "mssql", "mysql", "mariadb", @@ -597,6 +604,7 @@ "postgres = datahub.ingestion.source.sql.postgres:PostgresSource", "redash = datahub.ingestion.source.redash:RedashSource", "redshift = datahub.ingestion.source.redshift.redshift:RedshiftSource", + "slack = datahub.ingestion.source.slack.slack:SlackSource", "snowflake = datahub.ingestion.source.snowflake.snowflake_v2:SnowflakeV2Source", "superset = datahub.ingestion.source.superset:SupersetSource", "tableau = datahub.ingestion.source.tableau:TableauSource", diff --git a/metadata-ingestion/src/datahub/ingestion/source/slack/__init__.py b/metadata-ingestion/src/datahub/ingestion/source/slack/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/metadata-ingestion/src/datahub/ingestion/source/slack/slack.py b/metadata-ingestion/src/datahub/ingestion/source/slack/slack.py new file mode 100644 index 0000000000000..ed425cc25d98f --- /dev/null +++ b/metadata-ingestion/src/datahub/ingestion/source/slack/slack.py @@ -0,0 +1,181 @@ +import logging +import textwrap +from dataclasses import dataclass +from typing import Iterable, Optional + +from pydantic import Field, SecretStr +from slack_sdk import WebClient + +from datahub.configuration.common import ConfigModel +from datahub.emitter.mcp import MetadataChangeProposalWrapper +from datahub.ingestion.api.common import PipelineContext +from 
datahub.ingestion.api.decorators import ( + SupportStatus, + config_class, + platform_name, + support_status, +) +from datahub.ingestion.api.source import ( + SourceReport, + TestableSource, + TestConnectionReport, +) +from datahub.ingestion.api.workunit import MetadataWorkUnit +from datahub.metadata.schema_classes import CorpUserEditableInfoClass +from datahub.utilities.urns.urn import Urn + +logger: logging.Logger = logging.getLogger(__name__) + + +@dataclass +class CorpUser: + urn: Optional[str] = None + email: Optional[str] = None + slack_id: Optional[str] = None + title: Optional[str] = None + image_url: Optional[str] = None + phone: Optional[str] = None + + +class SlackSourceConfig(ConfigModel): + bot_token: SecretStr = Field( + description="Bot token for the Slack workspace. Needs `users:read`, `users:read.email` and `users.profile:read` scopes.", + ) + + +@platform_name("Slack") +@config_class(SlackSourceConfig) +@support_status(SupportStatus.TESTING) +class SlackSource(TestableSource): + def __init__(self, ctx: PipelineContext, config: SlackSourceConfig): + self.ctx = ctx + self.config = config + self.report = SourceReport() + + @classmethod + def create(cls, config_dict, ctx): + config = SlackSourceConfig.parse_obj(config_dict) + return cls(ctx, config) + + @staticmethod + def test_connection(config_dict: dict) -> TestConnectionReport: + raise NotImplementedError("This class does not implement this method") + + def get_slack_client(self) -> WebClient: + return WebClient(token=self.config.bot_token.get_secret_value()) + + def get_workunits_internal( + self, + ) -> Iterable[MetadataWorkUnit]: + assert self.ctx.graph is not None + auth_resp = self.get_slack_client().auth_test() + logger.info("Successfully connected to Slack") + logger.info(auth_resp.data) + for user_obj in self.get_user_to_be_updated(): + self.populate_slack_id_from_email(user_obj) + if user_obj.slack_id is None: + continue + self.populate_user_profile(user_obj) + if user_obj.urn is None: + continue + logger.info(f"User: {user_obj}") + corpuser_editable_info = ( + self.ctx.graph.get_aspect( + entity_urn=user_obj.urn, aspect_type=CorpUserEditableInfoClass + ) + or CorpUserEditableInfoClass() + ) + corpuser_editable_info.email = user_obj.email + corpuser_editable_info.slack = user_obj.slack_id + corpuser_editable_info.title = user_obj.title + if user_obj.image_url: + corpuser_editable_info.pictureLink = user_obj.image_url + if user_obj.phone: + corpuser_editable_info.phone = user_obj.phone + yield MetadataWorkUnit( + id=f"{user_obj.urn}", + mcp=MetadataChangeProposalWrapper( + entityUrn=user_obj.urn, + aspect=corpuser_editable_info, + ), + ) + + def populate_user_profile(self, user_obj: CorpUser) -> None: + try: + # https://api.slack.com/methods/users.profile.get + user_profile_res = self.get_slack_client().users_profile_get( + user=user_obj.slack_id + ) + user_profile = user_profile_res.get("profile", {}) + user_obj.title = user_profile.get("title") + user_obj.image_url = user_profile.get("image_192") + user_obj.phone = user_profile.get("phone") + except Exception as e: + if "missing_scope" in str(e): + raise e + return + + def populate_slack_id_from_email(self, user_obj: CorpUser) -> None: + if user_obj.email is None: + return + try: + # https://api.slack.com/methods/users.lookupByEmail + user_info_res = self.get_slack_client().users_lookupByEmail( + email=user_obj.email + ) + user_info = user_info_res.get("user", {}) + user_obj.slack_id = user_info.get("id") + except Exception as e: + if "users_not_found" in 
str(e): + return + raise e + + def get_user_to_be_updated(self) -> Iterable[CorpUser]: + graphql_query = textwrap.dedent( + """ + query listUsers($input: ListUsersInput!) { + listUsers(input: $input) { + total + users { + urn + editableProperties { + email + slack + } + } + } + } + """ + ) + start = 0 + count = 10 + total = count + + assert self.ctx.graph is not None + + while start < total: + variables = {"input": {"start": start, "count": count}} + response = self.ctx.graph.execute_graphql( + query=graphql_query, variables=variables + ) + list_users = response.get("listUsers", {}) + total = list_users.get("total", 0) + users = list_users.get("users", []) + for user in users: + user_obj = CorpUser() + editable_properties = user.get("editableProperties", {}) + user_obj.urn = user.get("urn") + if user_obj.urn is None: + continue + if editable_properties is not None: + user_obj.email = editable_properties.get("email") + if user_obj.email is None: + urn_id = Urn.from_string(user_obj.urn).get_entity_id_as_string() + if "@" in urn_id: + user_obj.email = urn_id + if user_obj.email is not None: + yield user_obj + start += count + + def get_report(self) -> SourceReport: + return self.report diff --git a/metadata-io/src/main/java/com/linkedin/metadata/client/JavaEntityClient.java b/metadata-io/src/main/java/com/linkedin/metadata/client/JavaEntityClient.java index 9a3bc9e319d2b..15de029340a3c 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/client/JavaEntityClient.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/client/JavaEntityClient.java @@ -229,9 +229,11 @@ public BrowseResultV2 browseV2( @Nonnull String input, int start, int count, - @Nonnull Authentication authentication) { + @Nonnull Authentication authentication, + @Nullable SearchFlags searchFlags) { // TODO: cache browseV2 results - return _entitySearchService.browseV2(entityName, path, filter, input, start, count); + return _entitySearchService.browseV2( + entityName, path, filter, input, start, count, searchFlags); } /** @@ -253,9 +255,11 @@ public BrowseResultV2 browseV2( @Nonnull String input, int start, int count, - @Nonnull Authentication authentication) { + @Nonnull Authentication authentication, + @Nullable SearchFlags searchFlags) { // TODO: cache browseV2 results - return _entitySearchService.browseV2(entityNames, path, filter, input, start, count); + return _entitySearchService.browseV2( + entityNames, path, filter, input, start, count, searchFlags); } @SneakyThrows diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java index 7f15e3a7fd8fc..eec5c6120886d 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java @@ -15,6 +15,7 @@ import com.codahale.metrics.Timer; import com.datahub.util.RecordUtils; import com.datahub.util.exception.ModelConversionException; +import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableSet; import com.google.common.collect.Iterators; @@ -146,7 +147,8 @@ public class EntityServiceImpl implements EntityService { private static final int DEFAULT_MAX_TRANSACTION_RETRY = 3; protected final AspectDao _aspectDao; - private final EventProducer _producer; + + @VisibleForTesting @Getter private final EventProducer _producer; private final EntityRegistry _entityRegistry; 
private final Map> _entityToValidAspects; private RetentionService _retentionService; @@ -637,10 +639,15 @@ public List ingestAspects( @Override public List ingestAspects( @Nonnull final AspectsBatch aspectsBatch, boolean emitMCL, boolean overwrite) { + Set items = new HashSet<>(aspectsBatch.getItems()); + + // Generate additional items as needed + items.addAll(DefaultAspectsUtil.getAdditionalChanges(aspectsBatch, this, enableBrowseV2)); + AspectsBatch withDefaults = AspectsBatchImpl.builder().items(items).build(); Timer.Context ingestToLocalDBTimer = MetricUtils.timer(this.getClass(), "ingestAspectsToLocalDB").time(); - List ingestResults = ingestAspectsToLocalDB(aspectsBatch, overwrite); + List ingestResults = ingestAspectsToLocalDB(withDefaults, overwrite); List mclResults = emitMCL(ingestResults, emitMCL); ingestToLocalDBTimer.stop(); @@ -964,7 +971,7 @@ public IngestResult ingestProposal( */ @Override public Set ingestProposal(AspectsBatch aspectsBatch, final boolean async) { - Stream timeseriesIngestResults = ingestTimeseriesProposal(aspectsBatch); + Stream timeseriesIngestResults = ingestTimeseriesProposal(aspectsBatch, async); Stream nonTimeseriesIngestResults = async ? ingestProposalAsync(aspectsBatch) : ingestProposalSync(aspectsBatch); @@ -978,7 +985,8 @@ public Set ingestProposal(AspectsBatch aspectsBatch, final boolean * @param aspectsBatch timeseries upserts batch * @return returns ingest proposal result, however was never in the MCP topic */ - private Stream ingestTimeseriesProposal(AspectsBatch aspectsBatch) { + private Stream ingestTimeseriesProposal( + AspectsBatch aspectsBatch, final boolean async) { List unsupported = aspectsBatch.getItems().stream() .filter( @@ -992,6 +1000,20 @@ private Stream ingestTimeseriesProposal(AspectsBatch aspectsBatch) + unsupported.stream().map(BatchItem::getChangeType).collect(Collectors.toSet())); } + if (!async) { + // Create default non-timeseries aspects for timeseries aspects + List timeseriesItems = + aspectsBatch.getItems().stream() + .filter(item -> item.getAspectSpec().isTimeseries()) + .collect(Collectors.toList()); + + List defaultAspects = + DefaultAspectsUtil.getAdditionalChanges( + AspectsBatchImpl.builder().items(timeseriesItems).build(), this, enableBrowseV2); + ingestProposalSync(AspectsBatchImpl.builder().items(defaultAspects).build()); + } + + // Emit timeseries MCLs List, Boolean>>>> timeseriesResults = aspectsBatch.getItems().stream() .filter(item -> item.getAspectSpec().isTimeseries()) @@ -1080,17 +1102,10 @@ private Stream ingestProposalAsync(AspectsBatch aspectsBatch) { } private Stream ingestProposalSync(AspectsBatch aspectsBatch) { - Set items = new HashSet<>(aspectsBatch.getItems()); - - // Generate additional items as needed - items.addAll(DefaultAspectsUtil.getAdditionalChanges(aspectsBatch, this, enableBrowseV2)); - - AspectsBatch withDefaults = AspectsBatchImpl.builder().items(items).build(); - AspectsBatchImpl nonTimeseries = AspectsBatchImpl.builder() .items( - withDefaults.getItems().stream() + aspectsBatch.getItems().stream() .filter(item -> !item.getAspectSpec().isTimeseries()) .collect(Collectors.toList())) .build(); diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/ElasticSearchService.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/ElasticSearchService.java index 7cba2e0ecc8cb..c20c16e0ea7d1 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/ElasticSearchService.java +++ 
b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/ElasticSearchService.java @@ -215,8 +215,9 @@ public BrowseResultV2 browseV2( @Nullable Filter filter, @Nonnull String input, int start, - int count) { - return esBrowseDAO.browseV2(entityName, path, filter, input, start, count); + int count, + @Nullable SearchFlags searchFlags) { + return esBrowseDAO.browseV2(entityName, path, filter, input, start, count, searchFlags); } @Nonnull @@ -227,8 +228,9 @@ public BrowseResultV2 browseV2( @Nullable Filter filter, @Nonnull String input, int start, - int count) { - return esBrowseDAO.browseV2(entityNames, path, filter, input, start, count); + int count, + @Nullable SearchFlags searchFlags) { + return esBrowseDAO.browseV2(entityNames, path, filter, input, start, count, searchFlags); } @Nonnull diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESBrowseDAO.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESBrowseDAO.java index 0a9a9fbbad086..b808588520089 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESBrowseDAO.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESBrowseDAO.java @@ -21,6 +21,7 @@ import com.linkedin.metadata.models.EntitySpec; import com.linkedin.metadata.models.annotation.SearchableAnnotation; import com.linkedin.metadata.models.registry.EntityRegistry; +import com.linkedin.metadata.query.SearchFlags; import com.linkedin.metadata.query.filter.Filter; import com.linkedin.metadata.search.elasticsearch.query.request.SearchRequestHandler; import com.linkedin.metadata.search.utils.ESUtils; @@ -34,6 +35,7 @@ import java.util.Collections; import java.util.List; import java.util.Map; +import java.util.Optional; import java.util.Set; import java.util.stream.Collectors; import javax.annotation.Nonnull; @@ -399,14 +401,15 @@ public BrowseResultV2 browseV2( @Nullable Filter filter, @Nonnull String input, int start, - int count) { + int count, + @Nullable SearchFlags searchFlags) { try { final SearchResponse groupsResponse; try (Timer.Context ignored = MetricUtils.timer(this.getClass(), "esGroupSearch").time()) { final String finalInput = input.isEmpty() ? 
"*" : input; groupsResponse = client.search( - constructGroupsSearchRequestV2(entityName, path, filter, finalInput), + constructGroupsSearchRequestV2(entityName, path, filter, finalInput, searchFlags), RequestOptions.DEFAULT); } @@ -435,7 +438,8 @@ public BrowseResultV2 browseV2( @Nullable Filter filter, @Nonnull String input, int start, - int count) { + int count, + @Nullable SearchFlags searchFlags) { try { final SearchResponse groupsResponse; @@ -444,7 +448,7 @@ public BrowseResultV2 browseV2( groupsResponse = client.search( constructGroupsSearchRequestBrowseAcrossEntities( - entities, path, filter, finalInput), + entities, path, filter, finalInput, searchFlags), RequestOptions.DEFAULT); } @@ -472,7 +476,8 @@ private SearchRequest constructGroupsSearchRequestV2( @Nonnull String entityName, @Nonnull String path, @Nullable Filter filter, - @Nonnull String input) { + @Nonnull String input, + @Nullable SearchFlags searchFlags) { final String indexName = indexConvention.getIndexName(entityRegistry.getEntitySpec(entityName)); final SearchRequest searchRequest = new SearchRequest(indexName); final SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder(); @@ -482,7 +487,8 @@ private SearchRequest constructGroupsSearchRequestV2( entityName, path, SearchUtil.transformFilterForEntities(filter, indexConvention), - input)); + input, + searchFlags)); searchSourceBuilder.aggregation(buildAggregationsV2(path)); searchRequest.source(searchSourceBuilder); return searchRequest; @@ -493,7 +499,8 @@ private SearchRequest constructGroupsSearchRequestBrowseAcrossEntities( @Nonnull List entities, @Nonnull String path, @Nullable Filter filter, - @Nonnull String input) { + @Nonnull String input, + @Nullable SearchFlags searchFlags) { List entitySpecs = entities.stream().map(entityRegistry::getEntitySpec).collect(Collectors.toList()); @@ -509,7 +516,8 @@ private SearchRequest constructGroupsSearchRequestBrowseAcrossEntities( entitySpecs, path, SearchUtil.transformFilterForEntities(filter, indexConvention), - input)); + input, + searchFlags)); searchSourceBuilder.aggregation(buildAggregationsV2(path)); searchRequest.source(searchSourceBuilder); return searchRequest; @@ -537,7 +545,10 @@ private QueryBuilder buildQueryStringV2( @Nonnull String entityName, @Nonnull String path, @Nullable Filter filter, - @Nonnull String input) { + @Nonnull String input, + @Nullable SearchFlags searchFlags) { + SearchFlags finalSearchFlags = + Optional.ofNullable(searchFlags).orElse(new SearchFlags().setFulltext(true)); final int browseDepthVal = getPathDepthV2(path); final BoolQueryBuilder queryBuilder = QueryBuilders.boolQuery(); @@ -545,7 +556,7 @@ private QueryBuilder buildQueryStringV2( EntitySpec entitySpec = entityRegistry.getEntitySpec(entityName); QueryBuilder query = SearchRequestHandler.getBuilder(entitySpec, searchConfiguration, customSearchConfiguration) - .getQuery(input, false); + .getQuery(input, Boolean.TRUE.equals(finalSearchFlags.isFulltext())); queryBuilder.must(query); filterSoftDeletedByDefault(filter, queryBuilder); @@ -567,14 +578,17 @@ private QueryBuilder buildQueryStringBrowseAcrossEntities( @Nonnull List entitySpecs, @Nonnull String path, @Nullable Filter filter, - @Nonnull String input) { + @Nonnull String input, + @Nullable SearchFlags searchFlags) { + SearchFlags finalSearchFlags = + Optional.ofNullable(searchFlags).orElse(new SearchFlags().setFulltext(true)); final int browseDepthVal = getPathDepthV2(path); final BoolQueryBuilder queryBuilder = QueryBuilders.boolQuery(); QueryBuilder query = 
SearchRequestHandler.getBuilder(entitySpecs, searchConfiguration, customSearchConfiguration) - .getQuery(input, false); + .getQuery(input, Boolean.TRUE.equals(finalSearchFlags.isFulltext())); queryBuilder.must(query); if (!path.isEmpty()) { diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchQueryBuilder.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchQueryBuilder.java index 7ddccb0d56724..4c704f81b4c13 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchQueryBuilder.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchQueryBuilder.java @@ -135,14 +135,10 @@ private QueryBuilder buildInternalQuery( query.startsWith(STRUCTURED_QUERY_PREFIX) ? query.substring(STRUCTURED_QUERY_PREFIX.length()) : query; - - QueryStringQueryBuilder queryBuilder = QueryBuilders.queryStringQuery(withoutQueryPrefix); - queryBuilder.defaultOperator(Operator.AND); - getStandardFields(entitySpecs) - .forEach(entitySpec -> queryBuilder.field(entitySpec.fieldName(), entitySpec.boost())); - finalQuery.should(queryBuilder); + getStructuredQuery(customQueryConfig, entitySpecs, withoutQueryPrefix) + .ifPresent(finalQuery::should); if (exactMatchConfiguration.isEnableStructured()) { - getPrefixAndExactMatchQuery(null, entitySpecs, withoutQueryPrefix) + getPrefixAndExactMatchQuery(customQueryConfig, entitySpecs, withoutQueryPrefix) .ifPresent(finalQuery::should); } } @@ -415,6 +411,29 @@ private Optional getPrefixAndExactMatchQuery( return finalQuery.should().size() > 0 ? Optional.of(finalQuery) : Optional.empty(); } + private Optional getStructuredQuery( + @Nullable QueryConfiguration customQueryConfig, + List entitySpecs, + String sanitizedQuery) { + Optional result = Optional.empty(); + + final boolean executeStructuredQuery; + if (customQueryConfig != null) { + executeStructuredQuery = customQueryConfig.isStructuredQuery(); + } else { + executeStructuredQuery = !(isQuoted(sanitizedQuery) && exactMatchConfiguration.isExclusive()); + } + + if (executeStructuredQuery) { + QueryStringQueryBuilder queryBuilder = QueryBuilders.queryStringQuery(sanitizedQuery); + queryBuilder.defaultOperator(Operator.AND); + getStandardFields(entitySpecs) + .forEach(entitySpec -> queryBuilder.field(entitySpec.fieldName(), entitySpec.boost())); + result = Optional.of(queryBuilder); + } + return result; + } + private FunctionScoreQueryBuilder buildScoreFunctions( @Nullable QueryConfiguration customQueryConfig, @Nonnull List entitySpecs, diff --git a/metadata-io/src/test/java/com/linkedin/metadata/entity/EntityServiceTest.java b/metadata-io/src/test/java/com/linkedin/metadata/entity/EntityServiceTest.java index ea4e97d264bca..384b54c7a1c8d 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/entity/EntityServiceTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/entity/EntityServiceTest.java @@ -479,7 +479,7 @@ public void testIngestAspectsGetLatestAspects() throws Exception { assertTrue(DataTemplateUtil.areEqual(writeAspect1, latestAspects.get(aspectName1))); assertTrue(DataTemplateUtil.areEqual(writeAspect2, latestAspects.get(aspectName2))); - verify(_mockProducer, times(2)) + verify(_mockProducer, times(3)) .produceMetadataChangeLog(Mockito.eq(entityUrn), Mockito.any(), Mockito.any()); verifyNoMoreInteractions(_mockProducer); @@ -772,6 +772,12 @@ public void testUpdateGetAspect() throws AssertionError { .produceMetadataChangeLog( 
Mockito.eq(entityUrn), Mockito.eq(corpUserInfoSpec), Mockito.any()); + verify(_mockProducer, times(1)) + .produceMetadataChangeLog( + Mockito.eq(entityUrn), + Mockito.eq(_testEntityRegistry.getEntitySpec("corpUser").getAspectSpec("corpUserKey")), + Mockito.any()); + verifyNoMoreInteractions(_mockProducer); } @@ -824,6 +830,13 @@ public void testGetAspectAtVersion() throws AssertionError { readAspect1 = _entityServiceImpl.getVersionedAspect(entityUrn, aspectName, -1); assertFalse(DataTemplateUtil.areEqual(writtenVersionedAspect1, readAspect1)); + // check key aspect + verify(_mockProducer, times(1)) + .produceMetadataChangeLog( + Mockito.eq(entityUrn), + Mockito.eq(_testEntityRegistry.getEntitySpec("corpuser").getAspectSpec("corpUserKey")), + Mockito.any()); + verifyNoMoreInteractions(_mockProducer); } @@ -1094,13 +1107,22 @@ public void testIngestGetLatestAspect() throws AssertionError { ArgumentCaptor mclCaptor = ArgumentCaptor.forClass(MetadataChangeLog.class); verify(_mockProducer, times(1)) - .produceMetadataChangeLog(Mockito.eq(entityUrn), Mockito.any(), mclCaptor.capture()); + .produceMetadataChangeLog( + Mockito.eq(entityUrn), + Mockito.eq(_testEntityRegistry.getEntitySpec("corpUser").getAspectSpec("corpUserInfo")), + mclCaptor.capture()); MetadataChangeLog mcl = mclCaptor.getValue(); assertEquals(mcl.getEntityType(), "corpuser"); assertNull(mcl.getPreviousAspectValue()); assertNull(mcl.getPreviousSystemMetadata()); assertEquals(mcl.getChangeType(), ChangeType.UPSERT); + verify(_mockProducer, times(1)) + .produceMetadataChangeLog( + Mockito.eq(entityUrn), + Mockito.eq(_testEntityRegistry.getEntitySpec("corpUser").getAspectSpec("corpUserKey")), + Mockito.any()); + verifyNoMoreInteractions(_mockProducer); reset(_mockProducer); @@ -1201,7 +1223,16 @@ public void testIngestGetLatestEnvelopedAspect() throws Exception { EntityUtils.parseSystemMetadata(readAspectDao1.getSystemMetadata()), metadata1)); verify(_mockProducer, times(2)) - .produceMetadataChangeLog(Mockito.eq(entityUrn), Mockito.any(), Mockito.any()); + .produceMetadataChangeLog( + Mockito.eq(entityUrn), + Mockito.eq(_testEntityRegistry.getEntitySpec("corpUser").getAspectSpec("corpUserInfo")), + Mockito.any()); + + verify(_mockProducer, times(1)) + .produceMetadataChangeLog( + Mockito.eq(entityUrn), + Mockito.eq(_testEntityRegistry.getEntitySpec("corpUser").getAspectSpec("corpUserKey")), + Mockito.any()); verifyNoMoreInteractions(_mockProducer); } @@ -1234,9 +1265,18 @@ public void testIngestSameAspect() throws AssertionError { RecordTemplate readAspect1 = _entityServiceImpl.getLatestAspect(entityUrn, aspectName); assertTrue(DataTemplateUtil.areEqual(writeAspect1, readAspect1)); + verify(_mockProducer, times(1)) + .produceMetadataChangeLog( + Mockito.eq(entityUrn), + Mockito.eq(_testEntityRegistry.getEntitySpec("corpUser").getAspectSpec("corpUserKey")), + Mockito.any()); + ArgumentCaptor mclCaptor = ArgumentCaptor.forClass(MetadataChangeLog.class); verify(_mockProducer, times(1)) - .produceMetadataChangeLog(Mockito.eq(entityUrn), Mockito.any(), mclCaptor.capture()); + .produceMetadataChangeLog( + Mockito.eq(entityUrn), + Mockito.eq(_testEntityRegistry.getEntitySpec("corpUser").getAspectSpec("corpUserInfo")), + mclCaptor.capture()); MetadataChangeLog mcl = mclCaptor.getValue(); assertEquals(mcl.getEntityType(), "corpuser"); assertNull(mcl.getPreviousAspectValue()); diff --git a/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/search/custom/QueryConfiguration.java 
b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/search/custom/QueryConfiguration.java index 901bf803d2bca..e3a9d076dbef2 100644 --- a/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/search/custom/QueryConfiguration.java +++ b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/search/custom/QueryConfiguration.java @@ -19,6 +19,13 @@ public class QueryConfiguration { private String queryRegex; @Builder.Default private boolean simpleQuery = true; + + /** + * Used to determine if standard structured query logic should be applied when relevant, i.e. + * fullText flag is false. Will not be added in cases where simpleQuery would be the standard. + */ + @Builder.Default private boolean structuredQuery = true; + @Builder.Default private boolean exactMatchQuery = true; @Builder.Default private boolean prefixMatchQuery = true; private BoolQueryConfiguration boolQuery; diff --git a/metadata-service/configuration/src/main/resources/application.yml b/metadata-service/configuration/src/main/resources/application.yml index d4c11d4aa53bd..c2a0d508b57d6 100644 --- a/metadata-service/configuration/src/main/resources/application.yml +++ b/metadata-service/configuration/src/main/resources/application.yml @@ -314,6 +314,14 @@ systemUpdate: maxBackOffs: ${BOOTSTRAP_SYSTEM_UPDATE_MAX_BACK_OFFS:50} backOffFactor: ${BOOTSTRAP_SYSTEM_UPDATE_BACK_OFF_FACTOR:2} # Multiplicative factor for back off, default values will result in waiting 5min 15s waitForSystemUpdate: ${BOOTSTRAP_SYSTEM_UPDATE_WAIT_FOR_SYSTEM_UPDATE:true} + dataJobNodeCLL: + enabled: ${BOOTSTRAP_SYSTEM_UPDATE_DATA_JOB_NODE_CLL_ENABLED:true} + batchSize: ${BOOTSTRAP_SYSTEM_UPDATE_DATA_JOB_NODE_CLL_BATCH_SIZE:200} + browsePathsV2: + enabled: ${BOOTSTRAP_SYSTEM_UPDATE_BROWSE_PATHS_V2_ENABLED:true} + batchSize: ${BOOTSTRAP_SYSTEM_UPDATE_BROWSE_PATHS_V2_BATCH_SIZE:5000} + reprocess: + enabled: ${REPROCESS_DEFAULT_BROWSE_PATHS_V2:false} structuredProperties: enabled: ${ENABLE_STRUCTURED_PROPERTIES_HOOK:true} # applies structured properties mappings diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/EntityServiceFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/EntityServiceFactory.java index 871f16d97be33..2ccdee5fb1dbf 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/EntityServiceFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/EntityServiceFactory.java @@ -1,20 +1,15 @@ package com.linkedin.gms.factory.entity; import com.linkedin.datahub.graphql.featureflags.FeatureFlags; -import com.linkedin.gms.factory.common.TopicConventionFactory; import com.linkedin.gms.factory.config.ConfigurationProvider; import com.linkedin.metadata.dao.producer.KafkaEventProducer; -import com.linkedin.metadata.dao.producer.KafkaHealthChecker; import com.linkedin.metadata.entity.AspectDao; import com.linkedin.metadata.entity.EntityService; import com.linkedin.metadata.entity.EntityServiceImpl; import com.linkedin.metadata.entity.ebean.batch.MCPUpsertBatchItem; import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.metadata.service.UpdateIndicesService; -import com.linkedin.mxe.TopicConvention; import javax.annotation.Nonnull; -import org.apache.avro.generic.IndexedRecord; -import org.apache.kafka.clients.producer.Producer; import org.springframework.beans.factory.annotation.Qualifier; import 
org.springframework.beans.factory.annotation.Value; import org.springframework.context.annotation.Bean; @@ -28,26 +23,16 @@ public class EntityServiceFactory { private Integer _ebeanMaxTransactionRetry; @Bean(name = "entityService") - @DependsOn({ - "entityAspectDao", - "kafkaEventProducer", - "kafkaHealthChecker", - TopicConventionFactory.TOPIC_CONVENTION_BEAN, - "entityRegistry" - }) + @DependsOn({"entityAspectDao", "kafkaEventProducer", "entityRegistry"}) @Nonnull protected EntityService createInstance( - Producer producer, - TopicConvention convention, - KafkaHealthChecker kafkaHealthChecker, + @Qualifier("kafkaEventProducer") final KafkaEventProducer eventProducer, @Qualifier("entityAspectDao") AspectDao aspectDao, EntityRegistry entityRegistry, ConfigurationProvider configurationProvider, UpdateIndicesService updateIndicesService, @Value("${featureFlags.showBrowseV2}") final boolean enableBrowsePathV2) { - final KafkaEventProducer eventProducer = - new KafkaEventProducer(producer, convention, kafkaHealthChecker); FeatureFlags featureFlags = configurationProvider.getFeatureFlags(); return new EntityServiceImpl( diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/schemaregistry/DUHESchemaRegistryFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/schemaregistry/DUHESchemaRegistryFactory.java deleted file mode 100644 index 4819984307af9..0000000000000 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/schemaregistry/DUHESchemaRegistryFactory.java +++ /dev/null @@ -1,40 +0,0 @@ -package com.linkedin.gms.factory.kafka.schemaregistry; - -import static com.linkedin.metadata.boot.kafka.DataHubUpgradeKafkaListener.TOPIC_NAME; - -import com.linkedin.gms.factory.config.ConfigurationProvider; -import com.linkedin.metadata.boot.kafka.MockDUHEDeserializer; -import com.linkedin.metadata.boot.kafka.MockDUHESerializer; -import com.linkedin.metadata.config.kafka.KafkaConfiguration; -import io.confluent.kafka.serializers.AbstractKafkaSchemaSerDeConfig; -import java.util.HashMap; -import java.util.Map; -import lombok.extern.slf4j.Slf4j; -import org.springframework.beans.factory.annotation.Value; -import org.springframework.context.annotation.Bean; -import org.springframework.context.annotation.Configuration; - -@Slf4j -@Configuration -public class DUHESchemaRegistryFactory { - - public static final String DUHE_SCHEMA_REGISTRY_TOPIC_KEY = "duheTopicName"; - - @Value(TOPIC_NAME) - private String duheTopicName; - - /** Configure Kafka Producer/Consumer processes with a custom schema registry. 
*/ - @Bean("duheSchemaRegistryConfig") - protected SchemaRegistryConfig duheSchemaRegistryConfig(ConfigurationProvider provider) { - Map props = new HashMap<>(); - KafkaConfiguration kafkaConfiguration = provider.getKafka(); - - props.put( - AbstractKafkaSchemaSerDeConfig.SCHEMA_REGISTRY_URL_CONFIG, - kafkaConfiguration.getSchemaRegistry().getUrl()); - props.put(DUHE_SCHEMA_REGISTRY_TOPIC_KEY, duheTopicName); - - log.info("DataHub System Update Registry"); - return new SchemaRegistryConfig(MockDUHESerializer.class, MockDUHEDeserializer.class, props); - } -} diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/schemaregistry/InternalSchemaRegistryFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/schemaregistry/InternalSchemaRegistryFactory.java index 8c814e5054758..46b27195ecc67 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/schemaregistry/InternalSchemaRegistryFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/schemaregistry/InternalSchemaRegistryFactory.java @@ -1,11 +1,7 @@ package com.linkedin.gms.factory.kafka.schemaregistry; -import com.linkedin.gms.factory.common.TopicConventionFactory; import com.linkedin.gms.factory.config.ConfigurationProvider; import com.linkedin.metadata.config.kafka.KafkaConfiguration; -import com.linkedin.metadata.registry.SchemaRegistryService; -import com.linkedin.metadata.registry.SchemaRegistryServiceImpl; -import com.linkedin.mxe.TopicConvention; import io.confluent.kafka.serializers.AbstractKafkaSchemaSerDeConfig; import io.confluent.kafka.serializers.KafkaAvroDeserializer; import io.confluent.kafka.serializers.KafkaAvroSerializer; @@ -17,7 +13,6 @@ import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty; import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Configuration; -import org.springframework.context.annotation.DependsOn; @Slf4j @Configuration @@ -45,11 +40,4 @@ protected SchemaRegistryConfig getInstance( kafkaConfiguration.getSchemaRegistry().getUrl()); return new SchemaRegistryConfig(KafkaAvroSerializer.class, KafkaAvroDeserializer.class, props); } - - @Bean(name = "schemaRegistryService") - @Nonnull - @DependsOn({TopicConventionFactory.TOPIC_CONVENTION_BEAN}) - protected SchemaRegistryService schemaRegistryService(TopicConvention convention) { - return new SchemaRegistryServiceImpl(convention); - } } diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/schemaregistry/SchemaRegistryServiceFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/schemaregistry/SchemaRegistryServiceFactory.java new file mode 100644 index 0000000000000..a6869321d796f --- /dev/null +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/schemaregistry/SchemaRegistryServiceFactory.java @@ -0,0 +1,20 @@ +package com.linkedin.gms.factory.kafka.schemaregistry; + +import com.linkedin.gms.factory.common.TopicConventionFactory; +import com.linkedin.metadata.registry.SchemaRegistryService; +import com.linkedin.metadata.registry.SchemaRegistryServiceImpl; +import com.linkedin.mxe.TopicConvention; +import javax.annotation.Nonnull; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; +import org.springframework.context.annotation.DependsOn; + +@Configuration +public class SchemaRegistryServiceFactory { + @Bean(name = 
"schemaRegistryService") + @Nonnull + @DependsOn({TopicConventionFactory.TOPIC_CONVENTION_BEAN}) + protected SchemaRegistryService schemaRegistryService(TopicConvention convention) { + return new SchemaRegistryServiceImpl(convention); + } +} diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/schemaregistry/SystemUpdateSchemaRegistryFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/schemaregistry/SystemUpdateSchemaRegistryFactory.java new file mode 100644 index 0000000000000..d02cdc0e68f52 --- /dev/null +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/schemaregistry/SystemUpdateSchemaRegistryFactory.java @@ -0,0 +1,66 @@ +package com.linkedin.gms.factory.kafka.schemaregistry; + +import static com.linkedin.metadata.boot.kafka.DataHubUpgradeKafkaListener.TOPIC_NAME; + +import com.linkedin.gms.factory.config.ConfigurationProvider; +import com.linkedin.metadata.boot.kafka.MockSystemUpdateDeserializer; +import com.linkedin.metadata.boot.kafka.MockSystemUpdateSerializer; +import com.linkedin.metadata.config.kafka.KafkaConfiguration; +import com.linkedin.metadata.registry.SchemaRegistryService; +import com.linkedin.mxe.Topics; +import io.confluent.kafka.serializers.AbstractKafkaSchemaSerDeConfig; +import java.util.HashMap; +import java.util.Map; +import lombok.extern.slf4j.Slf4j; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; + +@Slf4j +@Configuration +public class SystemUpdateSchemaRegistryFactory { + + public static final String SYSTEM_UPDATE_TOPIC_KEY_PREFIX = "data-hub.system-update.topic-key."; + public static final String SYSTEM_UPDATE_TOPIC_KEY_ID_SUFFIX = ".id"; + + public static final String DUHE_SCHEMA_REGISTRY_TOPIC_KEY = + SYSTEM_UPDATE_TOPIC_KEY_PREFIX + "duhe"; + public static final String MCL_VERSIONED_SCHEMA_REGISTRY_TOPIC_KEY = + SYSTEM_UPDATE_TOPIC_KEY_PREFIX + "mcl-versioned"; + + @Value(TOPIC_NAME) + private String duheTopicName; + + @Value("${METADATA_CHANGE_LOG_VERSIONED_TOPIC_NAME:" + Topics.METADATA_CHANGE_LOG_VERSIONED + "}") + private String mclTopicName; + + /** Configure Kafka Producer/Consumer processes with a custom schema registry. 
*/ + @Bean("duheSchemaRegistryConfig") + protected SchemaRegistryConfig duheSchemaRegistryConfig( + final ConfigurationProvider provider, final SchemaRegistryService schemaRegistryService) { + Map props = new HashMap<>(); + KafkaConfiguration kafkaConfiguration = provider.getKafka(); + + props.put( + AbstractKafkaSchemaSerDeConfig.SCHEMA_REGISTRY_URL_CONFIG, + kafkaConfiguration.getSchemaRegistry().getUrl()); + + // topic names + props.putAll( + Map.of( + DUHE_SCHEMA_REGISTRY_TOPIC_KEY, duheTopicName, + MCL_VERSIONED_SCHEMA_REGISTRY_TOPIC_KEY, mclTopicName)); + + // topic ordinals + props.putAll( + Map.of( + DUHE_SCHEMA_REGISTRY_TOPIC_KEY + SYSTEM_UPDATE_TOPIC_KEY_ID_SUFFIX, + schemaRegistryService.getSchemaIdForTopic(duheTopicName).get().toString(), + MCL_VERSIONED_SCHEMA_REGISTRY_TOPIC_KEY + SYSTEM_UPDATE_TOPIC_KEY_ID_SUFFIX, + schemaRegistryService.getSchemaIdForTopic(mclTopicName).get().toString())); + + log.info("DataHub System Update Registry"); + return new SchemaRegistryConfig( + MockSystemUpdateSerializer.class, MockSystemUpdateDeserializer.class, props); + } +} diff --git a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/BootstrapStep.java b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/BootstrapStep.java index a79bdacfc55e9..2dccda4243bca 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/BootstrapStep.java +++ b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/BootstrapStep.java @@ -1,16 +1,15 @@ package com.linkedin.metadata.boot; -import com.linkedin.common.AuditStamp; import com.linkedin.common.urn.Urn; import com.linkedin.events.metadata.ChangeType; import com.linkedin.metadata.Constants; import com.linkedin.metadata.entity.EntityService; import com.linkedin.metadata.key.DataHubUpgradeKey; +import com.linkedin.metadata.utils.AuditStampUtils; import com.linkedin.metadata.utils.EntityKeyUtils; import com.linkedin.metadata.utils.GenericRecordUtils; import com.linkedin.mxe.MetadataChangeProposal; import com.linkedin.upgrade.DataHubUpgradeResult; -import java.net.URISyntaxException; import javax.annotation.Nonnull; /** A single step in the Bootstrap process. 
*/ @@ -40,24 +39,10 @@ static Urn getUpgradeUrn(String upgradeId) { new DataHubUpgradeKey().setId(upgradeId), Constants.DATA_HUB_UPGRADE_ENTITY_NAME); } - static void setUpgradeResult(Urn urn, EntityService entityService) throws URISyntaxException { - final AuditStamp auditStamp = - new AuditStamp() - .setActor(Urn.createFromString(Constants.SYSTEM_ACTOR)) - .setTime(System.currentTimeMillis()); + static void setUpgradeResult(Urn urn, EntityService entityService) { final DataHubUpgradeResult upgradeResult = new DataHubUpgradeResult().setTimestampMs(System.currentTimeMillis()); - // Workaround because entity service does not auto-generate the key aspect for us - final MetadataChangeProposal keyProposal = new MetadataChangeProposal(); - final DataHubUpgradeKey upgradeKey = new DataHubUpgradeKey().setId(urn.getId()); - keyProposal.setEntityUrn(urn); - keyProposal.setEntityType(Constants.DATA_HUB_UPGRADE_ENTITY_NAME); - keyProposal.setAspectName(Constants.DATA_HUB_UPGRADE_KEY_ASPECT_NAME); - keyProposal.setAspect(GenericRecordUtils.serializeAspect(upgradeKey)); - keyProposal.setChangeType(ChangeType.UPSERT); - entityService.ingestProposal(keyProposal, auditStamp, false); - // Ingest the upgrade result final MetadataChangeProposal upgradeProposal = new MetadataChangeProposal(); upgradeProposal.setEntityUrn(urn); @@ -65,6 +50,6 @@ static void setUpgradeResult(Urn urn, EntityService entityService) throws URI upgradeProposal.setAspectName(Constants.DATA_HUB_UPGRADE_RESULT_ASPECT_NAME); upgradeProposal.setAspect(GenericRecordUtils.serializeAspect(upgradeResult)); upgradeProposal.setChangeType(ChangeType.UPSERT); - entityService.ingestProposal(upgradeProposal, auditStamp, false); + entityService.ingestProposal(upgradeProposal, AuditStampUtils.createDefaultAuditStamp(), false); } } diff --git a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/kafka/MockDUHESerializer.java b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/kafka/MockDUHESerializer.java deleted file mode 100644 index 36fe514d5536f..0000000000000 --- a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/kafka/MockDUHESerializer.java +++ /dev/null @@ -1,57 +0,0 @@ -package com.linkedin.metadata.boot.kafka; - -import static com.linkedin.gms.factory.kafka.schemaregistry.DUHESchemaRegistryFactory.DUHE_SCHEMA_REGISTRY_TOPIC_KEY; - -import com.linkedin.metadata.EventUtils; -import io.confluent.kafka.schemaregistry.avro.AvroSchema; -import io.confluent.kafka.schemaregistry.client.MockSchemaRegistryClient; -import io.confluent.kafka.schemaregistry.client.SchemaRegistryClient; -import io.confluent.kafka.schemaregistry.client.rest.exceptions.RestClientException; -import io.confluent.kafka.serializers.KafkaAvroSerializer; -import java.io.IOException; -import java.util.Map; -import lombok.extern.slf4j.Slf4j; - -/** Used for early bootstrap to avoid contact with not yet existing schema registry */ -@Slf4j -public class MockDUHESerializer extends KafkaAvroSerializer { - - private static final String DATAHUB_UPGRADE_HISTORY_EVENT_SUBJECT_SUFFIX = "-value"; - - private String topicName; - - public MockDUHESerializer() { - this.schemaRegistry = buildMockSchemaRegistryClient(); - } - - public MockDUHESerializer(SchemaRegistryClient client) { - super(client); - this.schemaRegistry = buildMockSchemaRegistryClient(); - } - - public MockDUHESerializer(SchemaRegistryClient client, Map props) { - super(client, props); - this.schemaRegistry = buildMockSchemaRegistryClient(); - } - - @Override - public void 
configure(Map configs, boolean isKey) { - super.configure(configs, isKey); - topicName = configs.get(DUHE_SCHEMA_REGISTRY_TOPIC_KEY).toString(); - } - - private MockSchemaRegistryClient buildMockSchemaRegistryClient() { - MockSchemaRegistryClient schemaRegistry = new MockSchemaRegistryClient(); - try { - schemaRegistry.register( - topicToSubjectName(topicName), new AvroSchema(EventUtils.ORIGINAL_DUHE_AVRO_SCHEMA)); - return schemaRegistry; - } catch (IOException | RestClientException e) { - throw new RuntimeException(e); - } - } - - public static String topicToSubjectName(String topicName) { - return topicName + DATAHUB_UPGRADE_HISTORY_EVENT_SUBJECT_SUFFIX; - } -} diff --git a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/kafka/MockDUHEDeserializer.java b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/kafka/MockSystemUpdateDeserializer.java similarity index 57% rename from metadata-service/factories/src/main/java/com/linkedin/metadata/boot/kafka/MockDUHEDeserializer.java rename to metadata-service/factories/src/main/java/com/linkedin/metadata/boot/kafka/MockSystemUpdateDeserializer.java index e631f776abd08..74a20cdacbb21 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/kafka/MockDUHEDeserializer.java +++ b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/kafka/MockSystemUpdateDeserializer.java @@ -1,50 +1,49 @@ package com.linkedin.metadata.boot.kafka; -import static com.linkedin.gms.factory.kafka.schemaregistry.DUHESchemaRegistryFactory.DUHE_SCHEMA_REGISTRY_TOPIC_KEY; -import static com.linkedin.metadata.boot.kafka.MockDUHESerializer.topicToSubjectName; +import static com.linkedin.gms.factory.kafka.schemaregistry.SystemUpdateSchemaRegistryFactory.DUHE_SCHEMA_REGISTRY_TOPIC_KEY; +import static com.linkedin.gms.factory.kafka.schemaregistry.SystemUpdateSchemaRegistryFactory.SYSTEM_UPDATE_TOPIC_KEY_ID_SUFFIX; +import static com.linkedin.metadata.boot.kafka.MockSystemUpdateSerializer.topicToSubjectName; import com.linkedin.metadata.EventUtils; import io.confluent.kafka.schemaregistry.ParsedSchema; import io.confluent.kafka.schemaregistry.avro.AvroSchema; import io.confluent.kafka.schemaregistry.client.MockSchemaRegistryClient; -import io.confluent.kafka.schemaregistry.client.SchemaRegistryClient; import io.confluent.kafka.schemaregistry.client.rest.exceptions.RestClientException; import io.confluent.kafka.serializers.KafkaAvroDeserializer; import java.io.IOException; import java.util.Map; import lombok.extern.slf4j.Slf4j; -/** Used for early bootstrap to avoid contact with not yet existing schema registry */ +/** + * Used for early bootstrap to avoid contact with not yet existing schema registry Only supports the + * DUHE topic + */ @Slf4j -public class MockDUHEDeserializer extends KafkaAvroDeserializer { +public class MockSystemUpdateDeserializer extends KafkaAvroDeserializer { private String topicName; - - public MockDUHEDeserializer() { - this.schemaRegistry = buildMockSchemaRegistryClient(); - } - - public MockDUHEDeserializer(SchemaRegistryClient client) { - super(client); - this.schemaRegistry = buildMockSchemaRegistryClient(); - } - - public MockDUHEDeserializer(SchemaRegistryClient client, Map props) { - super(client, props); - this.schemaRegistry = buildMockSchemaRegistryClient(); - } + private Integer schemaId; @Override public void configure(Map configs, boolean isKey) { super.configure(configs, isKey); topicName = configs.get(DUHE_SCHEMA_REGISTRY_TOPIC_KEY).toString(); + schemaId = + 
Integer.valueOf( + configs + .get(DUHE_SCHEMA_REGISTRY_TOPIC_KEY + SYSTEM_UPDATE_TOPIC_KEY_ID_SUFFIX) + .toString()); + this.schemaRegistry = buildMockSchemaRegistryClient(); } private MockSchemaRegistryClient buildMockSchemaRegistryClient() { - MockSchemaRegistryClient schemaRegistry = new MockSchemaRegistryClient2(); + MockSchemaRegistryClient schemaRegistry = new MockSchemaRegistryClient2(schemaId); try { schemaRegistry.register( - topicToSubjectName(topicName), new AvroSchema(EventUtils.ORIGINAL_DUHE_AVRO_SCHEMA)); + topicToSubjectName(topicName), + new AvroSchema(EventUtils.ORIGINAL_DUHE_AVRO_SCHEMA), + 0, + schemaId); return schemaRegistry; } catch (IOException | RestClientException e) { throw new RuntimeException(e); @@ -52,13 +51,19 @@ private MockSchemaRegistryClient buildMockSchemaRegistryClient() { } public static class MockSchemaRegistryClient2 extends MockSchemaRegistryClient { + private final int schemaId; + + public MockSchemaRegistryClient2(int schemaId) { + this.schemaId = schemaId; + } + /** * Previously used topics can have schema ids > 1 which fully match however we are replacing * that registry so force schema id to 1 */ @Override public synchronized ParsedSchema getSchemaById(int id) throws IOException, RestClientException { - return super.getSchemaById(1); + return super.getSchemaById(schemaId); } } } diff --git a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/kafka/MockSystemUpdateSerializer.java b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/kafka/MockSystemUpdateSerializer.java new file mode 100644 index 0000000000000..14aac2758a69d --- /dev/null +++ b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/kafka/MockSystemUpdateSerializer.java @@ -0,0 +1,76 @@ +package com.linkedin.metadata.boot.kafka; + +import static com.linkedin.gms.factory.kafka.schemaregistry.SystemUpdateSchemaRegistryFactory.DUHE_SCHEMA_REGISTRY_TOPIC_KEY; +import static com.linkedin.gms.factory.kafka.schemaregistry.SystemUpdateSchemaRegistryFactory.MCL_VERSIONED_SCHEMA_REGISTRY_TOPIC_KEY; +import static com.linkedin.gms.factory.kafka.schemaregistry.SystemUpdateSchemaRegistryFactory.SYSTEM_UPDATE_TOPIC_KEY_ID_SUFFIX; +import static com.linkedin.gms.factory.kafka.schemaregistry.SystemUpdateSchemaRegistryFactory.SYSTEM_UPDATE_TOPIC_KEY_PREFIX; + +import com.linkedin.metadata.EventUtils; +import com.linkedin.util.Pair; +import io.confluent.kafka.schemaregistry.avro.AvroSchema; +import io.confluent.kafka.schemaregistry.client.MockSchemaRegistryClient; +import io.confluent.kafka.schemaregistry.client.rest.exceptions.RestClientException; +import io.confluent.kafka.serializers.KafkaAvroSerializer; +import java.io.IOException; +import java.util.Map; +import java.util.stream.Collectors; +import lombok.extern.slf4j.Slf4j; + +/** Used for early bootstrap to avoid contact with not yet existing schema registry */ +@Slf4j +public class MockSystemUpdateSerializer extends KafkaAvroSerializer { + + private static final String DATAHUB_SYSTEM_UPDATE_SUBJECT_SUFFIX = "-value"; + + private static final Map AVRO_SCHEMA_MAP = + Map.of( + DUHE_SCHEMA_REGISTRY_TOPIC_KEY, new AvroSchema(EventUtils.ORIGINAL_DUHE_AVRO_SCHEMA), + MCL_VERSIONED_SCHEMA_REGISTRY_TOPIC_KEY, + new AvroSchema(EventUtils.ORIGINAL_MCL_AVRO_SCHEMA)); + + private Map> topicNameToAvroSchemaMap; + + @Override + public void configure(Map configs, boolean isKey) { + super.configure(configs, isKey); + topicNameToAvroSchemaMap = + configs.entrySet().stream() + .filter( + e -> + 
e.getKey().startsWith(SYSTEM_UPDATE_TOPIC_KEY_PREFIX) + && !e.getKey().endsWith(SYSTEM_UPDATE_TOPIC_KEY_ID_SUFFIX) + && e.getValue() instanceof String) + .map( + e -> { + Integer id = + Integer.valueOf( + (String) configs.get(e.getKey() + SYSTEM_UPDATE_TOPIC_KEY_ID_SUFFIX)); + return Pair.of( + (String) e.getValue(), Pair.of(AVRO_SCHEMA_MAP.get(e.getKey()), id)); + }) + .collect(Collectors.toMap(Pair::getKey, Pair::getValue)); + this.schemaRegistry = buildMockSchemaRegistryClient(); + } + + private MockSchemaRegistryClient buildMockSchemaRegistryClient() { + MockSchemaRegistryClient schemaRegistry = new MockSchemaRegistryClient(); + + if (topicNameToAvroSchemaMap != null) { + topicNameToAvroSchemaMap.forEach( + (topicName, schemaId) -> { + try { + schemaRegistry.register( + topicToSubjectName(topicName), schemaId.getFirst(), 0, schemaId.getSecond()); + } catch (IOException | RestClientException e) { + throw new RuntimeException(e); + } + }); + } + + return schemaRegistry; + } + + public static String topicToSubjectName(String topicName) { + return topicName + DATAHUB_SYSTEM_UPDATE_SUBJECT_SUFFIX; + } +} diff --git a/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/EntityClient.java b/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/EntityClient.java index b1b24ac97f0b8..676b80c8bea32 100644 --- a/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/EntityClient.java +++ b/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/EntityClient.java @@ -154,7 +154,8 @@ public BrowseResultV2 browseV2( @Nonnull String input, int start, int count, - @Nonnull Authentication authentication) + @Nonnull Authentication authentication, + @Nullable SearchFlags searchFlags) throws RemoteInvocationException; /** @@ -176,7 +177,8 @@ public BrowseResultV2 browseV2( @Nonnull String input, int start, int count, - @Nonnull Authentication authentication) + @Nonnull Authentication authentication, + @Nullable SearchFlags searchFlags) throws RemoteInvocationException; @Deprecated diff --git a/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/RestliEntityClient.java b/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/RestliEntityClient.java index 3108345bd3937..653ef046ffc02 100644 --- a/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/RestliEntityClient.java +++ b/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/RestliEntityClient.java @@ -378,7 +378,8 @@ public BrowseResultV2 browseV2( @Nonnull String input, int start, int count, - @Nonnull Authentication authentication) { + @Nonnull Authentication authentication, + @Nullable SearchFlags searchFlags) { throw new NotImplementedException("BrowseV2 is not implemented in Restli yet"); } @@ -391,7 +392,8 @@ public BrowseResultV2 browseV2( @Nonnull String input, int start, int count, - @Nonnull Authentication authentication) + @Nonnull Authentication authentication, + @Nullable SearchFlags searchFlags) throws RemoteInvocationException { throw new NotImplementedException("BrowseV2 is not implemented in Restli yet"); } diff --git a/metadata-service/restli-servlet-impl/src/test/java/com/linkedin/metadata/resources/entity/AspectResourceTest.java b/metadata-service/restli-servlet-impl/src/test/java/com/linkedin/metadata/resources/entity/AspectResourceTest.java index 1678fe92ec70e..17c5160494722 100644 --- 
a/metadata-service/restli-servlet-impl/src/test/java/com/linkedin/metadata/resources/entity/AspectResourceTest.java +++ b/metadata-service/restli-servlet-impl/src/test/java/com/linkedin/metadata/resources/entity/AspectResourceTest.java @@ -122,7 +122,7 @@ public void testAsyncDefaultAspects() throws URISyntaxException { .request(req) .build()))); _aspectResource.ingestProposal(mcp, "false"); - verify(_producer, times(5)) + verify(_producer, times(10)) .produceMetadataChangeLog(eq(urn), any(AspectSpec.class), any(MetadataChangeLog.class)); verifyNoMoreInteractions(_producer); } diff --git a/metadata-service/services/src/main/java/com/linkedin/metadata/search/EntitySearchService.java b/metadata-service/services/src/main/java/com/linkedin/metadata/search/EntitySearchService.java index 2fec88ad221fd..0d1c031db136e 100644 --- a/metadata-service/services/src/main/java/com/linkedin/metadata/search/EntitySearchService.java +++ b/metadata-service/services/src/main/java/com/linkedin/metadata/search/EntitySearchService.java @@ -197,6 +197,7 @@ BrowseResult browse( * @param input search query * @param start start offset of first group * @param count max number of results requested + * @param searchFlags configuration options for search */ @Nonnull public BrowseResultV2 browseV2( @@ -205,7 +206,8 @@ public BrowseResultV2 browseV2( @Nullable Filter filter, @Nonnull String input, int start, - int count); + int count, + @Nullable SearchFlags searchFlags); /** * Gets browse snapshot of a given path @@ -216,6 +218,7 @@ public BrowseResultV2 browseV2( * @param input search query * @param start start offset of first group * @param count max number of results requested + * @param searchFlags configuration options for search */ @Nonnull public BrowseResultV2 browseV2( @@ -224,7 +227,8 @@ public BrowseResultV2 browseV2( @Nullable Filter filter, @Nonnull String input, int start, - int count); + int count, + @Nullable SearchFlags searchFlags); /** * Gets a list of paths for a given urn.
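The new SystemUpdateSchemaRegistryFactory above passes topic names and their pre-registered schema ids to MockSystemUpdateSerializer and MockSystemUpdateDeserializer through string-keyed config properties. The following standalone Java sketch is not part of the patch; the topic names and schema ids are placeholder assumptions. It only shows the shape of that properties map, using the same key constants the factory defines:

import java.util.HashMap;
import java.util.Map;

public class SystemUpdateRegistryConfigSketch {
  // Mirrors the constants introduced in SystemUpdateSchemaRegistryFactory.
  static final String PREFIX = "data-hub.system-update.topic-key.";
  static final String ID_SUFFIX = ".id";
  static final String DUHE_KEY = PREFIX + "duhe";
  static final String MCL_VERSIONED_KEY = PREFIX + "mcl-versioned";

  public static void main(String[] args) {
    // Placeholder values; in the factory these come from @Value-injected properties
    // and SchemaRegistryService.getSchemaIdForTopic(...).
    String duheTopic = "DataHubUpgradeHistory_v1";
    String mclTopic = "MetadataChangeLog_Versioned_v1";
    int duheSchemaId = 1;
    int mclSchemaId = 2;

    Map<String, String> props = new HashMap<>();
    // Topic names keyed by their logical topic key.
    props.put(DUHE_KEY, duheTopic);
    props.put(MCL_VERSIONED_KEY, mclTopic);
    // Schema ids keyed by "<topic key>.id"; MockSystemUpdateSerializer and
    // MockSystemUpdateDeserializer read these back in configure(...) and
    // pre-register each schema under "<topic name>-value" with that id.
    props.put(DUHE_KEY + ID_SUFFIX, String.valueOf(duheSchemaId));
    props.put(MCL_VERSIONED_KEY + ID_SUFFIX, String.valueOf(mclSchemaId));

    props.forEach((key, value) -> System.out.println(key + " = " + value));
  }
}

This also illustrates why the deserializer no longer hard-codes getSchemaById(1): the schema id now travels with the config, so previously used topics whose registered ids are greater than 1 still resolve correctly.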