Merge branch 'datahub-project:master' into master
treff7es authored Dec 19, 2023
2 parents de06006 + 7b06782 commit f6f3b28
Showing 50 changed files with 563 additions and 154 deletions.
7 changes: 4 additions & 3 deletions .github/workflows/airflow-plugin.yml
@@ -32,20 +32,21 @@ jobs:
     strategy:
       matrix:
         include:
+          # Note: this should be kept in sync with tox.ini.
           - python-version: "3.8"
             extra_pip_requirements: "apache-airflow~=2.1.4"
             extra_pip_extras: plugin-v1
           - python-version: "3.8"
             extra_pip_requirements: "apache-airflow~=2.2.4"
             extra_pip_extras: plugin-v1
           - python-version: "3.10"
-            extra_pip_requirements: "apache-airflow~=2.4.0"
+            extra_pip_requirements: 'apache-airflow~=2.4.0 pluggy==1.0.0 "pendulum<3.0"'
             extra_pip_extras: plugin-v2
           - python-version: "3.10"
-            extra_pip_requirements: "apache-airflow~=2.6.0"
+            extra_pip_requirements: 'apache-airflow~=2.6.0 "pendulum<3.0"'
             extra_pip_extras: plugin-v2
           - python-version: "3.10"
-            extra_pip_requirements: "apache-airflow>=2.7.0"
+            extra_pip_requirements: "apache-airflow>=2.7.0 pydantic==2.4.2"
             extra_pip_extras: plugin-v2
       fail-fast: false
     steps:
@@ -12,6 +12,7 @@ public class FeatureFlags {
   private boolean readOnlyModeEnabled = false;
   private boolean showSearchFiltersV2 = false;
   private boolean showBrowseV2 = false;
+  private boolean platformBrowseV2 = false;
   private PreProcessHooks preProcessHooks;
   private boolean showAcrylInfo = false;
   private boolean showAccessManagement = false;
@@ -2,14 +2,16 @@
 
 import static com.linkedin.datahub.graphql.Constants.BROWSE_PATH_V2_DELIMITER;
 import static com.linkedin.datahub.graphql.resolvers.ResolverUtils.bindArgument;
-import static com.linkedin.datahub.graphql.resolvers.search.SearchUtils.resolveView;
+import static com.linkedin.datahub.graphql.resolvers.search.SearchUtils.*;
 
+import com.google.common.collect.ImmutableList;
 import com.linkedin.common.urn.UrnUtils;
 import com.linkedin.datahub.graphql.QueryContext;
 import com.linkedin.datahub.graphql.generated.BrowseResultGroupV2;
 import com.linkedin.datahub.graphql.generated.BrowseResultMetadata;
 import com.linkedin.datahub.graphql.generated.BrowseResultsV2;
 import com.linkedin.datahub.graphql.generated.BrowseV2Input;
+import com.linkedin.datahub.graphql.generated.EntityType;
 import com.linkedin.datahub.graphql.resolvers.EntityTypeMapper;
 import com.linkedin.datahub.graphql.resolvers.ResolverUtils;
 import com.linkedin.datahub.graphql.resolvers.search.SearchUtils;
@@ -43,8 +45,8 @@ public class BrowseV2Resolver implements DataFetcher<CompletableFuture<BrowseRes
   public CompletableFuture<BrowseResultsV2> get(DataFetchingEnvironment environment) {
     final QueryContext context = environment.getContext();
     final BrowseV2Input input = bindArgument(environment.getArgument("input"), BrowseV2Input.class);
-    final String entityName = EntityTypeMapper.getName(input.getType());
 
+    final List<String> entityNames = getEntityNames(input);
     final int start = input.getStart() != null ? input.getStart() : DEFAULT_START;
     final int count = input.getCount() != null ? input.getCount() : DEFAULT_COUNT;
     final String query = input.getQuery() != null ? input.getQuery() : "*";
@@ -70,7 +72,7 @@ public CompletableFuture<BrowseResultsV2> get(DataFetchingEnvironmen
 
     BrowseResultV2 browseResults =
         _entityClient.browseV2(
-            entityName,
+            entityNames,
             pathStr,
             maybeResolvedView != null
                 ? SearchUtils.combineFilters(
@@ -87,6 +89,18 @@ public CompletableFuture<BrowseResultsV2> get(DataFetchingEnvironmen
         });
   }
 
+  public static List<String> getEntityNames(BrowseV2Input input) {
+    List<EntityType> entityTypes;
+    if (input.getTypes() != null && input.getTypes().size() > 0) {
+      entityTypes = input.getTypes();
+    } else if (input.getType() != null) {
+      entityTypes = ImmutableList.of(input.getType());
+    } else {
+      entityTypes = BROWSE_ENTITY_TYPES;
+    }
+    return entityTypes.stream().map(EntityTypeMapper::getName).collect(Collectors.toList());
+  }
+
   private BrowseResultsV2 mapBrowseResults(BrowseResultV2 browseResults) {
     BrowseResultsV2 results = new BrowseResultsV2();
     results.setTotal(browseResults.getNumGroups());
@@ -175,6 +175,7 @@ public CompletableFuture<AppConfig> get(final DataFetchingEnvironmen
         .setShowAcrylInfo(_featureFlags.isShowAcrylInfo())
         .setShowAccessManagement(_featureFlags.isShowAccessManagement())
         .setNestedDomainsEnabled(_featureFlags.isNestedDomainsEnabled())
+        .setPlatformBrowseV2(_featureFlags.isPlatformBrowseV2())
         .build();
 
     appConfig.setFeatureFlags(featureFlagsConfig);
@@ -92,6 +92,20 @@ private SearchUtils() {}
       EntityType.NOTEBOOK,
       EntityType.DATA_PRODUCT);
 
+  /** Entities that are part of browse by default */
+  public static final List<EntityType> BROWSE_ENTITY_TYPES =
+      ImmutableList.of(
+          EntityType.DATASET,
+          EntityType.DASHBOARD,
+          EntityType.CHART,
+          EntityType.CONTAINER,
+          EntityType.MLMODEL,
+          EntityType.MLMODEL_GROUP,
+          EntityType.MLFEATURE_TABLE,
+          EntityType.DATA_FLOW,
+          EntityType.DATA_JOB,
+          EntityType.NOTEBOOK);
+
   /** A prioritized list of source filter types used to generate quick filters */
   public static final List<String> PRIORITIZED_SOURCE_ENTITY_TYPES =
       Stream.of(
5 changes: 5 additions & 0 deletions datahub-graphql-core/src/main/resources/app.graphql
@@ -437,6 +437,11 @@ type FeatureFlagsConfig {
   """
   showBrowseV2: Boolean!
 
+  """
+  Whether browse v2 is in platform mode, meaning that platforms are displayed instead of entity types at the root.
+  """
+  platformBrowseV2: Boolean!
+
   """
   Whether we should show CTAs in the UI related to moving to Managed DataHub by Acryl.
   """
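For reference, a client can read the new flag through the appConfig query once this change is deployed. A minimal sketch in Python: the endpoint URL and token below are placeholders (not part of this commit), and the field names match the appConfig query in this diff.

import requests

# Placeholder endpoint and token; adjust for your deployment.
GRAPHQL_URL = "http://localhost:8080/api/graphql"
TOKEN = "<personal-access-token>"

query = """
query appConfig {
  appConfig {
    featureFlags {
      showBrowseV2
      platformBrowseV2
    }
  }
}
"""

resp = requests.post(
    GRAPHQL_URL,
    json={"query": query},
    headers={"Authorization": f"Bearer {TOKEN}"},
)
resp.raise_for_status()
flags = resp.json()["data"]["appConfig"]["featureFlags"]
print("platform browse v2 enabled:", flags["platformBrowseV2"])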
9 changes: 7 additions & 2 deletions datahub-graphql-core/src/main/resources/search.graphql
@@ -1176,9 +1176,14 @@ Input required for browse queries
 """
 input BrowseV2Input {
   """
-  The browse entity type
+  The browse entity type - deprecated, use types instead
   """
-  type: EntityType!
+  type: EntityType
+
+  """
+  The browse entity types. If not provided, all types will be used.
+  """
+  types: [EntityType!]
 
   """
   The browse path V2 - a list with each entry being part of the browse path V2
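To illustrate the new input shape: browseV2 can now take a list of types, and omitting both type and types falls back to the server-side BROWSE_ENTITY_TYPES default (see BrowseV2Resolver.getEntityNames above). A minimal sketch in Python; the endpoint is a placeholder, and the result fields (total, groups) are read from the BrowseResultsV2 types referenced in this diff, so verify them against your schema version.

import requests

GRAPHQL_URL = "http://localhost:8080/api/graphql"  # placeholder

query = """
query browse($input: BrowseV2Input!) {
  browseV2(input: $input) {
    total
    groups { name count hasSubGroups }
  }
}
"""

# Browse datasets and dashboards together via the new `types` list;
# leaving out `type`/`types` entirely would browse all default entity types.
variables = {
    "input": {
        "types": ["DATASET", "DASHBOARD"],
        "path": [],
        "start": 0,
        "count": 10,
    }
}

resp = requests.post(GRAPHQL_URL, json={"query": query, "variables": variables})
resp.raise_for_status()
print(resp.json()["data"]["browseV2"])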
@@ -249,7 +249,7 @@ private static EntityClient initMockEntityClient(
     EntityClient client = Mockito.mock(EntityClient.class);
     Mockito.when(
             client.browseV2(
-                Mockito.eq(entityName),
+                Mockito.eq(ImmutableList.of(entityName)),
                 Mockito.eq(path),
                 Mockito.eq(filter),
                 Mockito.eq(query),
1 change: 1 addition & 0 deletions datahub-web-react/src/appConfigContext.tsx
@@ -50,6 +50,7 @@ export const DEFAULT_APP_CONFIG = {
         showAcrylInfo: false,
         showAccessManagement: false,
         nestedDomainsEnabled: true,
+        platformBrowseV2: false,
     },
 };
 
1 change: 1 addition & 0 deletions datahub-web-react/src/graphql/app.graphql
@@ -65,6 +65,7 @@ query appConfig {
       showAcrylInfo
       showAccessManagement
       nestedDomainsEnabled
+      platformBrowseV2
     }
   }
 }
2 changes: 1 addition & 1 deletion docker/datahub-ingestion-base/Dockerfile
@@ -4,7 +4,7 @@ ARG BASE_IMAGE=base
 # Defining custom repo urls for use in enterprise environments. Re-used between stages below.
 ARG ALPINE_REPO_URL=http://dl-cdn.alpinelinux.org/alpine
 ARG GITHUB_REPO_URL=https://github.com
-ARG DEBIAN_REPO_URL=http://deb.debian.org/debian
+ARG DEBIAN_REPO_URL=https://deb.debian.org/debian
 ARG PIP_MIRROR_URL=null
 
 FROM golang:1-alpine3.18 AS dockerize-binary
2 changes: 1 addition & 1 deletion docker/datahub-ingestion/Dockerfile
@@ -3,7 +3,7 @@ ARG APP_ENV=full
 ARG BASE_IMAGE=acryldata/datahub-ingestion-base
 ARG DOCKER_VERSION=head
 ARG PIP_MIRROR_URL=null
-ARG DEBIAN_REPO_URL=http://deb.debian.org/debian
+ARG DEBIAN_REPO_URL=https://deb.debian.org/debian
 
 FROM $BASE_IMAGE:$DOCKER_VERSION as base
 USER 0
2 changes: 1 addition & 1 deletion docker/datahub-ingestion/build.gradle
@@ -33,7 +33,7 @@ docker {
         i -> (!i.file.name.endsWith(".dockerignore") && i.file.isHidden())
     }
 
-    def dockerBuildArgs = [DOCKER_VERSION: version, RELEASE_VERSION: version.replace('-SNAPSHOT', '').replace('v', '').replace("-slim", '')]
+    def dockerBuildArgs = [DOCKER_VERSION: version, RELEASE_VERSION: version.replace('-SNAPSHOT', '').replace('v', '').replace("-slim", ''), BASE_IMAGE: "${docker_registry}/datahub-ingestion-base"]
 
     // Add build args if they are defined (needed for some CI or enterprise environments)
     if (project.hasProperty('pipMirrorUrl')) {
9 changes: 9 additions & 0 deletions metadata-ingestion-modules/airflow-plugin/tox.ini
@@ -10,6 +10,7 @@ envlist = py38-airflow21, py38-airflow22, py310-airflow24, py310-airflow26, py31
 use_develop = true
 extras = dev,integration-tests,plugin-v1
 deps =
+    # This should be kept in sync with the Github Actions matrix.
     -e ../../metadata-ingestion/
     # Airflow version
     airflow21: apache-airflow~=2.1.0
@@ -20,7 +21,15 @@ deps =
     # See https://github.com/datahub-project/datahub/pull/9365
     airflow24: apache-airflow~=2.4.0,pluggy==1.0.0
     airflow26: apache-airflow~=2.6.0
+    # Respect the constraints file on pendulum.
+    # See https://github.com/apache/airflow/issues/36274
+    airflow24,airflow26: pendulum>=2.0,<3.0
+    # The Airflow 2.7 constraints file points at pydantic v2, so we match that here.
+    # https://raw.githubusercontent.com/apache/airflow/constraints-2.7.3/constraints-3.10.txt
+    # Note that Airflow is actually compatible with both pydantic v1 and v2, and the
+    # constraints file is overly restrictive.
     airflow27: apache-airflow~=2.7.0
+    airflow27: pydantic==2.4.2
 commands =
     pytest --cov-append {posargs}
 
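As a quick sanity check that a given tox environment actually resolved these pins, a small sketch using importlib.metadata (standard library in Python 3.8+); the expected-version notes simply mirror the tox.ini entries above.

from importlib.metadata import PackageNotFoundError, version

# Pins taken from the tox.ini above; pendulum only matters for the
# airflow24/airflow26 environments, pydantic for airflow27.
expected = {
    "apache-airflow": "varies per tox environment",
    "pendulum": "<3.0 on Airflow 2.4/2.6",
    "pydantic": "==2.4.2 on Airflow 2.7",
}

for pkg, note in expected.items():
    try:
        print(f"{pkg}=={version(pkg)}  (expected {note})")
    except PackageNotFoundError:
        print(f"{pkg}: not installed")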
47 changes: 39 additions & 8 deletions metadata-ingestion/setup.py
@@ -14,9 +14,10 @@
     "mypy_extensions>=0.4.3",
     # Actual dependencies.
     "typing-inspect",
-    # pydantic 1.8.2 is incompatible with mypy 0.910.
-    # See https://github.com/samuelcolvin/pydantic/pull/3175#issuecomment-995382910.
+    # pydantic 1.10.3 is incompatible with typing-extensions 4.1.1 - https://github.com/pydantic/pydantic/issues/4885
+    # pydantic 2 makes major, backwards-incompatible changes - https://github.com/pydantic/pydantic/issues/4887
-    "pydantic>=1.5.1,!=1.10.3,<2",
+    "pydantic>=1.10.0,!=1.10.3",
     "mixpanel>=4.9.0",
     "sentry-sdk",
 }
@@ -53,6 +54,18 @@
     "ruamel.yaml",
 }
 
+pydantic_no_v2 = {
+    # pydantic 2 makes major, backwards-incompatible changes - https://github.com/pydantic/pydantic/issues/4887
+    # Tags sources that require the pydantic v2 API.
+    "pydantic<2",
+}
+
+plugin_common = {
+    # While pydantic v2 support is experimental, require that all plugins
+    # continue to use v1. This will ensure that no ingestion recipes break.
+    *pydantic_no_v2,
+}
+
 rest_common = {"requests", "requests_file"}
 
 kafka_common = {
@@ -118,6 +131,7 @@
     "sqlalchemy>=1.4.39, <2",
     # Required for SQL profiling.
     "great-expectations>=0.15.12, <=0.15.50",
+    *pydantic_no_v2,  # because of great-expectations
     # scipy version restricted to reduce backtracking, used by great-expectations,
     "scipy>=1.7.2",
     # GE added handling for higher version of jinja2
@@ -229,6 +243,7 @@
 iceberg_common = {
     # Iceberg Python SDK
     "pyiceberg",
+    *pydantic_no_v2,  # because of pyiceberg
     "pyarrow>=9.0.0, <13.0.0",
 }
 
@@ -354,7 +369,11 @@
     "mlflow": {"mlflow-skinny>=2.3.0"},
     "mode": {"requests", "tenacity>=8.0.1"} | sqllineage_lib,
     "mongodb": {"pymongo[srv]>=3.11", "packaging"},
-    "mssql": sql_common | {"sqlalchemy-pytds>=0.3", "pyOpenSSL"},
+    "mssql": sql_common
+    | {
+        "sqlalchemy-pytds>=0.3",
+        "pyOpenSSL",
+    },
     "mssql-odbc": sql_common | {"pyodbc"},
     "mysql": mysql,
     # mariadb should have same dependency as mysql
@@ -473,9 +492,6 @@
     "flake8-bugbear==23.3.12",
     "isort>=5.7.0",
     "mypy==1.0.0",
-    # pydantic 1.8.2 is incompatible with mypy 0.910.
-    # See https://github.com/samuelcolvin/pydantic/pull/3175#issuecomment-995382910.
-    "pydantic>=1.10.0",
     *test_api_requirements,
     pytest_dep,
     "pytest-asyncio>=0.16.0",
@@ -559,7 +575,7 @@
     "kafka-connect",
     "ldap",
     "mongodb",
-    "mssql",
+    "mssql" if sys.version_info >= (3, 8) else None,
     "mysql",
     "mariadb",
     "redash",
@@ -736,7 +752,22 @@
     extras_require={
         "base": list(framework_common),
         **{
-            plugin: list(framework_common | dependencies)
+            plugin: list(
+                framework_common
+                | (
+                    plugin_common
+                    if plugin
+                    not in {
+                        "airflow",
+                        "datahub-rest",
+                        "datahub-kafka",
+                        "sync-file-emitter",
+                        "sql-parser",
+                    }
+                    else set()
+                )
+                | dependencies
+            )
             for (plugin, dependencies) in plugins.items()
         },
         "all": list(
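The net effect of the extras_require change: every plugin extra now also pulls in plugin_common (and with it the pydantic<2 pin) unless it is one of the listed emitter/parser extras. A self-contained sketch of the same set-union logic, with made-up stand-in data purely for illustration:

# Made-up stand-ins for the real sets in setup.py, for illustration only.
framework_common = {"click>=7.1.2", "PyYAML"}
pydantic_no_v2 = {"pydantic<2"}
plugin_common = {*pydantic_no_v2}
plugins = {
    "mysql": {"pymysql>=1.0.2"},
    "datahub-rest": {"requests"},
}

# Same shape as the new extras_require expression: plugin_common is mixed in
# unless the plugin is one of the lightweight emitter/parser extras.
EXEMPT = {"airflow", "datahub-rest", "datahub-kafka", "sync-file-emitter", "sql-parser"}
extras = {
    plugin: sorted(
        framework_common
        | (plugin_common if plugin not in EXEMPT else set())
        | deps
    )
    for plugin, deps in plugins.items()
}

print(extras["mysql"])         # includes 'pydantic<2'
print(extras["datahub-rest"])  # does not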
@@ -1,7 +1,7 @@
 from typing import Optional
 
-from datahub.configuration import ConfigModel
+from datahub.configuration.pydantic_migration_helpers import v1_ConfigModel
 
 
-class BaseAssertion(ConfigModel):
+class BaseAssertion(v1_ConfigModel):
     description: Optional[str] = None
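The import swap above implies a compatibility shim that keeps these configs on the pydantic v1 API whichever pydantic major version is installed. A hedged sketch of how such a pydantic_migration_helpers module might look (an assumption about its shape, not DataHub's actual code):

# Hypothetical sketch of a v1 compatibility shim; not DataHub's actual module.
try:
    # pydantic 2.x ships the old API under pydantic.v1.
    from pydantic.v1 import BaseModel as _V1BaseModel
except ImportError:
    # On pydantic 1.x the top-level API already is the v1 API.
    from pydantic import BaseModel as _V1BaseModel


class v1_ConfigModel(_V1BaseModel):
    """Base class for configs that must keep using the pydantic v1 API."""

    class Config:
        extra = "forbid"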