diff --git a/.github/workflows/airflow-plugin.yml b/.github/workflows/airflow-plugin.yml index 7ae7b87b0f5ce..c5c75de4f7aee 100644 --- a/.github/workflows/airflow-plugin.yml +++ b/.github/workflows/airflow-plugin.yml @@ -87,8 +87,8 @@ jobs: token: ${{ secrets.CODECOV_TOKEN }} directory: . fail_ci_if_error: false - flags: airflow-${{ matrix.python-version }}-${{ matrix.extraPythonRequirement }} - name: pytest-airflow + flags: airflow,airflow-${{ matrix.extra_pip_extras }} + name: pytest-airflow-${{ matrix.python-version }}-${{ matrix.extra_pip_requirements }} verbose: true event-file: diff --git a/.github/workflows/spark-smoke-test.yml b/.github/workflows/spark-smoke-test.yml index e463e15243ee3..87fa3c85fc581 100644 --- a/.github/workflows/spark-smoke-test.yml +++ b/.github/workflows/spark-smoke-test.yml @@ -42,8 +42,12 @@ jobs: cache: "pip" - name: Install dependencies run: ./metadata-ingestion/scripts/install_deps.sh + - name: Disk Check + run: df -h . && docker images - name: Remove images run: docker image prune -a -f || true + - name: Disk Check + run: df -h . 
&& docker images - name: Smoke test run: | ./gradlew :metadata-integration:java:spark-lineage:integrationTest \ @@ -54,12 +58,24 @@ jobs: -x :datahub-web-react:yarnBuild \ -x :datahub-web-react:distZip \ -x :datahub-web-react:jar + - name: store logs + if: failure() + run: | + docker ps -a + docker logs datahub-gms >& gms-${{ matrix.test_strategy }}.log || true + docker logs datahub-actions >& actions-${{ matrix.test_strategy }}.log || true + docker logs broker >& broker-${{ matrix.test_strategy }}.log || true + docker logs mysql >& mysql-${{ matrix.test_strategy }}.log || true + docker logs elasticsearch >& elasticsearch-${{ matrix.test_strategy }}.log || true + docker logs datahub-frontend-react >& frontend-${{ matrix.test_strategy }}.log || true - name: Upload logs uses: actions/upload-artifact@v3 if: failure() with: name: docker logs - path: "docker/build/container-logs/*.log" + path: | + "**/build/container-logs/*.log" + "*.log" - uses: actions/upload-artifact@v3 if: always() with: diff --git a/datahub-frontend/app/auth/AuthUtils.java b/datahub-frontend/app/auth/AuthUtils.java index 84488a43f253e..51bb784c61b3b 100644 --- a/datahub-frontend/app/auth/AuthUtils.java +++ b/datahub-frontend/app/auth/AuthUtils.java @@ -76,6 +76,9 @@ public class AuthUtils { public static final String USE_NONCE = "useNonce"; public static final String READ_TIMEOUT = "readTimeout"; public static final String EXTRACT_JWT_ACCESS_TOKEN_CLAIMS = "extractJwtAccessTokenClaims"; + // Retained for backwards compatibility + public static final String PREFERRED_JWS_ALGORITHM = "preferredJwsAlgorithm"; + public static final String PREFERRED_JWS_ALGORITHM_2 = "preferredJwsAlgorithm2"; /** * Determines whether the inbound request should be forward to downstream Metadata Service. 
Today, diff --git a/datahub-frontend/app/auth/sso/oidc/OidcConfigs.java b/datahub-frontend/app/auth/sso/oidc/OidcConfigs.java index bf3384527af11..5de4eba9cb679 100644 --- a/datahub-frontend/app/auth/sso/oidc/OidcConfigs.java +++ b/datahub-frontend/app/auth/sso/oidc/OidcConfigs.java @@ -226,8 +226,8 @@ public Builder from(final com.typesafe.config.Config configs, final String ssoSe extractJwtAccessTokenClaims = Optional.of(jsonNode.get(EXTRACT_JWT_ACCESS_TOKEN_CLAIMS).asBoolean()); } - if (jsonNode.has(OIDC_PREFERRED_JWS_ALGORITHM)) { - preferredJwsAlgorithm = Optional.of(jsonNode.get(OIDC_PREFERRED_JWS_ALGORITHM).asText()); + if (jsonNode.has(PREFERRED_JWS_ALGORITHM_2)) { + preferredJwsAlgorithm = Optional.of(jsonNode.get(PREFERRED_JWS_ALGORITHM_2).asText()); } else { preferredJwsAlgorithm = Optional.ofNullable(getOptional(configs, OIDC_PREFERRED_JWS_ALGORITHM, null)); diff --git a/datahub-frontend/play.gradle b/datahub-frontend/play.gradle index 1e3a2767852d6..9bd77e5279a91 100644 --- a/datahub-frontend/play.gradle +++ b/datahub-frontend/play.gradle @@ -101,6 +101,9 @@ play { test { useJUnitPlatform() + testLogging.showStandardStreams = true + testLogging.exceptionFormat = 'full' + def playJava17CompatibleJvmArgs = [ "--add-opens=java.base/java.lang=ALL-UNNAMED", //"--add-opens=java.base/java.lang.invoke=ALL-UNNAMED", diff --git a/datahub-frontend/test/security/OidcConfigurationTest.java b/datahub-frontend/test/security/OidcConfigurationTest.java index c1147ae936b3a..8226d4e74cc21 100644 --- a/datahub-frontend/test/security/OidcConfigurationTest.java +++ b/datahub-frontend/test/security/OidcConfigurationTest.java @@ -1,5 +1,6 @@ package security; +import static auth.AuthUtils.*; import static auth.sso.oidc.OidcConfigs.*; import static org.junit.jupiter.api.Assertions.assertEquals; @@ -24,6 +25,7 @@ import java.util.concurrent.TimeUnit; import org.junit.jupiter.api.Test; import org.pac4j.oidc.client.OidcClient; +import org.json.JSONObject; public class 
OidcConfigurationTest { @@ -317,4 +319,26 @@ public void readTimeoutPropagation() { OidcProvider oidcProvider = new OidcProvider(oidcConfigs); assertEquals(10000, ((OidcClient) oidcProvider.client()).getConfiguration().getReadTimeout()); } + + @Test + public void readPreferredJwsAlgorithmPropagationFromConfig() { + final String SSO_SETTINGS_JSON_STR = new JSONObject().put(PREFERRED_JWS_ALGORITHM, "HS256").toString(); + CONFIG.withValue(OIDC_PREFERRED_JWS_ALGORITHM, ConfigValueFactory.fromAnyRef("RS256")); + OidcConfigs.Builder oidcConfigsBuilder = new OidcConfigs.Builder(); + oidcConfigsBuilder.from(CONFIG, SSO_SETTINGS_JSON_STR); + OidcConfigs oidcConfigs = new OidcConfigs(oidcConfigsBuilder); + OidcProvider oidcProvider = new OidcProvider(oidcConfigs); + assertEquals("RS256", ((OidcClient) oidcProvider.client()).getConfiguration().getPreferredJwsAlgorithm().toString()); + } + + @Test + public void readPreferredJwsAlgorithmPropagationFromJSON() { + final String SSO_SETTINGS_JSON_STR = new JSONObject().put(PREFERRED_JWS_ALGORITHM, "Unused").put(PREFERRED_JWS_ALGORITHM_2, "HS256").toString(); + CONFIG.withValue(OIDC_PREFERRED_JWS_ALGORITHM, ConfigValueFactory.fromAnyRef("RS256")); + OidcConfigs.Builder oidcConfigsBuilder = new OidcConfigs.Builder(); + oidcConfigsBuilder.from(CONFIG, SSO_SETTINGS_JSON_STR); + OidcConfigs oidcConfigs = new OidcConfigs(oidcConfigsBuilder); + OidcProvider oidcProvider = new OidcProvider(oidcConfigs); + assertEquals("HS256", ((OidcClient) oidcProvider.client()).getConfiguration().getPreferredJwsAlgorithm().toString()); + } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/assertion/AssertionMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/assertion/AssertionMapper.java index 2536f4d2521ee..43b7b5bb102ad 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/assertion/AssertionMapper.java +++ 
b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/assertion/AssertionMapper.java @@ -66,6 +66,7 @@ private static com.linkedin.datahub.graphql.generated.AssertionInfo mapAssertion mapDatasetAssertionInfo(gmsAssertionInfo.getDatasetAssertion()); assertionInfo.setDatasetAssertion(datasetAssertion); } + assertionInfo.setDescription(gmsAssertionInfo.getDescription()); return assertionInfo; } diff --git a/datahub-graphql-core/src/main/resources/entity.graphql b/datahub-graphql-core/src/main/resources/entity.graphql index 2ad4982579380..3ea1b38d3db0d 100644 --- a/datahub-graphql-core/src/main/resources/entity.graphql +++ b/datahub-graphql-core/src/main/resources/entity.graphql @@ -6803,6 +6803,11 @@ type AssertionInfo { Dataset-specific assertion information """ datasetAssertion: DatasetAssertionInfo + + """ + An optional human-readable description of the assertion + """ + description: String } """ diff --git a/datahub-web-react/src/App.tsx b/datahub-web-react/src/App.tsx index 79c9ee91ceaa1..e8910e7dc2ea8 100644 --- a/datahub-web-react/src/App.tsx +++ b/datahub-web-react/src/App.tsx @@ -1,20 +1,19 @@ -import React, { useEffect, useState } from 'react'; +import React from 'react'; import Cookies from 'js-cookie'; import { message } from 'antd'; import { BrowserRouter as Router } from 'react-router-dom'; import { ApolloClient, ApolloProvider, createHttpLink, InMemoryCache, ServerError } from '@apollo/client'; import { onError } from '@apollo/client/link/error'; -import { ThemeProvider } from 'styled-components'; import { Helmet, HelmetProvider } from 'react-helmet-async'; import './App.less'; import { Routes } from './app/Routes'; -import { Theme } from './conf/theme/types'; -import defaultThemeConfig from './conf/theme/theme_light.config.json'; import { PageRoutes } from './conf/Global'; import { isLoggedInVar } from './app/auth/checkAuthStatus'; import { GlobalCfg } from './conf'; import possibleTypesResult from './possibleTypes.generated'; import 
{ ErrorCodes } from './app/shared/constants'; +import CustomThemeProvider from './CustomThemeProvider'; +import { useCustomTheme } from './customThemeContext'; /* Construct Apollo Client @@ -71,33 +70,16 @@ const client = new ApolloClient({ }); export const InnerApp: React.VFC = () => { - const [dynamicThemeConfig, setDynamicThemeConfig] = useState(defaultThemeConfig); - - useEffect(() => { - if (import.meta.env.DEV) { - import(/* @vite-ignore */ `./conf/theme/${import.meta.env.REACT_APP_THEME_CONFIG}`).then((theme) => { - setDynamicThemeConfig(theme); - }); - } else { - // Send a request to the server to get the theme config. - fetch(`/assets/conf/theme/${import.meta.env.REACT_APP_THEME_CONFIG}`) - .then((response) => response.json()) - .then((theme) => { - setDynamicThemeConfig(theme); - }); - } - }, []); - return ( - - {dynamicThemeConfig.content.title} - - + + + {useCustomTheme().theme?.content.title} + - + ); }; diff --git a/datahub-web-react/src/CustomThemeProvider.tsx b/datahub-web-react/src/CustomThemeProvider.tsx new file mode 100644 index 0000000000000..f2e2678a90d8c --- /dev/null +++ b/datahub-web-react/src/CustomThemeProvider.tsx @@ -0,0 +1,32 @@ +import React, { useEffect, useState } from 'react'; +import { ThemeProvider } from 'styled-components'; +import { Theme } from './conf/theme/types'; +import defaultThemeConfig from './conf/theme/theme_light.config.json'; +import { CustomThemeContext } from './customThemeContext'; + +const CustomThemeProvider = ({ children }: { children: React.ReactNode }) => { + const [currentTheme, setTheme] = useState(defaultThemeConfig); + + useEffect(() => { + if (import.meta.env.DEV) { + import(/* @vite-ignore */ `./conf/theme/${import.meta.env.REACT_APP_THEME_CONFIG}`).then((theme) => { + setTheme(theme); + }); + } else { + // Send a request to the server to get the theme config. 
+ fetch(`/assets/conf/theme/${import.meta.env.REACT_APP_THEME_CONFIG}`) + .then((response) => response.json()) + .then((theme) => { + setTheme(theme); + }); + } + }, []); + + return ( + + {children} + + ); +}; + +export default CustomThemeProvider; diff --git a/datahub-web-react/src/Mocks.tsx b/datahub-web-react/src/Mocks.tsx index 03d6f4a624c3d..9f339bb7db548 100644 --- a/datahub-web-react/src/Mocks.tsx +++ b/datahub-web-react/src/Mocks.tsx @@ -297,6 +297,7 @@ export const dataset1 = { embed: null, browsePathV2: { path: [{ name: 'test', entity: null }], __typename: 'BrowsePathV2' }, autoRenderAspects: [], + structuredProperties: null, }; export const dataset2 = { @@ -393,6 +394,7 @@ export const dataset2 = { embed: null, browsePathV2: { path: [{ name: 'test', entity: null }], __typename: 'BrowsePathV2' }, autoRenderAspects: [], + structuredProperties: null, }; export const dataset3 = { @@ -626,6 +628,7 @@ export const dataset3 = { dataProduct: null, lastProfile: null, lastOperation: null, + structuredProperties: null, } as Dataset; export const dataset3WithSchema = { @@ -650,6 +653,7 @@ export const dataset3WithSchema = { globalTags: null, glossaryTerms: null, label: 'hi', + schemaFieldEntity: null, }, { __typename: 'SchemaField', @@ -665,6 +669,7 @@ export const dataset3WithSchema = { globalTags: null, glossaryTerms: null, label: 'hi', + schemaFieldEntity: null, }, ], hash: '', diff --git a/datahub-web-react/src/app/entity/dataset/profile/schema/components/SchemaDescriptionField.tsx b/datahub-web-react/src/app/entity/dataset/profile/schema/components/SchemaDescriptionField.tsx index 1d4f155f797e0..2cd4cbd6dcb6c 100644 --- a/datahub-web-react/src/app/entity/dataset/profile/schema/components/SchemaDescriptionField.tsx +++ b/datahub-web-react/src/app/entity/dataset/profile/schema/components/SchemaDescriptionField.tsx @@ -86,6 +86,7 @@ type Props = { description: string, ) => Promise, Record> | void>; isEdited?: boolean; + isReadOnly?: boolean; }; const 
ABBREVIATED_LIMIT = 80; @@ -97,10 +98,11 @@ export default function DescriptionField({ onUpdate, isEdited = false, original, + isReadOnly, }: Props) { const [showAddModal, setShowAddModal] = useState(false); const overLimit = removeMarkdown(description).length > 80; - const isSchemaEditable = React.useContext(SchemaEditableContext); + const isSchemaEditable = React.useContext(SchemaEditableContext) && !isReadOnly; const onCloseModal = () => setShowAddModal(false); const { urn, entityType } = useEntityData(); @@ -140,11 +142,12 @@ export default function DescriptionField({ {expanded || !overLimit ? ( <> {!!description && } - {!!description && ( + {!!description && (EditButton || overLimit) && ( {overLimit && ( { + onClick={(e) => { + e.stopPropagation(); handleExpanded(false); }} > @@ -162,7 +165,8 @@ export default function DescriptionField({ readMore={ <> { + onClick={(e) => { + e.stopPropagation(); handleExpanded(true); }} > @@ -177,7 +181,7 @@ export default function DescriptionField({ )} - {isSchemaEditable && isEdited && (edited)} + {isEdited && (edited)} {showAddModal && (
- + {!isAddDesc && description && original && ( Original:}> diff --git a/datahub-web-react/src/app/entity/shared/components/styled/EntityIcon.tsx b/datahub-web-react/src/app/entity/shared/components/styled/EntityIcon.tsx new file mode 100644 index 0000000000000..bd001b51d53ce --- /dev/null +++ b/datahub-web-react/src/app/entity/shared/components/styled/EntityIcon.tsx @@ -0,0 +1,24 @@ +import React from 'react'; +import { useEntityRegistry } from '../../../../useEntityRegistry'; +import { PlatformIcon } from '../../../../search/filters/utils'; +import { Entity } from '../../../../../types.generated'; +import { IconStyleType } from '../../../Entity'; +import { ANTD_GRAY } from '../../constants'; + +interface Props { + entity: Entity; + size?: number; +} + +export default function EntityIcon({ entity, size = 14 }: Props) { + const entityRegistry = useEntityRegistry(); + const genericEntityProps = entityRegistry.getGenericEntityProperties(entity.type, entity); + const logoUrl = genericEntityProps?.platform?.properties?.logoUrl; + const icon = logoUrl ? 
( + + ) : ( + entityRegistry.getIcon(entity.type, size, IconStyleType.ACCENT, ANTD_GRAY[9]) + ); + + return <>{icon}; +} diff --git a/datahub-web-react/src/app/entity/shared/containers/profile/EntityProfile.tsx b/datahub-web-react/src/app/entity/shared/containers/profile/EntityProfile.tsx index d7b7a4da804ef..a781c732c9de6 100644 --- a/datahub-web-react/src/app/entity/shared/containers/profile/EntityProfile.tsx +++ b/datahub-web-react/src/app/entity/shared/containers/profile/EntityProfile.tsx @@ -30,7 +30,6 @@ import LineageExplorer from '../../../../lineage/LineageExplorer'; import CompactContext from '../../../../shared/CompactContext'; import DynamicTab from '../../tabs/Entity/weaklyTypedAspects/DynamicTab'; import analytics, { EventType } from '../../../../analytics'; -import { ProfileSidebarResizer } from './sidebar/ProfileSidebarResizer'; import { EntityMenuItems } from '../../EntityDropdown/EntityDropdown'; import { useIsSeparateSiblingsMode } from '../../siblingUtils'; import { EntityActionItem } from '../../entity/EntityActions'; @@ -45,6 +44,7 @@ import { } from '../../../../onboarding/config/LineageGraphOnboardingConfig'; import { useAppConfig } from '../../../../useAppConfig'; import { useUpdateDomainEntityDataOnChange } from '../../../../domain/utils'; +import ProfileSidebar from './sidebar/ProfileSidebar'; type Props = { urn: string; @@ -75,8 +75,6 @@ type Props = { isNameEditable?: boolean; }; -const MAX_SIDEBAR_WIDTH = 800; -const MIN_SIDEBAR_WIDTH = 200; const MAX_COMPACT_WIDTH = 490 - 24 * 2; const ContentContainer = styled.div` @@ -85,6 +83,7 @@ const ContentContainer = styled.div` min-height: 100%; flex: 1; min-width: 0; + overflow: hidden; `; const HeaderAndTabs = styled.div` @@ -113,15 +112,6 @@ const HeaderAndTabsFlex = styled.div` -webkit-box-shadow: 0px 1px 2px rgba(0, 0, 0, 0.75); } `; -const Sidebar = styled.div<{ $width: number }>` - max-height: 100%; - overflow: auto; - width: ${(props) => props.$width}px; - min-width: ${(props) => 
props.$width}px; - padding-left: 20px; - padding-right: 20px; - padding-bottom: 20px; -`; const Header = styled.div` border-bottom: 1px solid ${ANTD_GRAY[4.5]}; @@ -145,7 +135,7 @@ const defaultTabDisplayConfig = { enabled: (_, _1) => true, }; -const defaultSidebarSection = { +export const DEFAULT_SIDEBAR_SECTION = { visible: (_, _1) => true, }; @@ -176,11 +166,10 @@ export const EntityProfile = ({ const sortedTabs = sortEntityProfileTabs(appConfig.config, entityType, tabsWithDefaults); const sideBarSectionsWithDefaults = sidebarSections.map((sidebarSection) => ({ ...sidebarSection, - display: { ...defaultSidebarSection, ...sidebarSection.display }, + display: { ...DEFAULT_SIDEBAR_SECTION, ...sidebarSection.display }, })); const [shouldRefetchEmbeddedListSearch, setShouldRefetchEmbeddedListSearch] = useState(false); - const [sidebarWidth, setSidebarWidth] = useState(window.innerWidth * 0.25); const entityStepIds: string[] = getOnboardingStepIdsForEntityType(entityType); const lineageGraphStepIds: string[] = [LINEAGE_GRAPH_INTRO_ID, LINEAGE_GRAPH_TIME_FILTER_ID]; const stepIds = isLineageMode ? 
lineageGraphStepIds : entityStepIds; @@ -344,15 +333,7 @@ export const EntityProfile = ({ - - setSidebarWidth(Math.min(Math.max(width, MIN_SIDEBAR_WIDTH), MAX_SIDEBAR_WIDTH)) - } - initialSize={sidebarWidth} - /> - - - + )} diff --git a/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/EntitySidebar.tsx b/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/EntitySidebar.tsx index fbece870706f5..a8d1dceb71ec9 100644 --- a/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/EntitySidebar.tsx +++ b/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/EntitySidebar.tsx @@ -36,14 +36,16 @@ const LastIngestedSection = styled.div` type Props = { sidebarSections: EntitySidebarSection[]; + topSection?: EntitySidebarSection; }; -export const EntitySidebar = ({ sidebarSections }: Props) => { +export const EntitySidebar = ({ sidebarSections, topSection }: Props) => { const { entityData } = useEntityData(); const baseEntity = useBaseEntity(); return ( <> + {topSection && } {entityData?.lastIngested && ( diff --git a/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/ProfileSidebar.tsx b/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/ProfileSidebar.tsx new file mode 100644 index 0000000000000..b5e6737c16641 --- /dev/null +++ b/datahub-web-react/src/app/entity/shared/containers/profile/sidebar/ProfileSidebar.tsx @@ -0,0 +1,77 @@ +import React, { useState } from 'react'; +import styled from 'styled-components'; +import { ProfileSidebarResizer } from './ProfileSidebarResizer'; +import { EntitySidebar } from './EntitySidebar'; +import { EntitySidebarSection } from '../../../types'; + +export const MAX_SIDEBAR_WIDTH = 800; +export const MIN_SIDEBAR_WIDTH = 200; + +const Sidebar = styled.div<{ $width: number; backgroundColor?: string }>` + max-height: 100%; + position: relative; + width: ${(props) => props.$width}px; + min-width: ${(props) => props.$width}px; + ${(props) => 
props.backgroundColor && `background-color: ${props.backgroundColor};`} +`; + +const ScrollWrapper = styled.div` + overflow: auto; + max-height: 100%; + padding: 0 20px 20px 20px; +`; + +const DEFAULT_SIDEBAR_SECTION = { + visible: (_, _1) => true, +}; + +interface Props { + sidebarSections: EntitySidebarSection[]; + backgroundColor?: string; + topSection?: EntitySidebarSection; + alignLeft?: boolean; +} + +export default function ProfileSidebar({ sidebarSections, backgroundColor, topSection, alignLeft }: Props) { + const sideBarSectionsWithDefaults = sidebarSections.map((sidebarSection) => ({ + ...sidebarSection, + display: { ...DEFAULT_SIDEBAR_SECTION, ...sidebarSection.display }, + })); + + const [sidebarWidth, setSidebarWidth] = useState(window.innerWidth * 0.25); + + if (alignLeft) { + return ( + <> + + + + + + + setSidebarWidth(Math.min(Math.max(width, MIN_SIDEBAR_WIDTH), MAX_SIDEBAR_WIDTH)) + } + initialSize={sidebarWidth} + isSidebarOnLeft + /> + + ); + } + + return ( + <> + + setSidebarWidth(Math.min(Math.max(width, MIN_SIDEBAR_WIDTH), MAX_SIDEBAR_WIDTH)) + } + initialSize={sidebarWidth} + /> + + + + + + + ); +} diff --git a/datahub-web-react/src/app/entity/shared/tabs/Dataset/Schema/SchemaTab.tsx b/datahub-web-react/src/app/entity/shared/tabs/Dataset/Schema/SchemaTab.tsx index 75027e17b6d0c..28dc3ba5c6ce5 100644 --- a/datahub-web-react/src/app/entity/shared/tabs/Dataset/Schema/SchemaTab.tsx +++ b/datahub-web-react/src/app/entity/shared/tabs/Dataset/Schema/SchemaTab.tsx @@ -76,6 +76,14 @@ export const SchemaTab = ({ properties }: { properties?: any }) => { [schemaMetadata], ); + const hasProperties = useMemo( + () => + entityWithSchema?.schemaMetadata?.fields.some( + (schemaField) => !!schemaField.schemaFieldEntity?.structuredProperties?.properties?.length, + ), + [entityWithSchema], + ); + const [showKeySchema, setShowKeySchema] = useState(false); const [showSchemaAuditView, setShowSchemaAuditView] = useState(false); @@ -190,13 +198,13 @@ export const 
SchemaTab = ({ properties }: { properties?: any }) => { diff --git a/datahub-web-react/src/app/entity/shared/tabs/Dataset/Schema/SchemaTable.tsx b/datahub-web-react/src/app/entity/shared/tabs/Dataset/Schema/SchemaTable.tsx index 41b92aea93b5a..bd092e86b3584 100644 --- a/datahub-web-react/src/app/entity/shared/tabs/Dataset/Schema/SchemaTable.tsx +++ b/datahub-web-react/src/app/entity/shared/tabs/Dataset/Schema/SchemaTable.tsx @@ -21,9 +21,10 @@ import { StyledTable } from '../../../components/styled/StyledTable'; import { SchemaRow } from './components/SchemaRow'; import { FkContext } from './utils/selectedFkContext'; import useSchemaBlameRenderer from './utils/useSchemaBlameRenderer'; -import { ANTD_GRAY } from '../../../constants'; -import MenuColumn from './components/MenuColumn'; +import { ANTD_GRAY, ANTD_GRAY_V2 } from '../../../constants'; import translateFieldPath from '../../../../dataset/profile/schema/utils/translateFieldPath'; +import PropertiesColumn from './components/PropertiesColumn'; +import SchemaFieldDrawer from './components/SchemaFieldDrawer/SchemaFieldDrawer'; const TableContainer = styled.div` overflow: inherit; @@ -41,18 +42,36 @@ const TableContainer = styled.div` padding-bottom: 600px; vertical-align: top; } + + &&& .ant-table-cell { + background-color: inherit; + cursor: pointer; + } + + &&& tbody > tr:hover > td { + background-color: ${ANTD_GRAY_V2[2]}; + } + + &&& .expanded-row { + background-color: ${(props) => props.theme.styles['highlight-color']} !important; + + td { + background-color: ${(props) => props.theme.styles['highlight-color']} !important; + } + } `; export type Props = { rows: Array; schemaMetadata: SchemaMetadata | undefined | null; editableSchemaMetadata?: EditableSchemaMetadata | null; - editMode?: boolean; usageStats?: UsageQueryResult | null; schemaFieldBlameList?: Array | null; showSchemaAuditView: boolean; expandedRowsFromFilter?: Set; filterText?: string; + hasProperties?: boolean; + inputFields?: SchemaField[]; }; 
const EMPTY_SET: Set = new Set(); @@ -63,56 +82,46 @@ export default function SchemaTable({ schemaMetadata, editableSchemaMetadata, usageStats, - editMode = true, schemaFieldBlameList, showSchemaAuditView, expandedRowsFromFilter = EMPTY_SET, filterText = '', + hasProperties, + inputFields, }: Props): JSX.Element { const hasUsageStats = useMemo(() => (usageStats?.aggregations?.fields?.length || 0) > 0, [usageStats]); const [tableHeight, setTableHeight] = useState(0); - const [tagHoveredIndex, setTagHoveredIndex] = useState(undefined); - const [selectedFkFieldPath, setSelectedFkFieldPath] = - useState(null); + const [selectedFkFieldPath, setSelectedFkFieldPath] = useState(null); + const [expandedDrawerFieldPath, setExpandedDrawerFieldPath] = useState(null); + + const schemaFields = schemaMetadata ? schemaMetadata.fields : inputFields; const descriptionRender = useDescriptionRenderer(editableSchemaMetadata); const usageStatsRenderer = useUsageStatsRenderer(usageStats); const tagRenderer = useTagsAndTermsRenderer( editableSchemaMetadata, - tagHoveredIndex, - setTagHoveredIndex, { showTags: true, showTerms: false, }, filterText, + false, ); const termRenderer = useTagsAndTermsRenderer( editableSchemaMetadata, - tagHoveredIndex, - setTagHoveredIndex, { showTags: false, showTerms: true, }, filterText, + false, ); const schemaTitleRenderer = useSchemaTitleRenderer(schemaMetadata, setSelectedFkFieldPath, filterText); const schemaBlameRenderer = useSchemaBlameRenderer(schemaFieldBlameList); - const onTagTermCell = (record: SchemaField) => ({ - onMouseEnter: () => { - if (editMode) { - setTagHoveredIndex(record.fieldPath); - } - }, - onMouseLeave: () => { - if (editMode) { - setTagHoveredIndex(undefined); - } - }, - }); - const fieldColumn = { width: '22%', title: 'Field', @@ -139,7 +148,6 @@ export default function SchemaTable({ dataIndex: 'globalTags', key: 'tag', render: tagRenderer, - onCell: onTagTermCell, }; const termColumn = { @@ -148,7 +156,6 @@ export default 
function SchemaTable({ dataIndex: 'globalTags', key: 'tag', render: termRenderer, - onCell: onTagTermCell, }; const blameColumn = { @@ -184,16 +191,20 @@ export default function SchemaTable({ sorter: (sourceA, sourceB) => getCount(sourceA.fieldPath) - getCount(sourceB.fieldPath), }; - const menuColumn = { - width: '5%', - title: '', + const propertiesColumn = { + width: '13%', + title: 'Properties', dataIndex: '', key: 'menu', - render: (field: SchemaField) => , + render: (field: SchemaField) => , }; let allColumns: ColumnsType = [fieldColumn, descriptionColumn, tagColumn, termColumn]; + if (hasProperties) { + allColumns = [...allColumns, propertiesColumn]; + } + if (hasUsageStats) { allColumns = [...allColumns, usageColumn]; } @@ -202,8 +213,6 @@ export default function SchemaTable({ allColumns = [...allColumns, blameColumn]; } - allColumns = [...allColumns, menuColumn]; - const [expandedRows, setExpandedRows] = useState>(new Set()); useEffect(() => { @@ -224,9 +233,15 @@ export default function SchemaTable({ setTableHeight(dimensions.height - TABLE_HEADER_HEIGHT)}> - record.fieldPath === selectedFkFieldPath?.fieldPath ? 'open-fk-row' : '' - } + rowClassName={(record) => { + if (record.fieldPath === selectedFkFieldPath?.fieldPath) { + return 'open-fk-row'; + } + if (expandedDrawerFieldPath === record.fieldPath) { + return 'expanded-row'; + } + return ''; + }} columns={allColumns} dataSource={rows} rowKey="fieldPath" @@ -250,9 +265,27 @@ export default function SchemaTable({ indentSize: 0, }} pagination={false} + onRow={(record) => ({ + onClick: () => { + setExpandedDrawerFieldPath( + expandedDrawerFieldPath === record.fieldPath ? null : record.fieldPath, + ); + }, + style: { + backgroundColor: expandedDrawerFieldPath === record.fieldPath ? 
`` : 'white', + }, + })} /> + {!!schemaFields && ( + + )} ); } diff --git a/datahub-web-react/src/app/entity/shared/tabs/Dataset/Schema/components/ChildCountLabel.tsx b/datahub-web-react/src/app/entity/shared/tabs/Dataset/Schema/components/ChildCountLabel.tsx new file mode 100644 index 0000000000000..44bd48620649a --- /dev/null +++ b/datahub-web-react/src/app/entity/shared/tabs/Dataset/Schema/components/ChildCountLabel.tsx @@ -0,0 +1,32 @@ +import React from 'react'; +import { Badge } from 'antd'; +import styled from 'styled-components'; + +import { ANTD_GRAY_V2 } from '../../../../constants'; + +type Props = { + count: number; +}; + +const ChildCountBadge = styled(Badge)` + margin-left: 10px; + margin-top: 16px; + margin-bottom: 16px; + &&& .ant-badge-count { + background-color: ${ANTD_GRAY_V2[1]}; + color: ${ANTD_GRAY_V2[8]}; + box-shadow: 0 2px 1px -1px ${ANTD_GRAY_V2[6]}; + border-radius: 4px 4px 4px 4px; + font-size: 12px; + font-weight: 500; + height: 22px; + font-family: 'Manrope'; + } +`; + +export default function ChildCountLabel({ count }: Props) { + const propertyString = count > 1 ? 
' properties' : ' property'; + + // eslint-disable-next-line + return ; +} diff --git a/datahub-web-react/src/app/entity/shared/tabs/Dataset/Schema/components/PropertiesColumn.tsx b/datahub-web-react/src/app/entity/shared/tabs/Dataset/Schema/components/PropertiesColumn.tsx new file mode 100644 index 0000000000000..b74de3e94e554 --- /dev/null +++ b/datahub-web-react/src/app/entity/shared/tabs/Dataset/Schema/components/PropertiesColumn.tsx @@ -0,0 +1,30 @@ +import { ControlOutlined } from '@ant-design/icons'; +import React from 'react'; +import styled from 'styled-components'; +import { SchemaField } from '../../../../../../../types.generated'; + +const ColumnWrapper = styled.div` + font-size: 14px; +`; + +const StyledIcon = styled(ControlOutlined)` + margin-right: 4px; +`; + +interface Props { + field: SchemaField; +} + +export default function PropertiesColumn({ field }: Props) { + const { schemaFieldEntity } = field; + const numProperties = schemaFieldEntity?.structuredProperties?.properties?.length; + + if (!schemaFieldEntity || !numProperties) return null; + + return ( + + + {numProperties} {numProperties === 1 ? 
'property' : 'properties'} + + ); +} diff --git a/datahub-web-react/src/app/entity/shared/tabs/Dataset/Schema/components/PropertyTypeLabel.tsx b/datahub-web-react/src/app/entity/shared/tabs/Dataset/Schema/components/PropertyTypeLabel.tsx new file mode 100644 index 0000000000000..366fc4762b210 --- /dev/null +++ b/datahub-web-react/src/app/entity/shared/tabs/Dataset/Schema/components/PropertyTypeLabel.tsx @@ -0,0 +1,39 @@ +import React from 'react'; +import { Badge } from 'antd'; +import styled from 'styled-components'; +import { capitalizeFirstLetterOnly } from '../../../../../../shared/textUtil'; +import { DataTypeEntity, SchemaFieldDataType } from '../../../../../../../types.generated'; +import { truncate } from '../../../../utils'; +import { ANTD_GRAY, ANTD_GRAY_V2 } from '../../../../constants'; +import { TypeData } from '../../../Properties/types'; + +type Props = { + type: TypeData; + dataType?: DataTypeEntity; +}; + +export const PropertyTypeBadge = styled(Badge)` + margin: 4px 0 4px 8px; + &&& .ant-badge-count { + background-color: ${ANTD_GRAY[1]}; + color: ${ANTD_GRAY_V2[8]}; + border: 1px solid ${ANTD_GRAY_V2[6]}; + font-size: 12px; + font-weight: 500; + height: 22px; + font-family: 'Manrope'; + } +`; + +export default function PropertyTypeLabel({ type, dataType }: Props) { + // if unable to match type to DataHub, display native type info by default + const { nativeDataType } = type; + const nativeFallback = type.type === SchemaFieldDataType.Null; + + const typeText = + dataType?.info.displayName || + dataType?.info.type || + (nativeFallback ? 
truncate(250, nativeDataType) : type.type); + + return ; +} diff --git a/datahub-web-react/src/app/entity/shared/tabs/Dataset/Schema/components/SchemaFieldDrawer/DrawerHeader.tsx b/datahub-web-react/src/app/entity/shared/tabs/Dataset/Schema/components/SchemaFieldDrawer/DrawerHeader.tsx new file mode 100644 index 0000000000000..13f8ec869126d --- /dev/null +++ b/datahub-web-react/src/app/entity/shared/tabs/Dataset/Schema/components/SchemaFieldDrawer/DrawerHeader.tsx @@ -0,0 +1,106 @@ +import { CaretLeftOutlined, CaretRightOutlined, CloseOutlined } from '@ant-design/icons'; +import { Button } from 'antd'; +import React, { useEffect } from 'react'; +import styled from 'styled-components'; +import { ANTD_GRAY_V2 } from '../../../../../constants'; +import { SchemaField } from '../../../../../../../../types.generated'; +import { pluralize } from '../../../../../../../shared/textUtil'; + +const HeaderWrapper = styled.div` + border-bottom: 1px solid ${ANTD_GRAY_V2[4]}; + display: flex; + justify-content: space-between; + padding: 8px 16px; +`; + +const StyledButton = styled(Button)` + font-size: 12px; + padding: 0; + height: 26px; + width: 26px; + display: flex; + align-items: center; + justify-content: center; + + svg { + height: 10px; + width: 10px; + } +`; + +const FieldIndexText = styled.span` + font-size: 14px; + color: ${ANTD_GRAY_V2[8]}; + margin: 0 8px; +`; + +const ButtonsWrapper = styled.div` + display: flex; + align-items: center; +`; + +interface Props { + schemaFields?: SchemaField[]; + expandedFieldIndex?: number; + setExpandedDrawerFieldPath: (fieldPath: string | null) => void; +} + +export default function DrawerHeader({ schemaFields = [], expandedFieldIndex = 0, setExpandedDrawerFieldPath }: Props) { + function showNextField() { + if (expandedFieldIndex !== undefined && expandedFieldIndex !== -1) { + if (expandedFieldIndex === schemaFields.length - 1) { + const newField = schemaFields[0]; + setExpandedDrawerFieldPath(newField.fieldPath); + } else { + const 
newField = schemaFields[expandedFieldIndex + 1]; + const { fieldPath } = newField; + setExpandedDrawerFieldPath(fieldPath); + } + } + } + + function showPreviousField() { + if (expandedFieldIndex !== undefined && expandedFieldIndex !== -1) { + if (expandedFieldIndex === 0) { + const newField = schemaFields[schemaFields.length - 1]; + setExpandedDrawerFieldPath(newField.fieldPath); + } else { + const newField = schemaFields[expandedFieldIndex - 1]; + setExpandedDrawerFieldPath(newField.fieldPath); + } + } + } + + function handleArrowKeys(event: KeyboardEvent) { + if (event.code === 'ArrowUp' || event.code === 'ArrowLeft') { + showPreviousField(); + } else if (event.code === 'ArrowDown' || event.code === 'ArrowRight') { + showNextField(); + } + } + + useEffect(() => { + document.addEventListener('keydown', handleArrowKeys); + + return () => document.removeEventListener('keydown', handleArrowKeys); + }); + + return ( + + + + + + + {expandedFieldIndex + 1} of {schemaFields.length} {pluralize(schemaFields.length, 'field')} + + + + + + setExpandedDrawerFieldPath(null)}> + + + + ); +} diff --git a/datahub-web-react/src/app/entity/shared/tabs/Dataset/Schema/components/SchemaFieldDrawer/FieldDescription.tsx b/datahub-web-react/src/app/entity/shared/tabs/Dataset/Schema/components/SchemaFieldDrawer/FieldDescription.tsx new file mode 100644 index 0000000000000..410d2801d51c8 --- /dev/null +++ b/datahub-web-react/src/app/entity/shared/tabs/Dataset/Schema/components/SchemaFieldDrawer/FieldDescription.tsx @@ -0,0 +1,115 @@ +import { EditOutlined } from '@ant-design/icons'; +import { Button, message } from 'antd'; +import DOMPurify from 'dompurify'; +import React, { useState } from 'react'; +import styled from 'styled-components'; +import { SectionHeader, StyledDivider } from './components'; +import UpdateDescriptionModal from '../../../../../components/legacy/DescriptionModal'; +import { EditableSchemaFieldInfo, SchemaField, SubResourceType } from 
'../../../../../../../../types.generated'; +import DescriptionSection from '../../../../../containers/profile/sidebar/AboutSection/DescriptionSection'; +import { useEntityData, useMutationUrn, useRefetch } from '../../../../../EntityContext'; +import { useSchemaRefetch } from '../../SchemaContext'; +import { useUpdateDescriptionMutation } from '../../../../../../../../graphql/mutations.generated'; +import analytics, { EntityActionType, EventType } from '../../../../../../../analytics'; +import SchemaEditableContext from '../../../../../../../shared/SchemaEditableContext'; + +const DescriptionWrapper = styled.div` + display: flex; + justify-content: space-between; +`; + +const EditIcon = styled(Button)` + border: none; + box-shadow: none; + height: 20px; + width: 20px; +`; + +interface Props { + expandedField: SchemaField; + editableFieldInfo?: EditableSchemaFieldInfo; +} + +export default function FieldDescription({ expandedField, editableFieldInfo }: Props) { + const isSchemaEditable = React.useContext(SchemaEditableContext); + const urn = useMutationUrn(); + const refetch = useRefetch(); + const schemaRefetch = useSchemaRefetch(); + const [updateDescription] = useUpdateDescriptionMutation(); + const [isModalVisible, setIsModalVisible] = useState(false); + const { entityType } = useEntityData(); + + const sendAnalytics = () => { + analytics.event({ + type: EventType.EntityActionEvent, + actionType: EntityActionType.UpdateSchemaDescription, + entityType, + entityUrn: urn, + }); + }; + + const refresh: any = () => { + refetch?.(); + schemaRefetch?.(); + }; + + const onSuccessfulMutation = () => { + refresh(); + sendAnalytics(); + message.destroy(); + message.success({ content: 'Updated!', duration: 2 }); + }; + + const onFailMutation = (e) => { + message.destroy(); + if (e instanceof Error) message.error({ content: `Proposal Failed! 
\n ${e.message || ''}`, duration: 2 }); + }; + + const generateMutationVariables = (updatedDescription: string) => ({ + variables: { + input: { + description: DOMPurify.sanitize(updatedDescription), + resourceUrn: urn, + subResource: expandedField.fieldPath, + subResourceType: SubResourceType.DatasetField, + }, + }, + }); + + const displayedDescription = editableFieldInfo?.description || expandedField.description; + + return ( + <> + +
+ Description + +
+ {isSchemaEditable && ( + setIsModalVisible(true)} + icon={} + /> + )} + {isModalVisible && ( + setIsModalVisible(false)} + onSubmit={(updatedDescription: string) => { + message.loading({ content: 'Updating...' }); + updateDescription(generateMutationVariables(updatedDescription)) + .then(onSuccessfulMutation) + .catch(onFailMutation); + setIsModalVisible(false); + }} + isAddDesc={!displayedDescription} + /> + )} +
+ + + ); +} diff --git a/datahub-web-react/src/app/entity/shared/tabs/Dataset/Schema/components/SchemaFieldDrawer/FieldHeader.tsx b/datahub-web-react/src/app/entity/shared/tabs/Dataset/Schema/components/SchemaFieldDrawer/FieldHeader.tsx new file mode 100644 index 0000000000000..7b06ff43393ef --- /dev/null +++ b/datahub-web-react/src/app/entity/shared/tabs/Dataset/Schema/components/SchemaFieldDrawer/FieldHeader.tsx @@ -0,0 +1,60 @@ +import { Typography } from 'antd'; +import React from 'react'; +import styled from 'styled-components'; +import translateFieldPath from '../../../../../../dataset/profile/schema/utils/translateFieldPath'; +import TypeLabel from '../TypeLabel'; +import PrimaryKeyLabel from '../PrimaryKeyLabel'; +import PartitioningKeyLabel from '../PartitioningKeyLabel'; +import NullableLabel from '../NullableLabel'; +import MenuColumn from '../MenuColumn'; +import { ANTD_GRAY_V2 } from '../../../../../constants'; +import { SchemaField } from '../../../../../../../../types.generated'; + +const FieldHeaderWrapper = styled.div` + padding: 16px; + display: flex; + justify-content: space-between; + border-bottom: 1px solid ${ANTD_GRAY_V2[4]}; +`; + +const FieldName = styled(Typography.Text)` + font-size: 16px; + font-family: 'Roboto Mono', monospace; +`; + +const TypesSection = styled.div` + margin-left: -4px; + margin-top: 8px; +`; + +const NameTypesWrapper = styled.div` + overflow: hidden; +`; + +const MenuWrapper = styled.div` + margin-right: 5px; +`; + +interface Props { + expandedField: SchemaField; +} + +export default function FieldHeader({ expandedField }: Props) { + const displayName = translateFieldPath(expandedField.fieldPath || ''); + return ( + + + {displayName} + + + {expandedField.isPartOfKey && } + {expandedField.isPartitioningKey && } + {expandedField.nullable && } + + + + + + + ); +} diff --git a/datahub-web-react/src/app/entity/shared/tabs/Dataset/Schema/components/SchemaFieldDrawer/FieldProperties.tsx 
b/datahub-web-react/src/app/entity/shared/tabs/Dataset/Schema/components/SchemaFieldDrawer/FieldProperties.tsx new file mode 100644 index 0000000000000..8c88cdce95f06 --- /dev/null +++ b/datahub-web-react/src/app/entity/shared/tabs/Dataset/Schema/components/SchemaFieldDrawer/FieldProperties.tsx @@ -0,0 +1,70 @@ +import React from 'react'; +import styled from 'styled-components'; +import { SchemaField, StdDataType } from '../../../../../../../../types.generated'; +import { SectionHeader, StyledDivider } from './components'; +import { mapStructuredPropertyValues } from '../../../../Properties/useStructuredProperties'; +import StructuredPropertyValue from '../../../../Properties/StructuredPropertyValue'; + +const PropertyTitle = styled.div` + font-size: 14px; + font-weight: 700; + margin-bottom: 4px; +`; + +const PropertyWrapper = styled.div` + margin-bottom: 12px; +`; + +const PropertiesWrapper = styled.div` + padding-left: 16px; +`; + +const StyledList = styled.ul` + padding-left: 24px; +`; + +interface Props { + expandedField: SchemaField; +} + +export default function FieldProperties({ expandedField }: Props) { + const { schemaFieldEntity } = expandedField; + + if (!schemaFieldEntity?.structuredProperties?.properties?.length) return null; + + return ( + <> + Properties + + {schemaFieldEntity.structuredProperties.properties.map((structuredProp) => { + const isRichText = + structuredProp.structuredProperty.definition.valueType?.info.type === StdDataType.RichText; + const valuesData = mapStructuredPropertyValues(structuredProp); + const hasMultipleValues = valuesData.length > 1; + + return ( + + {structuredProp.structuredProperty.definition.displayName} + {hasMultipleValues ? ( + + {valuesData.map((value) => ( +
  • + +
  • + ))} +
    + ) : ( + <> + {valuesData.map((value) => ( + + ))} + + )} +
    + ); + })} +
    + + + ); +} diff --git a/datahub-web-react/src/app/entity/shared/tabs/Dataset/Schema/components/SchemaFieldDrawer/FieldTags.tsx b/datahub-web-react/src/app/entity/shared/tabs/Dataset/Schema/components/SchemaFieldDrawer/FieldTags.tsx new file mode 100644 index 0000000000000..c071506d3ad79 --- /dev/null +++ b/datahub-web-react/src/app/entity/shared/tabs/Dataset/Schema/components/SchemaFieldDrawer/FieldTags.tsx @@ -0,0 +1,33 @@ +import React from 'react'; +import { EditableSchemaMetadata, GlobalTags, SchemaField } from '../../../../../../../../types.generated'; +import useTagsAndTermsRenderer from '../../utils/useTagsAndTermsRenderer'; +import { SectionHeader, StyledDivider } from './components'; +import SchemaEditableContext from '../../../../../../../shared/SchemaEditableContext'; + +interface Props { + expandedField: SchemaField; + editableSchemaMetadata?: EditableSchemaMetadata | null; +} + +export default function FieldTags({ expandedField, editableSchemaMetadata }: Props) { + const isSchemaEditable = React.useContext(SchemaEditableContext); + const tagRenderer = useTagsAndTermsRenderer( + editableSchemaMetadata, + { + showTags: true, + showTerms: false, + }, + '', + isSchemaEditable, + ); + + return ( + <> + Tags +
    + {tagRenderer(expandedField.globalTags as GlobalTags, expandedField)} +
    + + + ); +} diff --git a/datahub-web-react/src/app/entity/shared/tabs/Dataset/Schema/components/SchemaFieldDrawer/FieldTerms.tsx b/datahub-web-react/src/app/entity/shared/tabs/Dataset/Schema/components/SchemaFieldDrawer/FieldTerms.tsx new file mode 100644 index 0000000000000..94349836539a6 --- /dev/null +++ b/datahub-web-react/src/app/entity/shared/tabs/Dataset/Schema/components/SchemaFieldDrawer/FieldTerms.tsx @@ -0,0 +1,34 @@ +import React from 'react'; +import { EditableSchemaMetadata, GlobalTags, SchemaField } from '../../../../../../../../types.generated'; +import useTagsAndTermsRenderer from '../../utils/useTagsAndTermsRenderer'; +import { SectionHeader, StyledDivider } from './components'; +import SchemaEditableContext from '../../../../../../../shared/SchemaEditableContext'; + +interface Props { + expandedField: SchemaField; + editableSchemaMetadata?: EditableSchemaMetadata | null; +} + +export default function FieldTerms({ expandedField, editableSchemaMetadata }: Props) { + const isSchemaEditable = React.useContext(SchemaEditableContext); + const termRenderer = useTagsAndTermsRenderer( + editableSchemaMetadata, + { + showTags: false, + showTerms: true, + }, + '', + isSchemaEditable, + ); + + return ( + <> + Glossary Terms + {/* pass in globalTags since this is a shared component, tags will not be shown or used */} +
    + {termRenderer(expandedField.globalTags as GlobalTags, expandedField)} +
    + + + ); +} diff --git a/datahub-web-react/src/app/entity/shared/tabs/Dataset/Schema/components/SchemaFieldDrawer/FieldUsageStats.tsx b/datahub-web-react/src/app/entity/shared/tabs/Dataset/Schema/components/SchemaFieldDrawer/FieldUsageStats.tsx new file mode 100644 index 0000000000000..2f7288904b2df --- /dev/null +++ b/datahub-web-react/src/app/entity/shared/tabs/Dataset/Schema/components/SchemaFieldDrawer/FieldUsageStats.tsx @@ -0,0 +1,59 @@ +import React, { useMemo } from 'react'; +import styled from 'styled-components'; +import { GetDatasetQuery } from '../../../../../../../../graphql/dataset.generated'; +import { useBaseEntity } from '../../../../../EntityContext'; +import { ANTD_GRAY_V2 } from '../../../../../constants'; +import { SectionHeader, StyledDivider } from './components'; +import { pathMatchesNewPath } from '../../../../../../dataset/profile/schema/utils/utils'; +import { UsageBar } from '../../utils/useUsageStatsRenderer'; +import { SchemaField } from '../../../../../../../../types.generated'; + +const USAGE_BAR_MAX_WIDTH = 100; + +const UsageBarWrapper = styled.div` + display: flex; + align-items: center; +`; + +const UsageBarBackground = styled.div` + background-color: ${ANTD_GRAY_V2[3]}; + border-radius: 2px; + height: 4px; + width: ${USAGE_BAR_MAX_WIDTH}px; +`; + +const UsageTextWrapper = styled.span` + margin-left: 8px; +`; + +interface Props { + expandedField: SchemaField; +} + +export default function FieldUsageStats({ expandedField }: Props) { + const baseEntity = useBaseEntity(); + const usageStats = baseEntity?.dataset?.usageStats; + const hasUsageStats = useMemo(() => (usageStats?.aggregations?.fields?.length || 0) > 0, [usageStats]); + const maxFieldUsageCount = useMemo( + () => Math.max(...(usageStats?.aggregations?.fields?.map((field) => field?.count || 0) || [])), + [usageStats], + ); + const relevantUsageStats = usageStats?.aggregations?.fields?.find((fieldStats) => + pathMatchesNewPath(fieldStats?.fieldName, 
expandedField.fieldPath), + ); + + if (!hasUsageStats || !relevantUsageStats) return null; + + return ( + <> + Usage + + + + + {relevantUsageStats.count || 0} queries / month + + + + ); +} diff --git a/datahub-web-react/src/app/entity/shared/tabs/Dataset/Schema/components/SchemaFieldDrawer/SchemaFieldDrawer.tsx b/datahub-web-react/src/app/entity/shared/tabs/Dataset/Schema/components/SchemaFieldDrawer/SchemaFieldDrawer.tsx new file mode 100644 index 0000000000000..7a5366f04e983 --- /dev/null +++ b/datahub-web-react/src/app/entity/shared/tabs/Dataset/Schema/components/SchemaFieldDrawer/SchemaFieldDrawer.tsx @@ -0,0 +1,83 @@ +import { Drawer } from 'antd'; +import React, { useMemo } from 'react'; +import styled from 'styled-components'; +import DrawerHeader from './DrawerHeader'; +import FieldHeader from './FieldHeader'; +import FieldDescription from './FieldDescription'; +import { EditableSchemaMetadata, SchemaField } from '../../../../../../../../types.generated'; +import { pathMatchesNewPath } from '../../../../../../dataset/profile/schema/utils/utils'; +import FieldUsageStats from './FieldUsageStats'; +import FieldTags from './FieldTags'; +import FieldTerms from './FieldTerms'; +import FieldProperties from './FieldProperties'; + +const StyledDrawer = styled(Drawer)` + position: absolute; + + &&& .ant-drawer-body { + padding: 0; + } + + &&& .ant-drawer-content-wrapper { + border-left: 3px solid ${(props) => props.theme.styles['primary-color']}; + } +`; + +const MetadataSections = styled.div` + padding: 16px 24px; +`; + +interface Props { + schemaFields: SchemaField[]; + editableSchemaMetadata?: EditableSchemaMetadata | null; + expandedDrawerFieldPath: string | null; + setExpandedDrawerFieldPath: (fieldPath: string | null) => void; +} + +export default function SchemaFieldDrawer({ + schemaFields, + editableSchemaMetadata, + expandedDrawerFieldPath, + setExpandedDrawerFieldPath, +}: Props) { + const expandedFieldIndex = useMemo( + () => schemaFields.findIndex((row) 
=> row.fieldPath === expandedDrawerFieldPath), + [expandedDrawerFieldPath, schemaFields], + ); + const expandedField = + expandedFieldIndex !== undefined && expandedFieldIndex !== -1 ? schemaFields[expandedFieldIndex] : undefined; + const editableFieldInfo = editableSchemaMetadata?.editableSchemaFieldInfo.find((candidateEditableFieldInfo) => + pathMatchesNewPath(candidateEditableFieldInfo.fieldPath, expandedField?.fieldPath), + ); + + return ( + setExpandedDrawerFieldPath(null)} + getContainer={() => document.getElementById('entity-profile-sidebar') as HTMLElement} + contentWrapperStyle={{ width: '100%', boxShadow: 'none' }} + mask={false} + maskClosable={false} + placement="right" + closable={false} + > + + {expandedField && ( + <> + + + + + + + + + + )} + + ); +} diff --git a/datahub-web-react/src/app/entity/shared/tabs/Dataset/Schema/components/SchemaFieldDrawer/components.ts b/datahub-web-react/src/app/entity/shared/tabs/Dataset/Schema/components/SchemaFieldDrawer/components.ts new file mode 100644 index 0000000000000..0348336d649b5 --- /dev/null +++ b/datahub-web-react/src/app/entity/shared/tabs/Dataset/Schema/components/SchemaFieldDrawer/components.ts @@ -0,0 +1,12 @@ +import { Divider } from 'antd'; +import styled from 'styled-components'; + +export const SectionHeader = styled.div` + font-size: 16px; + font-weight: 600; + margin-bottom: 8px; +`; + +export const StyledDivider = styled(Divider)` + margin: 12px 0; +`; diff --git a/datahub-web-react/src/app/entity/shared/tabs/Dataset/Schema/utils/useDescriptionRenderer.tsx b/datahub-web-react/src/app/entity/shared/tabs/Dataset/Schema/utils/useDescriptionRenderer.tsx index d80143f4bb82c..5f2b5d23771c0 100644 --- a/datahub-web-react/src/app/entity/shared/tabs/Dataset/Schema/utils/useDescriptionRenderer.tsx +++ b/datahub-web-react/src/app/entity/shared/tabs/Dataset/Schema/utils/useDescriptionRenderer.tsx @@ -48,8 +48,8 @@ export default function useDescriptionRenderer(editableSchemaMetadata: EditableS }, 
}).then(refresh) } + isReadOnly /> ); }; } -// diff --git a/datahub-web-react/src/app/entity/shared/tabs/Dataset/Schema/utils/useTagsAndTermsRenderer.tsx b/datahub-web-react/src/app/entity/shared/tabs/Dataset/Schema/utils/useTagsAndTermsRenderer.tsx index a57344e5733b4..207deb31d7ab7 100644 --- a/datahub-web-react/src/app/entity/shared/tabs/Dataset/Schema/utils/useTagsAndTermsRenderer.tsx +++ b/datahub-web-react/src/app/entity/shared/tabs/Dataset/Schema/utils/useTagsAndTermsRenderer.tsx @@ -2,15 +2,14 @@ import React from 'react'; import { EditableSchemaMetadata, EntityType, GlobalTags, SchemaField } from '../../../../../../../types.generated'; import TagTermGroup from '../../../../../../shared/tags/TagTermGroup'; import { pathMatchesNewPath } from '../../../../../dataset/profile/schema/utils/utils'; -import { useMutationUrn, useRefetch } from '../../../../EntityContext'; import { useSchemaRefetch } from '../SchemaContext'; +import { useMutationUrn, useRefetch } from '../../../../EntityContext'; export default function useTagsAndTermsRenderer( editableSchemaMetadata: EditableSchemaMetadata | null | undefined, - tagHoveredIndex: string | undefined, - setTagHoveredIndex: (index: string | undefined) => void, options: { showTags: boolean; showTerms: boolean }, filterText: string, + canEdit: boolean, ) { const urn = useMutationUrn(); const refetch = useRefetch(); @@ -27,24 +26,21 @@ export default function useTagsAndTermsRenderer( ); return ( -
    - setTagHoveredIndex(undefined)} - entityUrn={urn} - entityType={EntityType.Dataset} - entitySubresource={record.fieldPath} - highlightText={filterText} - refetch={refresh} - /> -
    + ); }; return tagAndTermRender; diff --git a/datahub-web-react/src/app/entity/shared/tabs/Dataset/Schema/utils/useUsageStatsRenderer.tsx b/datahub-web-react/src/app/entity/shared/tabs/Dataset/Schema/utils/useUsageStatsRenderer.tsx index 393783c4ca787..e6b58eeb376f9 100644 --- a/datahub-web-react/src/app/entity/shared/tabs/Dataset/Schema/utils/useUsageStatsRenderer.tsx +++ b/datahub-web-react/src/app/entity/shared/tabs/Dataset/Schema/utils/useUsageStatsRenderer.tsx @@ -7,7 +7,7 @@ import { pathMatchesNewPath } from '../../../../../dataset/profile/schema/utils/ const USAGE_BAR_MAX_WIDTH = 50; -const UsageBar = styled.div<{ width: number }>` +export const UsageBar = styled.div<{ width: number }>` width: ${(props) => props.width}px; height: 4px; background-color: ${geekblue[3]}; diff --git a/datahub-web-react/src/app/entity/shared/tabs/Dataset/Validations/Assertions.tsx b/datahub-web-react/src/app/entity/shared/tabs/Dataset/Validations/Assertions.tsx index 68660164ee877..b3086d7867012 100644 --- a/datahub-web-react/src/app/entity/shared/tabs/Dataset/Validations/Assertions.tsx +++ b/datahub-web-react/src/app/entity/shared/tabs/Dataset/Validations/Assertions.tsx @@ -35,6 +35,8 @@ const getAssertionsStatusSummary = (assertions: Array) => { /** * Component used for rendering the Validations Tab on the Dataset Page. + * + * TODO: Note that only the legacy DATASET assertions are supported for viewing as of today. 
*/ export const Assertions = () => { const { urn, entityData } = useEntityData(); @@ -47,7 +49,9 @@ export const Assertions = () => { const assertions = (combinedData && combinedData.dataset?.assertions?.assertions?.map((assertion) => assertion as Assertion)) || []; - const filteredAssertions = assertions.filter((assertion) => !removedUrns.includes(assertion.urn)); + const filteredAssertions = assertions.filter( + (assertion) => !removedUrns.includes(assertion.urn) && !!assertion.info?.datasetAssertion, + ); // Pre-sort the list of assertions based on which has been most recently executed. assertions.sort(sortAssertions); diff --git a/datahub-web-react/src/app/entity/shared/tabs/Dataset/Validations/DatasetAssertionDescription.tsx b/datahub-web-react/src/app/entity/shared/tabs/Dataset/Validations/DatasetAssertionDescription.tsx index a91d11d1e9887..daebfd5597588 100644 --- a/datahub-web-react/src/app/entity/shared/tabs/Dataset/Validations/DatasetAssertionDescription.tsx +++ b/datahub-web-react/src/app/entity/shared/tabs/Dataset/Validations/DatasetAssertionDescription.tsx @@ -19,6 +19,7 @@ const ViewLogicButton = styled(Button)` `; type Props = { + description?: string; assertionInfo: DatasetAssertionInfo; }; @@ -319,18 +320,20 @@ const TOOLTIP_MAX_WIDTH = 440; * * For example, Column 'X' values are in [1, 2, 3] */ -export const DatasetAssertionDescription = ({ assertionInfo }: Props) => { +export const DatasetAssertionDescription = ({ description, assertionInfo }: Props) => { const { scope, aggregation, fields, operator, parameters, nativeType, nativeParameters, logic } = assertionInfo; const [isLogicVisible, setIsLogicVisible] = useState(false); /** * Build a description component from a) input (aggregation, inputs) b) the operator text */ - const description = ( + const descriptionFragment = ( <> - - {getAggregationText(scope, aggregation, fields)}{' '} - {getOperatorText(operator, parameters || undefined, nativeType || undefined)} - + {description || ( + + 
{getAggregationText(scope, aggregation, fields)}{' '} + {getOperatorText(operator, parameters || undefined, nativeType || undefined)} + + )} ); @@ -349,7 +352,7 @@ export const DatasetAssertionDescription = ({ assertionInfo }: Props) => { } > -
    {description}
    +
    {descriptionFragment}
    {logic && (
    setIsLogicVisible(true)} type="link"> diff --git a/datahub-web-react/src/app/entity/shared/tabs/Dataset/Validations/DatasetAssertionsList.tsx b/datahub-web-react/src/app/entity/shared/tabs/Dataset/Validations/DatasetAssertionsList.tsx index 05fc2d1c496db..3eccfb8931fc0 100644 --- a/datahub-web-react/src/app/entity/shared/tabs/Dataset/Validations/DatasetAssertionsList.tsx +++ b/datahub-web-react/src/app/entity/shared/tabs/Dataset/Validations/DatasetAssertionsList.tsx @@ -83,6 +83,7 @@ export const DatasetAssertionsList = ({ assertions, onDelete }: Props) => { type: assertion.info?.type, platform: assertion.platform, datasetAssertionInfo: assertion.info?.datasetAssertion, + description: assertion.info?.description, lastExecTime: assertion.runEvents?.runEvents?.length && assertion.runEvents.runEvents[0].timestampMillis, lastExecResult: assertion.runEvents?.runEvents?.length && @@ -101,6 +102,7 @@ export const DatasetAssertionsList = ({ assertions, onDelete }: Props) => { const resultColor = (record.lastExecResult && getResultColor(record.lastExecResult)) || 'default'; const resultText = (record.lastExecResult && getResultText(record.lastExecResult)) || 'No Evaluations'; const resultIcon = (record.lastExecResult && getResultIcon(record.lastExecResult)) || ; + const { description } = record; return (
    @@ -111,7 +113,10 @@ export const DatasetAssertionsList = ({ assertions, onDelete }: Props) => {
    - +
    ); }, @@ -146,12 +151,7 @@ export const DatasetAssertionsList = ({ assertions, onDelete }: Props) => { - - } - trigger={['click']} - > + } trigger={['click']}> diff --git a/datahub-web-react/src/app/entity/shared/tabs/Documentation/components/editor/Editor.tsx b/datahub-web-react/src/app/entity/shared/tabs/Documentation/components/editor/Editor.tsx index bd2e410fb30d9..db56c092c8ccd 100644 --- a/datahub-web-react/src/app/entity/shared/tabs/Documentation/components/editor/Editor.tsx +++ b/datahub-web-react/src/app/entity/shared/tabs/Documentation/components/editor/Editor.tsx @@ -40,10 +40,11 @@ type EditorProps = { onChange?: (md: string) => void; className?: string; doNotFocus?: boolean; + dataTestId?: string; }; export const Editor = forwardRef((props: EditorProps, ref) => { - const { content, readOnly, onChange, className } = props; + const { content, readOnly, onChange, className, dataTestId } = props; const { manager, state, getContext } = useRemirror({ extensions: () => [ new BlockquoteExtension(), @@ -98,7 +99,7 @@ export const Editor = forwardRef((props: EditorProps, ref) => { }, [readOnly, content]); return ( - + {!readOnly && ( diff --git a/datahub-web-react/src/app/entity/shared/tabs/Properties/CardinalityLabel.tsx b/datahub-web-react/src/app/entity/shared/tabs/Properties/CardinalityLabel.tsx new file mode 100644 index 0000000000000..14d3b2166554a --- /dev/null +++ b/datahub-web-react/src/app/entity/shared/tabs/Properties/CardinalityLabel.tsx @@ -0,0 +1,43 @@ +import { Tooltip } from 'antd'; +import React from 'react'; +import styled from 'styled-components'; +import { PropertyCardinality, StructuredPropertyEntity } from '../../../../../types.generated'; +import { PropertyTypeBadge } from '../Dataset/Schema/components/PropertyTypeLabel'; +import { getStructuredPropertyValue } from '../../utils'; + +const Header = styled.div` + font-size: 10px; +`; + +const List = styled.ul` + padding: 0 24px; + max-height: 500px; + overflow: auto; +`; + +interface 
Props { + structuredProperty: StructuredPropertyEntity; +} + +export default function CardinalityLabel({ structuredProperty }: Props) { + const labelText = + structuredProperty.definition.cardinality === PropertyCardinality.Single ? 'Single-Select' : 'Multi-Select'; + + return ( + +
    Property Options
    + + {structuredProperty.definition.allowedValues?.map((value) => ( +
  • {getStructuredPropertyValue(value.value)}
  • + ))} +
    + + } + > + +
    + ); +} diff --git a/datahub-web-react/src/app/entity/shared/tabs/Properties/NameColumn.tsx b/datahub-web-react/src/app/entity/shared/tabs/Properties/NameColumn.tsx new file mode 100644 index 0000000000000..3b718c1ec30ed --- /dev/null +++ b/datahub-web-react/src/app/entity/shared/tabs/Properties/NameColumn.tsx @@ -0,0 +1,87 @@ +import { Tooltip, Typography } from 'antd'; +import React from 'react'; +import styled from 'styled-components'; +import Highlight from 'react-highlighter'; +import { PropertyRow } from './types'; +import ChildCountLabel from '../Dataset/Schema/components/ChildCountLabel'; +import PropertyTypeLabel from '../Dataset/Schema/components/PropertyTypeLabel'; +import StructuredPropertyTooltip from './StructuredPropertyTooltip'; +import CardinalityLabel from './CardinalityLabel'; + +const ParentNameText = styled(Typography.Text)` + color: #373d44; + font-size: 16px; + font-family: Manrope; + font-weight: 600; + line-height: 20px; + word-wrap: break-word; + padding-left: 16px; + display: flex; + align-items: center; +`; + +const ChildNameText = styled(Typography.Text)` + align-self: stretch; + color: #373d44; + font-size: 14px; + font-family: Manrope; + font-weight: 500; + line-height: 18px; + word-wrap: break-word; + padding-left: 16px; + display: flex; + align-items: center; +`; + +const NameLabelWrapper = styled.span` + display: inline-flex; + align-items: center; + flex-wrap: wrap; +`; + +interface Props { + propertyRow: PropertyRow; + filterText?: string; +} + +export default function NameColumn({ propertyRow, filterText }: Props) { + const { structuredProperty } = propertyRow; + return ( + <> + {propertyRow.children ? ( + + + {propertyRow.displayName} + + {propertyRow.childrenCount ? : } + + ) : ( + + + ) : ( + '' + ) + } + > + + {propertyRow.displayName} + + + {propertyRow.type ? 
( + + ) : ( + + )} + {structuredProperty?.definition.allowedValues && ( + + )} + + )} + + ); +} diff --git a/datahub-web-react/src/app/entity/shared/tabs/Properties/PropertiesTab.tsx b/datahub-web-react/src/app/entity/shared/tabs/Properties/PropertiesTab.tsx index 277096e1c09cb..01d1145877e3b 100644 --- a/datahub-web-react/src/app/entity/shared/tabs/Properties/PropertiesTab.tsx +++ b/datahub-web-react/src/app/entity/shared/tabs/Properties/PropertiesTab.tsx @@ -1,52 +1,79 @@ -import React from 'react'; -import { Typography } from 'antd'; import styled from 'styled-components'; - -import { ANTD_GRAY } from '../../constants'; -import { StyledTable } from '../../components/styled/StyledTable'; +import React, { useState } from 'react'; +import ExpandIcon from '../Dataset/Schema/components/ExpandIcon'; +import { StyledTable as Table } from '../../components/styled/StyledTable'; import { useEntityData } from '../../EntityContext'; +import { PropertyRow } from './types'; +import useStructuredProperties from './useStructuredProperties'; +import { getFilteredCustomProperties, mapCustomPropertiesToPropertyRows } from './utils'; +import ValuesColumn from './ValuesColumn'; +import NameColumn from './NameColumn'; +import TabHeader from './TabHeader'; +import useUpdateExpandedRowsFromFilter from './useUpdateExpandedRowsFromFilter'; +import { useEntityRegistry } from '../../../../useEntityRegistry'; -const NameText = styled(Typography.Text)` - font-family: 'Roboto Mono', monospace; - font-weight: 600; - font-size: 12px; - color: ${ANTD_GRAY[9]}; -`; - -const ValueText = styled(Typography.Text)` - font-family: 'Roboto Mono', monospace; - font-weight: 400; - font-size: 12px; - color: ${ANTD_GRAY[8]}; -`; +const StyledTable = styled(Table)` + &&& .ant-table-cell-with-append { + padding: 4px; + } +` as typeof Table; export const PropertiesTab = () => { + const [filterText, setFilterText] = useState(''); const { entityData } = useEntityData(); + const entityRegistry = 
useEntityRegistry(); const propertyTableColumns = [ { - width: 210, + width: '40%', title: 'Name', - dataIndex: 'key', - sorter: (a, b) => a?.key.localeCompare(b?.key || '') || 0, defaultSortOrder: 'ascend', - render: (name: string) => {name}, + render: (propertyRow: PropertyRow) => , }, { title: 'Value', - dataIndex: 'value', - render: (value: string) => {value}, + render: (propertyRow: PropertyRow) => , }, ]; + const { structuredPropertyRows, expandedRowsFromFilter } = useStructuredProperties(entityRegistry, filterText); + const customProperties = getFilteredCustomProperties(filterText, entityData) || []; + const customPropertyRows = mapCustomPropertiesToPropertyRows(customProperties); + const dataSource: PropertyRow[] = structuredPropertyRows.concat(customPropertyRows); + + const [expandedRows, setExpandedRows] = useState>(new Set()); + + useUpdateExpandedRowsFromFilter({ expandedRowsFromFilter, setExpandedRows }); + return ( - + <> + + { + if (expanded) { + setExpandedRows((previousRows) => new Set(previousRows.add(record.qualifiedName))); + } else { + setExpandedRows((previousRows) => { + previousRows.delete(record.qualifiedName); + return new Set(previousRows); + }); + } + }, + indentSize: 0, + }} + /> + ); }; diff --git a/datahub-web-react/src/app/entity/shared/tabs/Properties/StructuredPropertyTooltip.tsx b/datahub-web-react/src/app/entity/shared/tabs/Properties/StructuredPropertyTooltip.tsx new file mode 100644 index 0000000000000..be0f443ce01b2 --- /dev/null +++ b/datahub-web-react/src/app/entity/shared/tabs/Properties/StructuredPropertyTooltip.tsx @@ -0,0 +1,31 @@ +import React from 'react'; +import styled from 'styled-components'; +import { StructuredPropertyEntity } from '../../../../../types.generated'; + +const ContentWrapper = styled.div` + font-size: 12px; +`; + +const Header = styled.div` + font-size: 10px; +`; + +const Description = styled.div` + padding-left: 16px; +`; + +interface Props { + structuredProperty: StructuredPropertyEntity; +} + 
+export default function StructuredPropertyTooltip({ structuredProperty }: Props) { + return ( + +
    Structured Property
    +
    {structuredProperty.definition.displayName || structuredProperty.definition.qualifiedName}
    + {structuredProperty.definition.description && ( + {structuredProperty.definition.description} + )} +
    + ); +} diff --git a/datahub-web-react/src/app/entity/shared/tabs/Properties/StructuredPropertyValue.tsx b/datahub-web-react/src/app/entity/shared/tabs/Properties/StructuredPropertyValue.tsx new file mode 100644 index 0000000000000..a8b4e6607b25e --- /dev/null +++ b/datahub-web-react/src/app/entity/shared/tabs/Properties/StructuredPropertyValue.tsx @@ -0,0 +1,69 @@ +import Icon from '@ant-design/icons/lib/components/Icon'; +import React from 'react'; +import Highlight from 'react-highlighter'; +import { Typography } from 'antd'; +import styled from 'styled-components'; +import { ValueColumnData } from './types'; +import { ANTD_GRAY } from '../../constants'; +import { useEntityRegistry } from '../../../../useEntityRegistry'; +import ExternalLink from '../../../../../images/link-out.svg?react'; +import MarkdownViewer, { MarkdownView } from '../../components/legacy/MarkdownViewer'; +import EntityIcon from '../../components/styled/EntityIcon'; + +const ValueText = styled(Typography.Text)` + font-family: 'Manrope'; + font-weight: 400; + font-size: 14px; + color: ${ANTD_GRAY[9]}; + display: block; + + ${MarkdownView} { + font-size: 14px; + } +`; + +const StyledIcon = styled(Icon)` + margin-left: 6px; +`; + +const IconWrapper = styled.span` + margin-right: 4px; +`; + +interface Props { + value: ValueColumnData; + isRichText?: boolean; + filterText?: string; +} + +export default function StructuredPropertyValue({ value, isRichText, filterText }: Props) { + const entityRegistry = useEntityRegistry(); + + return ( + + {value.entity ? ( + <> + + + + {entityRegistry.getDisplayName(value.entity.type, value.entity)} + + + + + ) : ( + <> + {isRichText ? 
( + + ) : ( + {value.value?.toString()} + )} + + )} + + ); +} diff --git a/datahub-web-react/src/app/entity/shared/tabs/Properties/TabHeader.tsx b/datahub-web-react/src/app/entity/shared/tabs/Properties/TabHeader.tsx new file mode 100644 index 0000000000000..9e0b4992d9c78 --- /dev/null +++ b/datahub-web-react/src/app/entity/shared/tabs/Properties/TabHeader.tsx @@ -0,0 +1,32 @@ +import { SearchOutlined } from '@ant-design/icons'; +import { Input } from 'antd'; +import React from 'react'; +import styled from 'styled-components'; +import { ANTD_GRAY } from '../../constants'; + +const StyledInput = styled(Input)` + border-radius: 70px; + max-width: 300px; +`; + +const TableHeader = styled.div` + padding: 8px 16px; + border-bottom: 1px solid ${ANTD_GRAY[4.5]}; +`; + +interface Props { + setFilterText: (text: string) => void; +} + +export default function TabHeader({ setFilterText }: Props) { + return ( + + setFilterText(e.target.value)} + allowClear + prefix={} + /> + + ); +} diff --git a/datahub-web-react/src/app/entity/shared/tabs/Properties/ValuesColumn.tsx b/datahub-web-react/src/app/entity/shared/tabs/Properties/ValuesColumn.tsx new file mode 100644 index 0000000000000..b050e06f96de8 --- /dev/null +++ b/datahub-web-react/src/app/entity/shared/tabs/Properties/ValuesColumn.tsx @@ -0,0 +1,24 @@ +import React from 'react'; +import { PropertyRow } from './types'; +import { StdDataType } from '../../../../../types.generated'; +import StructuredPropertyValue from './StructuredPropertyValue'; + +interface Props { + propertyRow: PropertyRow; + filterText?: string; +} + +export default function ValuesColumn({ propertyRow, filterText }: Props) { + const { values } = propertyRow; + const isRichText = propertyRow.dataType?.info.type === StdDataType.RichText; + + return ( + <> + {values ? 
( + values.map((v) => ) + ) : ( + + )} + + ); +} diff --git a/datahub-web-react/src/app/entity/shared/tabs/Properties/__tests__/utils.test.ts b/datahub-web-react/src/app/entity/shared/tabs/Properties/__tests__/utils.test.ts new file mode 100644 index 0000000000000..512510732d716 --- /dev/null +++ b/datahub-web-react/src/app/entity/shared/tabs/Properties/__tests__/utils.test.ts @@ -0,0 +1,87 @@ +import { getTestEntityRegistry } from '../../../../../../utils/test-utils/TestPageContainer'; +import { PropertyRow } from '../types'; +import { filterStructuredProperties } from '../utils'; + +describe('filterSchemaRows', () => { + const testEntityRegistry = getTestEntityRegistry(); + const rows = [ + { + displayName: 'Has PII', + qualifiedName: 'io.acryl.ads.data_protection.has_pii', + values: [{ value: 'yes', entity: null }], + }, + { + displayName: 'Discovery Date Utc', + qualifiedName: 'io.acryl.ads.change_management.discovery_date_utc', + values: [{ value: '2023-10-31', entity: null }], + }, + { + displayName: 'Link Data Location', + qualifiedName: 'io.acryl.ads.context.data_location', + values: [{ value: 'New York City', entity: null }], + }, + { + displayName: 'Number Prop', + qualifiedName: 'io.acryl.ads.number', + values: [{ value: 100, entity: null }], + }, + ] as PropertyRow[]; + + it('should properly filter structured properties based on field name', () => { + const filterText = 'has pi'; + const { filteredRows, expandedRowsFromFilter } = filterStructuredProperties( + testEntityRegistry, + rows, + filterText, + ); + + expect(filteredRows).toMatchObject([ + { + displayName: 'Has PII', + qualifiedName: 'io.acryl.ads.data_protection.has_pii', + values: [{ value: 'yes', entity: null }], + }, + ]); + expect(expandedRowsFromFilter).toMatchObject( + new Set(['io', 'io.acryl', 'io.acryl.ads', 'io.acryl.ads.data_protection']), + ); + }); + + it('should properly filter structured properties based on field value', () => { + const filterText = 'new york'; + const { 
filteredRows, expandedRowsFromFilter } = filterStructuredProperties( + testEntityRegistry, + rows, + filterText, + ); + + expect(filteredRows).toMatchObject([ + { + displayName: 'Link Data Location', + qualifiedName: 'io.acryl.ads.context.data_location', + values: [{ value: 'New York City', entity: null }], + }, + ]); + expect(expandedRowsFromFilter).toMatchObject( + new Set(['io', 'io.acryl', 'io.acryl.ads', 'io.acryl.ads.context']), + ); + }); + + it('should properly filter structured properties based on field value even for numbers', () => { + const filterText = '100'; + const { filteredRows, expandedRowsFromFilter } = filterStructuredProperties( + testEntityRegistry, + rows, + filterText, + ); + + expect(filteredRows).toMatchObject([ + { + displayName: 'Number Prop', + qualifiedName: 'io.acryl.ads.number', + values: [{ value: 100, entity: null }], + }, + ]); + expect(expandedRowsFromFilter).toMatchObject(new Set(['io', 'io.acryl', 'io.acryl.ads'])); + }); +}); diff --git a/datahub-web-react/src/app/entity/shared/tabs/Properties/types.ts b/datahub-web-react/src/app/entity/shared/tabs/Properties/types.ts new file mode 100644 index 0000000000000..b93ba886d5a64 --- /dev/null +++ b/datahub-web-react/src/app/entity/shared/tabs/Properties/types.ts @@ -0,0 +1,25 @@ +import { DataTypeEntity, Entity, StructuredPropertyEntity } from '../../../../../types.generated'; + +export interface ValueColumnData { + value: string | number | null; + entity: Entity | null; +} + +export interface TypeData { + type: string; + nativeDataType: string; +} + +export interface PropertyRow { + displayName: string; + qualifiedName: string; + values?: ValueColumnData[]; + children?: PropertyRow[]; + childrenCount?: number; + parent?: PropertyRow; + depth?: number; + type?: TypeData; + dataType?: DataTypeEntity; + isParentRow?: boolean; + structuredProperty?: StructuredPropertyEntity; +} diff --git a/datahub-web-react/src/app/entity/shared/tabs/Properties/useStructuredProperties.tsx 
b/datahub-web-react/src/app/entity/shared/tabs/Properties/useStructuredProperties.tsx new file mode 100644 index 0000000000000..5600d7c3e8498 --- /dev/null +++ b/datahub-web-react/src/app/entity/shared/tabs/Properties/useStructuredProperties.tsx @@ -0,0 +1,215 @@ +import { PropertyValue, StructuredPropertiesEntry } from '../../../../../types.generated'; +import EntityRegistry from '../../../EntityRegistry'; +import { useEntityData } from '../../EntityContext'; +import { GenericEntityProperties } from '../../types'; +import { getStructuredPropertyValue } from '../../utils'; +import { PropertyRow } from './types'; +import { filterStructuredProperties } from './utils'; + +const typeNameToType = { + StringValue: { type: 'string', nativeDataType: 'text' }, + NumberValue: { type: 'number', nativeDataType: 'float' }, +}; + +export function mapStructuredPropertyValues(structuredPropertiesEntry: StructuredPropertiesEntry) { + return structuredPropertiesEntry.values + .filter((value) => !!value) + .map((value) => ({ + value: getStructuredPropertyValue(value as PropertyValue), + entity: + structuredPropertiesEntry.valueEntities?.find( + (entity) => entity?.urn === getStructuredPropertyValue(value as PropertyValue), + ) || null, + })); +} + +// map the properties map into a list of PropertyRow objects to render in a table +function getStructuredPropertyRows(entityData?: GenericEntityProperties | null) { + const structuredPropertyRows: PropertyRow[] = []; + + entityData?.structuredProperties?.properties?.forEach((structuredPropertiesEntry) => { + const { displayName, qualifiedName } = structuredPropertiesEntry.structuredProperty.definition; + structuredPropertyRows.push({ + displayName: displayName || qualifiedName, + qualifiedName, + values: mapStructuredPropertyValues(structuredPropertiesEntry), + dataType: structuredPropertiesEntry.structuredProperty.definition.valueType, + structuredProperty: structuredPropertiesEntry.structuredProperty, + type: + 
structuredPropertiesEntry.values[0] && structuredPropertiesEntry.values[0].__typename + ? { + type: typeNameToType[structuredPropertiesEntry.values[0].__typename].type, + nativeDataType: typeNameToType[structuredPropertiesEntry.values[0].__typename].nativeDataType, + } + : undefined, + }); + }); + + return structuredPropertyRows; +} + +export function findAllSubstrings(s: string): Array { + const substrings: Array = []; + + for (let i = 0; i < s.length; i++) { + if (s[i] === '.') { + substrings.push(s.substring(0, i)); + } + } + substrings.push(s); + return substrings; +} + +export function createParentPropertyRow(displayName: string, qualifiedName: string): PropertyRow { + return { + displayName, + qualifiedName, + isParentRow: true, + }; +} + +export function identifyAndAddParentRows(rows?: Array): Array { + /** + * This function takes in an array of PropertyRow objects and determines which rows are parents. These parents need + * to be extracted in order to organize the rows into a properly nested structure later on. The final product returned + * is a list of parent rows, without values or children assigned. + */ + const qualifiedNames: Array = []; + + // Get list of fqns + if (rows) { + rows.forEach((row) => { + qualifiedNames.push(row.qualifiedName); + }); + } + + const finalParents: PropertyRow[] = []; + const finalParentNames = new Set(); + + // Loop through list of fqns and find all substrings. + // e.g. a.b.c.d becomes a, a.b, a.b.c, a.b.c.d + qualifiedNames.forEach((fqn) => { + let previousCount: number | null = null; + let previousParentName = ''; + + const substrings = findAllSubstrings(fqn); + + // Loop through substrings and count how many other fqns have that substring in them. Use this to determine + // if a property should be nested. If the count is equal then we should not nest, because there's no split + // that would tell us to nest. If the count is not equal, we should nest the child properties. 
+ for (let index = 0; index < substrings.length; index++) { + const token = substrings[index]; + const currentCount = qualifiedNames.filter((name) => name.startsWith(token)).length; + + // If we're at the beginning of the path and there is no nesting, break + if (index === 0 && currentCount === 1) { + break; + } + + // Add previous fqn, or,previousParentName, if we have found a viable parent path + if (previousCount !== null && previousCount !== currentCount) { + if (!finalParentNames.has(previousParentName)) { + const parent: PropertyRow = createParentPropertyRow(previousParentName, previousParentName); + parent.childrenCount = previousCount; + finalParentNames.add(previousParentName); + finalParents.push(parent); + } + } + + previousCount = currentCount; + previousParentName = token; + } + }); + + return finalParents; +} + +export function groupByParentProperty(rows?: Array): Array { + /** + * This function takes in an array of PropertyRow objects, representing parent and child properties. Parent properties + * will not have values, but child properties will. It organizes the rows into the parent and child structure and + * returns a list of PropertyRow objects representing it. + */ + const outputRows: Array = []; + const outputRowByPath = {}; + + if (rows) { + // Iterate through all rows + for (let rowIndex = 0; rowIndex < rows.length; rowIndex++) { + let parentRow: null | PropertyRow = null; + const row = { children: undefined, ...rows[rowIndex], depth: 0 }; + + // Iterate through a row's characters, and split the row's path into tokens + // e.g. a, b, c for the example a.b.c + for (let j = rowIndex - 1; j >= 0; j--) { + const rowTokens = row.qualifiedName.split('.'); + let parentPath: null | string = null; + let previousParentPath = rowTokens.slice(0, rowTokens.length - 1).join('.'); + + // Iterate through a row's path backwards, and check if the previous row's path has been seen. If it has, + // populate parentRow. If not, move on to the next path token. 
+ // e.g. for a.b.c.d, first evaluate a.b.c to see if it has been seen. If it hasn't, move to a.b + for ( + let lastParentTokenIndex = rowTokens.length - 2; + lastParentTokenIndex >= 0; + --lastParentTokenIndex + ) { + const lastParentToken: string = rowTokens[lastParentTokenIndex]; + if (lastParentToken && Object.keys(outputRowByPath).includes(previousParentPath)) { + parentPath = rowTokens.slice(0, lastParentTokenIndex + 1).join('.'); + break; + } + previousParentPath = rowTokens.slice(0, lastParentTokenIndex).join('.'); + } + + if (parentPath && rows[j].qualifiedName === parentPath) { + parentRow = outputRowByPath[rows[j].qualifiedName]; + break; + } + } + + // If the parent row exists in the ouput, add the current row as a child. If not, add the current row + // to the final output. + if (parentRow) { + row.depth = (parentRow.depth || 0) + 1; + row.parent = parentRow; + if (row.isParentRow) { + row.displayName = row.displayName.replace(`${parentRow.displayName}.`, ''); + } + parentRow.children = [...(parentRow.children || []), row]; + } else { + outputRows.push(row); + } + outputRowByPath[row.qualifiedName] = row; + } + } + return outputRows; +} + +export default function useStructuredProperties(entityRegistry: EntityRegistry, filterText?: string) { + const { entityData } = useEntityData(); + + let structuredPropertyRowsRaw = getStructuredPropertyRows(entityData); + const parentRows = identifyAndAddParentRows(structuredPropertyRowsRaw); + + structuredPropertyRowsRaw = [...structuredPropertyRowsRaw, ...parentRows]; + + const { filteredRows, expandedRowsFromFilter } = filterStructuredProperties( + entityRegistry, + structuredPropertyRowsRaw, + filterText, + ); + + // Sort by fqn before nesting algorithm + const copy = [...filteredRows].sort((a, b) => { + return a.qualifiedName.localeCompare(b.qualifiedName); + }); + + // group properties by path + const structuredPropertyRows = groupByParentProperty(copy); + + return { + structuredPropertyRows, + 
expandedRowsFromFilter: expandedRowsFromFilter as Set, + }; +} diff --git a/datahub-web-react/src/app/entity/shared/tabs/Properties/useUpdateExpandedRowsFromFilter.ts b/datahub-web-react/src/app/entity/shared/tabs/Properties/useUpdateExpandedRowsFromFilter.ts new file mode 100644 index 0000000000000..0dbe762c537db --- /dev/null +++ b/datahub-web-react/src/app/entity/shared/tabs/Properties/useUpdateExpandedRowsFromFilter.ts @@ -0,0 +1,23 @@ +import { useEffect } from 'react'; +import { isEqual } from 'lodash'; +import usePrevious from '../../../../shared/usePrevious'; + +interface Props { + expandedRowsFromFilter: Set; + setExpandedRows: React.Dispatch>>; +} + +export default function useUpdateExpandedRowsFromFilter({ expandedRowsFromFilter, setExpandedRows }: Props) { + const previousExpandedRowsFromFilter = usePrevious(expandedRowsFromFilter); + + useEffect(() => { + if (!isEqual(expandedRowsFromFilter, previousExpandedRowsFromFilter)) { + setExpandedRows((previousRows) => { + const finalRowsSet = new Set(); + expandedRowsFromFilter.forEach((row) => finalRowsSet.add(row)); + previousRows.forEach((row) => finalRowsSet.add(row)); + return finalRowsSet as Set; + }); + } + }, [expandedRowsFromFilter, previousExpandedRowsFromFilter, setExpandedRows]); +} diff --git a/datahub-web-react/src/app/entity/shared/tabs/Properties/utils.ts b/datahub-web-react/src/app/entity/shared/tabs/Properties/utils.ts new file mode 100644 index 0000000000000..91870e2e37e07 --- /dev/null +++ b/datahub-web-react/src/app/entity/shared/tabs/Properties/utils.ts @@ -0,0 +1,68 @@ +import { CustomPropertiesEntry } from '../../../../../types.generated'; +import EntityRegistry from '../../../EntityRegistry'; +import { GenericEntityProperties } from '../../types'; +import { PropertyRow, ValueColumnData } from './types'; + +export function mapCustomPropertiesToPropertyRows(customProperties: CustomPropertiesEntry[]) { + return (customProperties?.map((customProp) => ({ + displayName: customProp.key, + 
values: [{ value: customProp.value || '' }], + type: { type: 'string', nativeDataType: 'string' }, + })) || []) as PropertyRow[]; +} + +function matchesName(name: string, filterText: string) { + return name.toLocaleLowerCase().includes(filterText.toLocaleLowerCase()); +} + +function matchesAnyFromValues(values: ValueColumnData[], filterText: string, entityRegistry: EntityRegistry) { + return values.some( + (value) => + matchesName(value.value?.toString() || '', filterText) || + matchesName(value.entity ? entityRegistry.getDisplayName(value.entity.type, value.entity) : '', filterText), + ); +} + +export function getFilteredCustomProperties(filterText: string, entityData?: GenericEntityProperties | null) { + return entityData?.customProperties?.filter( + (property) => matchesName(property.key, filterText) || matchesName(property.value || '', filterText), + ); +} + +export function filterStructuredProperties( + entityRegistry: EntityRegistry, + propertyRows: PropertyRow[], + filterText?: string, +) { + if (!propertyRows) return { filteredRows: [], expandedRowsFromFilter: new Set() }; + if (!filterText) return { filteredRows: propertyRows, expandedRowsFromFilter: new Set() }; + const formattedFilterText = filterText.toLocaleLowerCase(); + + const finalQualifiedNames = new Set(); + const expandedRowsFromFilter = new Set(); + + propertyRows.forEach((row) => { + // if we match on the qualified name (maybe from a parent) do not filter out + if (matchesName(row.qualifiedName, formattedFilterText)) { + finalQualifiedNames.add(row.qualifiedName); + } + // if we match specifically on this property (not just its parent), add and expand all parents + if ( + matchesName(row.displayName, formattedFilterText) || + matchesAnyFromValues(row.values || [], formattedFilterText, entityRegistry) + ) { + finalQualifiedNames.add(row.qualifiedName); + + const splitFieldPath = row.qualifiedName.split('.'); + splitFieldPath.reduce((previous, current) => { + finalQualifiedNames.add(previous); + 
expandedRowsFromFilter.add(previous); + return `${previous}.${current}`; + }); + } + }); + + const filteredRows = propertyRows.filter((row) => finalQualifiedNames.has(row.qualifiedName)); + + return { filteredRows, expandedRowsFromFilter }; +} diff --git a/datahub-web-react/src/app/entity/shared/types.ts b/datahub-web-react/src/app/entity/shared/types.ts index d4e3965cd66f5..47cad4a69096d 100644 --- a/datahub-web-react/src/app/entity/shared/types.ts +++ b/datahub-web-react/src/app/entity/shared/types.ts @@ -38,6 +38,7 @@ import { BrowsePathV2, DataJobInputOutput, ParentDomainsResult, + StructuredProperties, } from '../../../types.generated'; import { FetchedEntity } from '../../lineage/types'; @@ -84,6 +85,7 @@ export type GenericEntityProperties = { platform?: Maybe; dataPlatformInstance?: Maybe; customProperties?: Maybe; + structuredProperties?: Maybe; institutionalMemory?: Maybe; schemaMetadata?: Maybe; externalUrl?: Maybe; diff --git a/datahub-web-react/src/app/entity/shared/utils.ts b/datahub-web-react/src/app/entity/shared/utils.ts index a158cc9b7c119..217aaaaf9dde8 100644 --- a/datahub-web-react/src/app/entity/shared/utils.ts +++ b/datahub-web-react/src/app/entity/shared/utils.ts @@ -1,6 +1,6 @@ import { Maybe } from 'graphql/jsutils/Maybe'; -import { Entity, EntityType, EntityRelationshipsResult, DataProduct } from '../../../types.generated'; +import { Entity, EntityType, EntityRelationshipsResult, DataProduct, PropertyValue } from '../../../types.generated'; import { capitalizeFirstLetterOnly } from '../../shared/textUtil'; import { GenericEntityProperties } from './types'; @@ -130,3 +130,13 @@ export function getDataProduct(dataProductResult: Maybe void; +}>({ theme: undefined, updateTheme: (_) => null }); + +export function useCustomTheme() { + return useContext(CustomThemeContext); +} diff --git a/datahub-web-react/src/graphql/assertion.graphql b/datahub-web-react/src/graphql/assertion.graphql index d4015fcebdb3e..0b64c4c8d6ddd 100644 --- 
a/datahub-web-react/src/graphql/assertion.graphql +++ b/datahub-web-react/src/graphql/assertion.graphql @@ -46,6 +46,7 @@ fragment assertionDetails on Assertion { } logic } + description } } diff --git a/datahub-web-react/src/graphql/fragments.graphql b/datahub-web-react/src/graphql/fragments.graphql index bb06ccb90a46d..e901c9af554ad 100644 --- a/datahub-web-react/src/graphql/fragments.graphql +++ b/datahub-web-react/src/graphql/fragments.graphql @@ -245,6 +245,11 @@ fragment nonRecursiveDatasetFields on Dataset { actor } } + structuredProperties { + properties { + ...structuredPropertiesFields + } + } editableProperties { description } @@ -709,6 +714,15 @@ fragment schemaFieldFields on SchemaField { glossaryTerms { ...glossaryTerms } + schemaFieldEntity { + urn + type + structuredProperties { + properties { + ...structuredPropertiesFields + } + } + } } fragment schemaMetadataFields on SchemaMetadata { @@ -1163,6 +1177,69 @@ fragment entityDisplayNameFields on Entity { } } +fragment structuredPropertyFields on StructuredPropertyEntity { + urn + type + definition { + displayName + qualifiedName + description + cardinality + valueType { + info { + type + displayName + } + } + entityTypes { + info { + type + } + } + cardinality + typeQualifier { + allowedTypes { + urn + type + info { + type + displayName + } + } + } + allowedValues { + value { + ... on StringValue { + stringValue + } + ... on NumberValue { + numberValue + } + } + description + } + } +} + +fragment structuredPropertiesFields on StructuredPropertiesEntry { + structuredProperty { + ...structuredPropertyFields + } + values { + ... on StringValue { + stringValue + } + ... 
on NumberValue { + numberValue + } + } + valueEntities { + urn + type + ...entityDisplayNameFields + } +} + fragment autoRenderAspectFields on RawAspect { aspectName payload diff --git a/docker/build.gradle b/docker/build.gradle index cc95e12f26f76..b14739104a9f1 100644 --- a/docker/build.gradle +++ b/docker/build.gradle @@ -8,15 +8,17 @@ import com.avast.gradle.dockercompose.tasks.ComposeDownForced apply from: "../gradle/versioning/versioning.gradle" ext { - quickstart_modules = [ + backend_profile_modules = [ ':docker:elasticsearch-setup', ':docker:mysql-setup', ':docker:kafka-setup', ':datahub-upgrade', + ':metadata-service:war', + ] + quickstart_modules = backend_profile_modules + [ ':metadata-jobs:mce-consumer-job', ':metadata-jobs:mae-consumer-job', - ':metadata-service:war', - ':datahub-frontend', + ':datahub-frontend' ] debug_modules = quickstart_modules - [':metadata-jobs:mce-consumer-job', @@ -90,9 +92,14 @@ dockerCompose { removeVolumes = false } + /** + * The smallest disk footprint required for Spark integration tests + * + * No frontend, mae, mce, or other services + */ quickstartSlim { isRequiredBy(tasks.named('quickstartSlim')) - composeAdditionalArgs = ['--profile', 'quickstart-consumers'] + composeAdditionalArgs = ['--profile', 'quickstart-backend'] environment.put 'DATAHUB_VERSION', "v${version}" environment.put "DATAHUB_ACTIONS_IMAGE", "acryldata/datahub-ingestion" @@ -132,7 +139,7 @@ tasks.getByName('quickstartComposeUp').dependsOn( tasks.getByName('quickstartPgComposeUp').dependsOn( pg_quickstart_modules.collect { it + ':dockerTag' }) tasks.getByName('quickstartSlimComposeUp').dependsOn( - ([':docker:datahub-ingestion'] + quickstart_modules) + ([':docker:datahub-ingestion'] + backend_profile_modules) .collect { it + ':dockerTag' }) tasks.getByName('quickstartDebugComposeUp').dependsOn( debug_modules.collect { it + ':dockerTagDebug' } diff --git a/docs/managed-datahub/datahub-api/entity-events-api.md 
b/docs/managed-datahub/datahub-api/entity-events-api.md index 07fa252249452..23499904d5505 100644 --- a/docs/managed-datahub/datahub-api/entity-events-api.md +++ b/docs/managed-datahub/datahub-api/entity-events-api.md @@ -563,7 +563,7 @@ This event is emitted when an Assertion has been run has succeeded on DataHub. "parameters": { "runResult": "SUCCESS", "runId": "123", - "aserteeUrn": "urn:li:dataset:def" + "asserteeUrn": "urn:li:dataset:def" }, "auditStamp": { "actor": "urn:li:corpuser:jdoe", @@ -808,4 +808,36 @@ These are the common parameters for all parameters. "time": 1649953100653 } } -``` \ No newline at end of file +``` + +### Incident Change Event + +This event is emitted when an Incident has been created or it's status changes. + +#### Header + +
    CategoryOperationEntity Types
    INCIDENTACTIVE, RESOLVEDincident
    + +#### Parameters + +| Name | Type | Description | Optional | +|--------------| ------ |---------------------------------------------------| -------- | +| entities | String | The list of entities associated with the incident | False | + +#### Sample Event + +``` +{ + "entityUrn": "urn:li:incident:16ff200a-0ac5-4a7d-bbab-d4bdb4f831f9", + "entityType": "incident", + "category": "INCIDENT", + "operation": "ACTIVE", + "parameters": { + "entities": "[urn:li:dataset:abc, urn:li:dataset:abc2]", + }, + "auditStamp": { + "actor": "urn:li:corpuser:jdoe", + "time": 1649953100653 + } +} +``` diff --git a/docs/managed-datahub/observe/custom-sql-assertions.md b/docs/managed-datahub/observe/custom-sql-assertions.md index 11e9aa807b616..581b542688134 100644 --- a/docs/managed-datahub/observe/custom-sql-assertions.md +++ b/docs/managed-datahub/observe/custom-sql-assertions.md @@ -117,7 +117,7 @@ The **Assertion Description**: This is a human-readable description of the Asser ### Prerequisites 1. **Permissions**: To create or delete Custom SQL Assertions for a specific entity on DataHub, you'll need to be granted the - `Edit Assertions` and `Edit Monitors` privileges for the entity. This is granted to Entity owners by default. + `Edit Assertions`, `Edit Monitors`, **and the additional `Edit SQL Assertion Monitors`** privileges for the entity. This is granted to Entity owners by default. 2. **Data Platform Connection**: In order to create a Custom SQL Assertion, you'll need to have an **Ingestion Source** configured to your Data Platform: Snowflake, BigQuery, or Redshift under the **Integrations** tab. 
diff --git a/docs/managed-datahub/observe/freshness-assertions.md b/docs/managed-datahub/observe/freshness-assertions.md index 416db6a65343e..9704f475b1587 100644 --- a/docs/managed-datahub/observe/freshness-assertions.md +++ b/docs/managed-datahub/observe/freshness-assertions.md @@ -107,12 +107,14 @@ Change Source types vary by the platform, but generally fall into these categori - **Audit Log** (Default): A metadata API or Table that is exposed by the Data Warehouse which contains captures information about the operations that have been performed to each Table. It is usually efficient to check, but some useful operations are not - fully supported across all major Warehouse platforms. + fully supported across all major Warehouse platforms. Note that for Databricks, [this option](https://docs.databricks.com/en/delta/history.html) + is only available for tables stored in Delta format. - **Information Schema**: A system Table that is exposed by the Data Warehouse which contains live information about the Databases and Tables stored inside the Data Warehouse. It is usually efficient to check, but lacks detailed information about the _type_ - of change that was last made to a specific table (e.g. the operation itself - INSERT, UPDATE, DELETE, number of impacted rows, etc) - + of change that was last made to a specific table (e.g. the operation itself - INSERT, UPDATE, DELETE, number of impacted rows, etc). + Note that for Databricks, [this option](https://docs.databricks.com/en/delta/table-details.html) is only available for tables stored in Delta format. + - **Last Modified Column**: A Date or Timestamp column that represents the last time that a specific _row_ was touched or updated. Adding a Last Modified Column to each warehouse Table is a pattern is often used for existing use cases around change management. 
If this change source is used, a query will be issued to the Table to search for rows that have been modified within a specific @@ -128,8 +130,11 @@ Change Source types vary by the platform, but generally fall into these categori This relies on Operations being reported to DataHub, either via ingestion or via use of the DataHub APIs (see [Report Operation via API](#reporting-operations-via-api)). Note if you have not configured an ingestion source through DataHub, then this may be the only option available. By default, any operation type found will be considered a valid change. Use the **Operation Types** dropdown when selecting this option to specify which operation types should be considered valid changes. You may choose from one of DataHub's standard Operation Types, or specify a "Custom" Operation Type by typing in the name of the Operation Type. - Using either of the column value approaches (**Last Modified Column** or **High Watermark Column**) to determine whether a Table has changed can be useful because it can be customized to determine whether specific types of important changes have been made to a given Table. - Because it does not involve system warehouse tables, it is also easily portable across Data Warehouse and Data Lake providers. + - **File Metadata** (Databricks Only): A column that is exposed by Databricks for both Unity Catalog and Hive Metastore based tables + which includes information about the last time that a file for the table was changed. Read more about it [here](https://docs.databricks.com/en/ingestion/file-metadata-column.html). + + Using either of the column value approaches (**Last Modified Column** or **High Watermark Column**) to determine whether a Table has changed can be useful because it can be customized to determine whether specific types of changes have been made to a given Table. 
+ And because this type of assertion does not involve system warehouse tables, they are easily portable across Data Warehouse and Data Lake providers. Freshness Assertions also have an off switch: they can be started or stopped at any time with the click of button. diff --git a/entity-registry/src/main/java/com/linkedin/metadata/models/ConfigEntitySpec.java b/entity-registry/src/main/java/com/linkedin/metadata/models/ConfigEntitySpec.java index b235e2adcae11..8bd89071e299d 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/models/ConfigEntitySpec.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/models/ConfigEntitySpec.java @@ -3,10 +3,12 @@ import com.linkedin.data.schema.RecordDataSchema; import com.linkedin.data.schema.TyperefDataSchema; import com.linkedin.metadata.models.annotation.EntityAnnotation; +import com.linkedin.metadata.models.annotation.SearchableAnnotation; import java.util.ArrayList; import java.util.Collection; import java.util.List; import java.util.Map; +import java.util.Set; import java.util.function.Function; import java.util.stream.Collectors; import javax.annotation.Nonnull; @@ -19,6 +21,7 @@ public class ConfigEntitySpec implements EntitySpec { private final Map _aspectSpecs; private List _searchableFieldSpecs; + private Map> searchableFieldTypeMap; public ConfigEntitySpec( @Nonnull final String entityName, @@ -89,4 +92,13 @@ public List getSearchableFieldSpecs() { return _searchableFieldSpecs; } + + @Override + public Map> getSearchableFieldTypes() { + if (searchableFieldTypeMap == null) { + searchableFieldTypeMap = EntitySpec.super.getSearchableFieldTypes(); + } + + return searchableFieldTypeMap; + } } diff --git a/entity-registry/src/main/java/com/linkedin/metadata/models/DefaultEntitySpec.java b/entity-registry/src/main/java/com/linkedin/metadata/models/DefaultEntitySpec.java index 5db8ca264f69d..2546674f9835c 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/models/DefaultEntitySpec.java +++ 
b/entity-registry/src/main/java/com/linkedin/metadata/models/DefaultEntitySpec.java @@ -3,10 +3,12 @@ import com.linkedin.data.schema.RecordDataSchema; import com.linkedin.data.schema.TyperefDataSchema; import com.linkedin.metadata.models.annotation.EntityAnnotation; +import com.linkedin.metadata.models.annotation.SearchableAnnotation; import java.util.ArrayList; import java.util.Collection; import java.util.List; import java.util.Map; +import java.util.Set; import java.util.function.Function; import java.util.stream.Collectors; import javax.annotation.Nonnull; @@ -24,6 +26,7 @@ public class DefaultEntitySpec implements EntitySpec { private final TyperefDataSchema _aspectTyperefSchema; private List _searchableFieldSpecs; + private Map> searchableFieldTypeMap; public DefaultEntitySpec( @Nonnull final Collection aspectSpecs, @@ -102,4 +105,13 @@ public List getSearchableFieldSpecs() { return _searchableFieldSpecs; } + + @Override + public Map> getSearchableFieldTypes() { + if (searchableFieldTypeMap == null) { + searchableFieldTypeMap = EntitySpec.super.getSearchableFieldTypes(); + } + + return searchableFieldTypeMap; + } } diff --git a/entity-registry/src/main/java/com/linkedin/metadata/models/EntitySpec.java b/entity-registry/src/main/java/com/linkedin/metadata/models/EntitySpec.java index e4c9dd55a3b4a..9a75cc1f751d3 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/models/EntitySpec.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/models/EntitySpec.java @@ -3,8 +3,13 @@ import com.linkedin.data.schema.RecordDataSchema; import com.linkedin.data.schema.TyperefDataSchema; import com.linkedin.metadata.models.annotation.EntityAnnotation; +import com.linkedin.metadata.models.annotation.SearchableAnnotation; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.Set; import java.util.stream.Collectors; /** A specification of a DataHub Entity */ @@ 
-36,6 +41,41 @@ default List getSearchableFieldSpecs() { .collect(Collectors.toList()); } + default Map> getSearchableFieldTypes() { + // Get additional fields and mint SearchableFieldSpecs for them + Map> fieldSpecMap = new HashMap<>(); + for (SearchableFieldSpec fieldSpec : getSearchableFieldSpecs()) { + SearchableAnnotation searchableAnnotation = fieldSpec.getSearchableAnnotation(); + if (searchableAnnotation.getNumValuesFieldName().isPresent()) { + String fieldName = searchableAnnotation.getNumValuesFieldName().get(); + Set fieldSet = new HashSet<>(); + fieldSet.add(SearchableAnnotation.FieldType.COUNT); + fieldSpecMap.put(fieldName, fieldSet); + } + if (searchableAnnotation.getHasValuesFieldName().isPresent()) { + String fieldName = searchableAnnotation.getHasValuesFieldName().get(); + Set fieldSet = new HashSet<>(); + fieldSet.add(SearchableAnnotation.FieldType.BOOLEAN); + fieldSpecMap.put(fieldName, fieldSet); + } + } + fieldSpecMap.putAll( + getSearchableFieldSpecs().stream() + .collect( + Collectors.toMap( + searchableFieldSpec -> + searchableFieldSpec.getSearchableAnnotation().getFieldName(), + searchableFieldSpec -> + new HashSet<>( + Collections.singleton( + searchableFieldSpec.getSearchableAnnotation().getFieldType())), + (set1, set2) -> { + set1.addAll(set2); + return set1; + }))); + return fieldSpecMap; + } + default List getSearchScoreFieldSpecs() { return getAspectSpecs().stream() .map(AspectSpec::getSearchScoreFieldSpecs) diff --git a/entity-registry/src/main/java/com/linkedin/metadata/models/EntitySpecBuilder.java b/entity-registry/src/main/java/com/linkedin/metadata/models/EntitySpecBuilder.java index 580134f566871..54f2206798da0 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/models/EntitySpecBuilder.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/models/EntitySpecBuilder.java @@ -248,9 +248,9 @@ public AspectSpec buildAspectSpec( // Extract SearchScore Field Specs final SearchScoreFieldSpecExtractor 
searchScoreFieldSpecExtractor = new SearchScoreFieldSpecExtractor(); - final DataSchemaRichContextTraverser searcScoreFieldSpecTraverser = + final DataSchemaRichContextTraverser searchScoreFieldSpecTraverser = new DataSchemaRichContextTraverser(searchScoreFieldSpecExtractor); - searcScoreFieldSpecTraverser.traverse(processedSearchScoreResult.getResultSchema()); + searchScoreFieldSpecTraverser.traverse(processedSearchScoreResult.getResultSchema()); final SchemaAnnotationProcessor.SchemaAnnotationProcessResult processedRelationshipResult = SchemaAnnotationProcessor.process( diff --git a/entity-registry/src/main/java/com/linkedin/metadata/models/registry/ConfigEntityRegistry.java b/entity-registry/src/main/java/com/linkedin/metadata/models/registry/ConfigEntityRegistry.java index 41043995a3b77..9aed29ab8595e 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/models/registry/ConfigEntityRegistry.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/models/registry/ConfigEntityRegistry.java @@ -91,7 +91,7 @@ private static Pair getFileAndClassPath(String entityRegistryRoot) .filter(Files::isRegularFile) .filter(f -> f.endsWith("entity-registry.yml") || f.endsWith("entity-registry.yaml")) .collect(Collectors.toList()); - if (yamlFiles.size() == 0) { + if (yamlFiles.isEmpty()) { throw new EntityRegistryException( String.format( "Did not find an entity registry (entity_registry.yaml/yml) under %s", diff --git a/entity-registry/src/main/java/com/linkedin/metadata/models/registry/MergedEntityRegistry.java b/entity-registry/src/main/java/com/linkedin/metadata/models/registry/MergedEntityRegistry.java index 650a1cd41066e..0dcd0420d4df8 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/models/registry/MergedEntityRegistry.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/models/registry/MergedEntityRegistry.java @@ -58,7 +58,7 @@ private void validateEntitySpec(EntitySpec entitySpec, final ValidationResult va 
validationResult.setValid(false); validationResult .getValidationFailures() - .add(String.format("Key aspect is missing in entity {}", entitySpec.getName())); + .add(String.format("Key aspect is missing in entity %s", entitySpec.getName())); } } @@ -86,7 +86,7 @@ public MergedEntityRegistry apply(EntityRegistry patchEntityRegistry) } // Merge Event Specs - if (patchEntityRegistry.getEventSpecs().size() > 0) { + if (!patchEntityRegistry.getEventSpecs().isEmpty()) { eventNameToSpec.putAll(patchEntityRegistry.getEventSpecs()); } // TODO: Validate that the entity registries don't have conflicts among each other @@ -116,19 +116,18 @@ private void checkMergeable( if (existingEntitySpec != null) { existingEntitySpec .getAspectSpecMap() - .entrySet() .forEach( - aspectSpecEntry -> { - if (newEntitySpec.hasAspect(aspectSpecEntry.getKey())) { + (key, value) -> { + if (newEntitySpec.hasAspect(key)) { CompatibilityResult result = CompatibilityChecker.checkCompatibility( - aspectSpecEntry.getValue().getPegasusSchema(), - newEntitySpec.getAspectSpec(aspectSpecEntry.getKey()).getPegasusSchema(), + value.getPegasusSchema(), + newEntitySpec.getAspectSpec(key).getPegasusSchema(), new CompatibilityOptions()); if (result.isError()) { log.error( "{} schema is not compatible with previous schema due to {}", - aspectSpecEntry.getKey(), + key, result.getMessages()); // we want to continue processing all aspects to collect all failures validationResult.setValid(false); @@ -137,11 +136,11 @@ private void checkMergeable( .add( String.format( "%s schema is not compatible with previous schema due to %s", - aspectSpecEntry.getKey(), result.getMessages())); + key, result.getMessages())); } else { log.info( "{} schema is compatible with previous schema due to {}", - aspectSpecEntry.getKey(), + key, result.getMessages()); } } @@ -222,7 +221,7 @@ public PluginFactory getPluginFactory() { @Setter @Getter - private class ValidationResult { + private static class ValidationResult { boolean valid = 
true; List validationFailures = new ArrayList<>(); } diff --git a/entity-registry/src/main/java/com/linkedin/metadata/models/registry/PatchEntityRegistry.java b/entity-registry/src/main/java/com/linkedin/metadata/models/registry/PatchEntityRegistry.java index b82b905c50004..b4fc4193e7263 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/models/registry/PatchEntityRegistry.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/models/registry/PatchEntityRegistry.java @@ -71,19 +71,17 @@ public class PatchEntityRegistry implements EntityRegistry { @Override public String toString() { StringBuilder sb = new StringBuilder("PatchEntityRegistry[" + "identifier=" + identifier + ';'); - entityNameToSpec.entrySet().stream() - .forEach( - entry -> - sb.append("[entityName=") - .append(entry.getKey()) - .append(";aspects=[") - .append( - entry.getValue().getAspectSpecs().stream() - .map(spec -> spec.getName()) - .collect(Collectors.joining(","))) - .append("]]")); - eventNameToSpec.entrySet().stream() - .forEach(entry -> sb.append("[eventName=").append(entry.getKey()).append("]")); + entityNameToSpec.forEach( + (key1, value1) -> + sb.append("[entityName=") + .append(key1) + .append(";aspects=[") + .append( + value1.getAspectSpecs().stream() + .map(AspectSpec::getName) + .collect(Collectors.joining(","))) + .append("]]")); + eventNameToSpec.forEach((key, value) -> sb.append("[eventName=").append(key).append("]")); return sb.toString(); } @@ -119,7 +117,7 @@ private static Pair getFileAndClassPath(String entityRegistryRoot) .filter(Files::isRegularFile) .filter(f -> f.endsWith("entity-registry.yml") || f.endsWith("entity-registry.yaml")) .collect(Collectors.toList()); - if (yamlFiles.size() == 0) { + if (yamlFiles.isEmpty()) { throw new EntityRegistryException( String.format( "Did not find an entity registry (entity-registry.yaml/yml) under %s", @@ -175,7 +173,7 @@ private PatchEntityRegistry( entities = OBJECT_MAPPER.readValue(configFileStream, 
Entities.class); this.pluginFactory = PluginFactory.withCustomClasspath(entities.getPlugins(), classLoaders); } catch (IOException e) { - e.printStackTrace(); + log.error("Unable to read Patch configuration.", e); throw new IllegalArgumentException( String.format( "Error while reading config file in path %s: %s", configFileStream, e.getMessage())); diff --git a/entity-registry/src/main/java/com/linkedin/metadata/models/registry/SnapshotEntityRegistry.java b/entity-registry/src/main/java/com/linkedin/metadata/models/registry/SnapshotEntityRegistry.java index 8fefa2fe00ae8..22aeddb6ac65f 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/models/registry/SnapshotEntityRegistry.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/models/registry/SnapshotEntityRegistry.java @@ -120,7 +120,7 @@ public AspectTemplateEngine getAspectTemplateEngine() { } @Override - public EventSpec getEventSpec(final String ignored) { + public EventSpec getEventSpec(@Nonnull final String ignored) { return null; } diff --git a/entity-registry/src/test/java/com/linkedin/metadata/models/EntitySpecBuilderTest.java b/entity-registry/src/test/java/com/linkedin/metadata/models/EntitySpecBuilderTest.java index d9cf8fd2603a8..8b043569dd16a 100644 --- a/entity-registry/src/test/java/com/linkedin/metadata/models/EntitySpecBuilderTest.java +++ b/entity-registry/src/test/java/com/linkedin/metadata/models/EntitySpecBuilderTest.java @@ -189,7 +189,7 @@ private void validateTestEntityInfo(final AspectSpec testEntityInfo) { testEntityInfo.getPegasusSchema().getFullName()); // Assert on Searchable Fields - assertEquals(testEntityInfo.getSearchableFieldSpecs().size(), 11); + assertEquals(testEntityInfo.getSearchableFieldSpecs().size(), 12); assertEquals( "customProperties", testEntityInfo @@ -340,6 +340,20 @@ private void validateTestEntityInfo(final AspectSpec testEntityInfo) { .get(new PathSpec("doubleField").toString()) .getSearchableAnnotation() .getFieldType()); + assertEquals( + 
"removed", + testEntityInfo + .getSearchableFieldSpecMap() + .get(new PathSpec("removed").toString()) + .getSearchableAnnotation() + .getFieldName()); + assertEquals( + SearchableAnnotation.FieldType.BOOLEAN, + testEntityInfo + .getSearchableFieldSpecMap() + .get(new PathSpec("removed").toString()) + .getSearchableAnnotation() + .getFieldType()); // Assert on Relationship Fields assertEquals(4, testEntityInfo.getRelationshipFieldSpecs().size()); diff --git a/metadata-ingestion-modules/airflow-plugin/build.gradle b/metadata-ingestion-modules/airflow-plugin/build.gradle index dacf12dc020df..9555f92c8831d 100644 --- a/metadata-ingestion-modules/airflow-plugin/build.gradle +++ b/metadata-ingestion-modules/airflow-plugin/build.gradle @@ -108,7 +108,7 @@ task testQuick(type: Exec, dependsOn: installDevTest) { inputs.files(project.fileTree(dir: "src/", include: "**/*.py")) inputs.files(project.fileTree(dir: "tests/")) commandLine 'bash', '-x', '-c', - "source ${venv_name}/bin/activate && pytest -vv --continue-on-collection-errors --junit-xml=junit.quick.xml" + "source ${venv_name}/bin/activate && pytest --cov-config=setup.cfg --cov-report xml:coverage_quick.xml -vv --continue-on-collection-errors --junit-xml=junit.quick.xml" } diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_datahub_listener_module.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_datahub_listener_module.py index e16563400e397..0e1ef69ebf18c 100644 --- a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_datahub_listener_module.py +++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_datahub_listener_module.py @@ -29,6 +29,6 @@ def on_task_instance_failed(previous_state, task_instance, session): if hasattr(_listener, "on_dag_run_running"): @hookimpl - def on_dag_run_running(dag_run, session): + def on_dag_run_running(dag_run, msg): assert _listener - _listener.on_dag_run_running(dag_run, session) + 
_listener.on_dag_run_running(dag_run, msg) diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_extractors.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_extractors.py index f84b7b56f6119..32bbe88481636 100644 --- a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_extractors.py +++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_extractors.py @@ -199,8 +199,8 @@ def _sql_extractor_extract(self: "SqlExtractor") -> TaskMetadata: platform=platform, platform_instance=None, env=builder.DEFAULT_ENV, - database=default_database, - schema=default_schema, + default_db=default_database, + default_schema=default_schema, ) self.log.debug(f"Got sql lineage {sql_parsing_result}") diff --git a/metadata-ingestion-modules/airflow-plugin/tests/conftest.py b/metadata-ingestion-modules/airflow-plugin/tests/conftest.py index d2c45e723f1b0..994816ff037c8 100644 --- a/metadata-ingestion-modules/airflow-plugin/tests/conftest.py +++ b/metadata-ingestion-modules/airflow-plugin/tests/conftest.py @@ -1,6 +1,17 @@ +import pathlib +import site + + def pytest_addoption(parser): parser.addoption( "--update-golden-files", action="store_true", default=False, ) + + +# See https://coverage.readthedocs.io/en/latest/subprocess.html#configuring-python-for-sub-process-measurement +coverage_startup_code = "import coverage; coverage.process_startup()" +site_packages_dir = pathlib.Path(site.getsitepackages()[0]) +pth_file_path = site_packages_dir / "datahub_coverage_startup.pth" +pth_file_path.write_text(coverage_startup_code) diff --git a/metadata-ingestion/developing.md b/metadata-ingestion/developing.md index d1eef21974f1d..fc3a689124b2c 100644 --- a/metadata-ingestion/developing.md +++ b/metadata-ingestion/developing.md @@ -10,7 +10,7 @@ Also take a look at the guide to [adding a source](./adding-source.md). ### Requirements 1. Python 3.7+ must be installed in your host environment. -2. 
Java8 (gradle won't work with newer versions) +2. Java 17 (gradle won't work with newer or older versions) 4. On Debian/Ubuntu: `sudo apt install python3-dev python3-venv` 5. On Fedora (if using LDAP source integration): `sudo yum install openldap-devel` diff --git a/metadata-ingestion/docs/sources/metabase/metabase.md b/metadata-ingestion/docs/sources/metabase/metabase.md index a76786f7e5853..68422b8decce9 100644 --- a/metadata-ingestion/docs/sources/metabase/metabase.md +++ b/metadata-ingestion/docs/sources/metabase/metabase.md @@ -19,4 +19,4 @@ The key in this map must be string, not integer although Metabase API provides If `database_id_to_instance_map` is not specified, `platform_instance_map` is used for platform instance mapping. If none of the above are specified, platform instance is not used when constructing `urn` when searching for dataset relations. ## Compatibility -Metabase version [v0.41.2](https://www.metabase.com/start/oss/) +Metabase version [v0.48.3](https://www.metabase.com/start/oss/) diff --git a/metadata-ingestion/examples/data_contract/pet_of_the_week.dhub.dc.yaml b/metadata-ingestion/examples/data_contract/pet_of_the_week.dhub.dc.yaml index c73904403f678..bd081172b2a27 100644 --- a/metadata-ingestion/examples/data_contract/pet_of_the_week.dhub.dc.yaml +++ b/metadata-ingestion/examples/data_contract/pet_of_the_week.dhub.dc.yaml @@ -1,21 +1,29 @@ -# id: pet_details_dc # Optional: This is the unique identifier for the data contract -display_name: Data Contract for SampleHiveDataset +version: 1 # datahub yaml format version + +# Note: this data contract yaml format is still in development, and will likely +# change in backwards-incompatible ways in the future. 
+ entity: urn:li:dataset:(urn:li:dataPlatform:hive,SampleHiveDataset,PROD) freshness: - time: 0700 - granularity: DAILY + type: cron + cron: 0 7 * * * # 7am daily + timezone: America/Los_Angeles schema: - properties: - field_foo: - type: string - native_type: VARCHAR(100) - field_bar: - type: boolean - required: - - field_bar + type: json-schema + json-schema: + properties: + field_foo: + type: string + native_type: VARCHAR(100) + field_bar: + type: boolean + required: + - field_bar data_quality: - - type: column_range - config: - column: field_foo - min: 0 - max: 100 + - type: unique + column: field_foo + - type: custom_sql + sql: SELECT COUNT(*) FROM SampleHiveDataset + operator: + type: greater_than + value: 100 diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py index 1fb570d76120e..c1a5da5826ead 100644 --- a/metadata-ingestion/setup.py +++ b/metadata-ingestion/setup.py @@ -340,7 +340,7 @@ "ldap": {"python-ldap>=2.4"}, "looker": looker_common, "lookml": looker_common, - "metabase": {"requests"} | sqllineage_lib, + "metabase": {"requests"} | sqlglot_lib, "mlflow": {"mlflow-skinny>=2.3.0"}, "mode": {"requests", "tenacity>=8.0.1"} | sqllineage_lib, "mongodb": {"pymongo[srv]>=3.11", "packaging"}, diff --git a/metadata-ingestion/src/datahub/ingestion/source/aws/aws_common.py b/metadata-ingestion/src/datahub/ingestion/source/aws/aws_common.py index 421991a0966c3..95ca10045f1bb 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/aws/aws_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/aws/aws_common.py @@ -34,7 +34,7 @@ class AwsAssumeRoleConfig(PermissiveConfigModel): def assume_role( role: AwsAssumeRoleConfig, - aws_region: str, + aws_region: Optional[str], credentials: Optional[dict] = None, ) -> dict: credentials = credentials or {} @@ -93,7 +93,7 @@ class AwsConnectionConfig(ConfigModel): default=None, description="Named AWS profile to use. Only used if access key / secret are unset. 
If not set the default will be used", ) - aws_region: str = Field(description="AWS region code.") + aws_region: Optional[str] = Field(None, description="AWS region code.") aws_endpoint_url: Optional[str] = Field( default=None, diff --git a/metadata-ingestion/src/datahub/ingestion/source/aws/glue.py b/metadata-ingestion/src/datahub/ingestion/source/aws/glue.py index 826c18f69fd01..93601533bf8d6 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/aws/glue.py +++ b/metadata-ingestion/src/datahub/ingestion/source/aws/glue.py @@ -833,9 +833,8 @@ def get_profile_if_enabled( **{k: v for k, v in kwargs.items() if v} ) - partition_keys = response["Table"]["PartitionKeys"] - # check if this table is partitioned + partition_keys = response["Table"].get("PartitionKeys") if partition_keys: # ingest data profile with partitions # for cross-account ingestion diff --git a/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker.py b/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker.py index 6f6e8bbc05661..e335174eeb003 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker.py +++ b/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker.py @@ -82,7 +82,7 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: env=self.env, report=self.report, job_type_filter=self.source_config.extract_jobs, - aws_region=self.source_config.aws_region, + aws_region=self.sagemaker_client.meta.region_name, ) yield from job_processor.get_workunits() @@ -98,7 +98,7 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: model_image_to_jobs=model_image_to_jobs, model_name_to_jobs=model_name_to_jobs, lineage=lineage, - aws_region=self.source_config.aws_region, + aws_region=self.sagemaker_client.meta.region_name, ) yield from model_processor.get_workunits() diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py index 
3704eae96aece..b8bc07b9a3559 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py @@ -47,6 +47,7 @@ BigqueryProject, BigQuerySchemaApi, BigqueryTable, + BigqueryTableSnapshot, BigqueryView, ) from datahub.ingestion.source.bigquery_v2.common import ( @@ -234,7 +235,7 @@ def __init__(self, ctx: PipelineContext, config: BigQueryV2Config): run_id=self.ctx.run_id, ) - # For database, schema, tables, views, etc + # For database, schema, tables, views, snapshots etc self.lineage_extractor = BigqueryLineageExtractor( config, self.report, @@ -282,8 +283,12 @@ def __init__(self, ctx: PipelineContext, config: BigQueryV2Config): # Maps project -> view_ref, so we can find all views in a project self.view_refs_by_project: Dict[str, Set[str]] = defaultdict(set) + # Maps project -> snapshot_ref, so we can find all snapshots in a project + self.snapshot_refs_by_project: Dict[str, Set[str]] = defaultdict(set) # Maps view ref -> actual sql self.view_definitions: FileBackedDict[str] = FileBackedDict() + # Maps snapshot ref -> Snapshot + self.snapshots_by_ref: FileBackedDict[BigqueryTableSnapshot] = FileBackedDict() self.add_config_to_report() atexit.register(cleanup, config) @@ -303,6 +308,10 @@ def connectivity_test(client: bigquery.Client) -> CapabilityReport: else: return CapabilityReport(capable=True) + @property + def store_table_refs(self): + return self.config.include_table_lineage or self.config.include_usage_statistics + @staticmethod def metadata_read_capability_test( project_ids: List[str], config: BigQueryV2Config @@ -453,6 +462,7 @@ def _init_schema_resolver(self) -> SchemaResolver: self.config.include_schema_metadata and self.config.include_tables and self.config.include_views + and self.config.include_table_snapshots ) if schema_resolution_required and not schema_ingestion_enabled: @@ -567,6 +577,8 @@ def get_workunits_internal(self) -> 
Iterable[MetadataWorkUnit]: self.sql_parser_schema_resolver, self.view_refs_by_project, self.view_definitions, + self.snapshot_refs_by_project, + self.snapshots_by_ref, self.table_refs, ) @@ -603,6 +615,7 @@ def _process_project( ) -> Iterable[MetadataWorkUnit]: db_tables: Dict[str, List[BigqueryTable]] = {} db_views: Dict[str, List[BigqueryView]] = {} + db_snapshots: Dict[str, List[BigqueryTableSnapshot]] = {} project_id = bigquery_project.id try: @@ -651,9 +664,9 @@ def _process_project( self.report.report_dropped(f"{bigquery_dataset.name}.*") continue try: - # db_tables and db_views are populated in the this method + # db_tables, db_views, and db_snapshots are populated in the this method yield from self._process_schema( - project_id, bigquery_dataset, db_tables, db_views + project_id, bigquery_dataset, db_tables, db_views, db_snapshots ) except Exception as e: @@ -684,6 +697,7 @@ def _process_schema( bigquery_dataset: BigqueryDataset, db_tables: Dict[str, List[BigqueryTable]], db_views: Dict[str, List[BigqueryView]], + db_snapshots: Dict[str, List[BigqueryTableSnapshot]], ) -> Iterable[MetadataWorkUnit]: dataset_name = bigquery_dataset.name @@ -692,7 +706,11 @@ def _process_schema( ) columns = None - if self.config.include_tables or self.config.include_views: + if ( + self.config.include_tables + or self.config.include_views + or self.config.include_table_snapshots + ): columns = self.bigquery_data_dictionary.get_columns_for_dataset( project_id=project_id, dataset_name=dataset_name, @@ -713,7 +731,7 @@ def _process_schema( project_id=project_id, dataset_name=dataset_name, ) - elif self.config.include_table_lineage or self.config.include_usage_statistics: + elif self.store_table_refs: # Need table_refs to calculate lineage and usage for table_item in self.bigquery_data_dictionary.list_tables( dataset_name, project_id @@ -738,7 +756,10 @@ def _process_schema( if self.config.include_views: db_views[dataset_name] = list( 
self.bigquery_data_dictionary.get_views_for_dataset( - project_id, dataset_name, self.config.is_profiling_enabled() + project_id, + dataset_name, + self.config.is_profiling_enabled(), + self.report, ) ) @@ -751,6 +772,25 @@ def _process_schema( dataset_name=dataset_name, ) + if self.config.include_table_snapshots: + db_snapshots[dataset_name] = list( + self.bigquery_data_dictionary.get_snapshots_for_dataset( + project_id, + dataset_name, + self.config.is_profiling_enabled(), + self.report, + ) + ) + + for snapshot in db_snapshots[dataset_name]: + snapshot_columns = columns.get(snapshot.name, []) if columns else [] + yield from self._process_snapshot( + snapshot=snapshot, + columns=snapshot_columns, + project_id=project_id, + dataset_name=dataset_name, + ) + # This method is used to generate the ignore list for datatypes the profiler doesn't support we have to do it here # because the profiler doesn't have access to columns def generate_profile_ignore_list(self, columns: List[BigqueryColumn]) -> List[str]: @@ -778,7 +818,7 @@ def _process_table( self.report.report_dropped(table_identifier.raw_table_name()) return - if self.config.include_table_lineage or self.config.include_usage_statistics: + if self.store_table_refs: self.table_refs.add( str(BigQueryTableRef(table_identifier).get_sanitized_table_ref()) ) @@ -827,7 +867,7 @@ def _process_view( self.report.report_dropped(table_identifier.raw_table_name()) return - if self.config.include_table_lineage or self.config.include_usage_statistics: + if self.store_table_refs: table_ref = str( BigQueryTableRef(table_identifier).get_sanitized_table_ref() ) @@ -849,6 +889,48 @@ def _process_view( dataset_name=dataset_name, ) + def _process_snapshot( + self, + snapshot: BigqueryTableSnapshot, + columns: List[BigqueryColumn], + project_id: str, + dataset_name: str, + ) -> Iterable[MetadataWorkUnit]: + table_identifier = BigqueryTableIdentifier( + project_id, dataset_name, snapshot.name + ) + + self.report.snapshots_scanned += 1 
+ + if not self.config.table_snapshot_pattern.allowed( + table_identifier.raw_table_name() + ): + self.report.report_dropped(table_identifier.raw_table_name()) + return + + snapshot.columns = columns + snapshot.column_count = len(columns) + if not snapshot.column_count: + logger.warning( + f"Snapshot doesn't have any column or unable to get columns for table: {table_identifier}" + ) + + if self.store_table_refs: + table_ref = str( + BigQueryTableRef(table_identifier).get_sanitized_table_ref() + ) + self.table_refs.add(table_ref) + if snapshot.base_table_identifier: + self.snapshot_refs_by_project[project_id].add(table_ref) + self.snapshots_by_ref[table_ref] = snapshot + + yield from self.gen_snapshot_dataset_workunits( + table=snapshot, + columns=columns, + project_id=project_id, + dataset_name=dataset_name, + ) + def gen_table_dataset_workunits( self, table: BigqueryTable, @@ -933,9 +1015,34 @@ def gen_view_dataset_workunits( aspect=view_properties_aspect, ).as_workunit() + def gen_snapshot_dataset_workunits( + self, + table: BigqueryTableSnapshot, + columns: List[BigqueryColumn], + project_id: str, + dataset_name: str, + ) -> Iterable[MetadataWorkUnit]: + custom_properties: Dict[str, str] = {} + if table.ddl: + custom_properties["snapshot_ddl"] = table.ddl + if table.snapshot_time: + custom_properties["snapshot_time"] = str(table.snapshot_time) + if table.size_in_bytes: + custom_properties["size_in_bytes"] = str(table.size_in_bytes) + if table.rows_count: + custom_properties["rows_count"] = str(table.rows_count) + yield from self.gen_dataset_workunits( + table=table, + columns=columns, + project_id=project_id, + dataset_name=dataset_name, + sub_types=[DatasetSubTypes.BIGQUERY_TABLE_SNAPSHOT], + custom_properties=custom_properties, + ) + def gen_dataset_workunits( self, - table: Union[BigqueryTable, BigqueryView], + table: Union[BigqueryTable, BigqueryView, BigqueryTableSnapshot], columns: List[BigqueryColumn], project_id: str, dataset_name: str, @@ -1041,6 
+1148,9 @@ def gen_schema_fields(self, columns: List[BigqueryColumn]) -> List[SchemaField]: # TODO: Refractor this such that # converter = HiveColumnToAvroConverter(struct_type_separator=" "); # converter.get_schema_fields_for_hive_column(...) + original_struct_type_separator = ( + HiveColumnToAvroConverter._STRUCT_TYPE_SEPARATOR + ) HiveColumnToAvroConverter._STRUCT_TYPE_SEPARATOR = " " _COMPLEX_TYPE = re.compile("^(struct|array)") last_id = -1 @@ -1101,12 +1211,15 @@ def gen_schema_fields(self, columns: List[BigqueryColumn]) -> List[SchemaField]: ) schema_fields.append(field) last_id = col.ordinal_position + HiveColumnToAvroConverter._STRUCT_TYPE_SEPARATOR = ( + original_struct_type_separator + ) return schema_fields def gen_schema_metadata( self, dataset_urn: str, - table: Union[BigqueryTable, BigqueryView], + table: Union[BigqueryTable, BigqueryView, BigqueryTableSnapshot], columns: List[BigqueryColumn], dataset_name: str, ) -> MetadataWorkUnit: diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_config.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_config.py index bb14295bc38a8..2f4978d49e687 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_config.py @@ -148,6 +148,15 @@ class BigQueryV2Config( " because the project id is represented as the top-level container.", ) + include_table_snapshots: Optional[bool] = Field( + default=True, description="Whether table snapshots should be ingested." + ) + + table_snapshot_pattern: AllowDenyPattern = Field( + default=AllowDenyPattern.allow_all(), + description="Regex patterns for table snapshots to filter in ingestion. Specify regex to match the entire snapshot name in database.schema.snapshot format. e.g. 
to match all snapshots starting with customer in Customer database and public schema, use the regex 'Customer.public.customer.*'", + ) + debug_include_full_payloads: bool = Field( default=False, description="Include full payload into events. It is only for debugging and internal use.", diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_report.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_report.py index 69913b383af87..ad7b86219e7c1 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_report.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_report.py @@ -25,6 +25,7 @@ class BigQuerySchemaApiPerfReport(Report): get_tables_for_dataset: PerfTimer = field(default_factory=PerfTimer) list_tables: PerfTimer = field(default_factory=PerfTimer) get_views_for_dataset: PerfTimer = field(default_factory=PerfTimer) + get_snapshots_for_dataset: PerfTimer = field(default_factory=PerfTimer) @dataclass @@ -119,6 +120,8 @@ class BigQueryV2Report(ProfilingSqlReport, IngestionStageReport, BaseTimeWindowR num_usage_query_hash_collisions: int = 0 num_operational_stats_workunits_emitted: int = 0 + snapshots_scanned: int = 0 + num_view_definitions_parsed: int = 0 num_view_definitions_failed_parsing: int = 0 num_view_definitions_failed_column_parsing: int = 0 diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_schema.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_schema.py index 7edc8656360bb..d918782691c77 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_schema.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_schema.py @@ -106,6 +106,14 @@ class BigqueryView(BaseView): materialized: bool = False +@dataclass +class BigqueryTableSnapshot(BaseTable): + # Upstream table identifier + base_table_identifier: Optional[BigqueryTableIdentifier] = None + snapshot_time: 
Optional[datetime] = None + columns: List[BigqueryColumn] = field(default_factory=list) + + @dataclass class BigqueryDataset: name: str @@ -116,6 +124,7 @@ class BigqueryDataset: comment: Optional[str] = None tables: List[BigqueryTable] = field(default_factory=list) views: List[BigqueryView] = field(default_factory=list) + snapshots: List[BigqueryTableSnapshot] = field(default_factory=list) columns: List[BigqueryColumn] = field(default_factory=list) @@ -289,10 +298,11 @@ def get_views_for_dataset( project_id: str, dataset_name: str, has_data_read: bool, - report: Optional[BigQueryV2Report] = None, + report: BigQueryV2Report, ) -> Iterator[BigqueryView]: with self.report.get_views_for_dataset as current_timer: if has_data_read: + # If profiling is enabled cur = self.get_query_result( BigqueryQuery.views_for_dataset.format( project_id=project_id, dataset_name=dataset_name @@ -315,11 +325,10 @@ def get_views_for_dataset( f"Error while processing view {view_name}", exc_info=True, ) - if report: - report.report_warning( - "metadata-extraction", - f"Failed to get view {view_name}: {e}", - ) + report.report_warning( + "metadata-extraction", + f"Failed to get view {view_name}: {e}", + ) @staticmethod def _make_bigquery_view(view: bigquery.Row) -> BigqueryView: @@ -334,6 +343,8 @@ def _make_bigquery_view(view: bigquery.Row) -> BigqueryView: comment=view.comment, view_definition=view.view_definition, materialized=view.table_type == BigqueryTableType.MATERIALIZED_VIEW, + size_in_bytes=view.get("size_bytes"), + rows_count=view.get("row_count"), ) def get_columns_for_dataset( @@ -429,3 +440,62 @@ def get_columns_for_table( last_seen_table = column.table_name return columns + + def get_snapshots_for_dataset( + self, + project_id: str, + dataset_name: str, + has_data_read: bool, + report: BigQueryV2Report, + ) -> Iterator[BigqueryTableSnapshot]: + with self.report.get_snapshots_for_dataset as current_timer: + if has_data_read: + # If profiling is enabled + cur = 
self.get_query_result( + BigqueryQuery.snapshots_for_dataset.format( + project_id=project_id, dataset_name=dataset_name + ), + ) + else: + cur = self.get_query_result( + BigqueryQuery.snapshots_for_dataset_without_data_read.format( + project_id=project_id, dataset_name=dataset_name + ), + ) + + for table in cur: + try: + with current_timer.pause(): + yield BigQuerySchemaApi._make_bigquery_table_snapshot(table) + except Exception as e: + snapshot_name = f"{project_id}.{dataset_name}.{table.table_name}" + logger.warning( + f"Error while processing snapshot {snapshot_name}", + exc_info=True, + ) + report.report_warning( + "metadata-extraction", + f"Failed to get snapshot {snapshot_name}: {e}", + ) + + @staticmethod + def _make_bigquery_table_snapshot(snapshot: bigquery.Row) -> BigqueryTableSnapshot: + return BigqueryTableSnapshot( + name=snapshot.table_name, + created=snapshot.created, + last_altered=datetime.fromtimestamp( + snapshot.get("last_altered") / 1000, tz=timezone.utc + ) + if snapshot.get("last_altered") is not None + else snapshot.created, + comment=snapshot.comment, + ddl=snapshot.ddl, + snapshot_time=snapshot.snapshot_time, + size_in_bytes=snapshot.get("size_bytes"), + rows_count=snapshot.get("row_count"), + base_table_identifier=BigqueryTableIdentifier( + project_id=snapshot.base_table_catalog, + dataset=snapshot.base_table_schema, + table=snapshot.base_table_name, + ), + ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/lineage.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/lineage.py index b44b06feb95af..7db36867b4e69 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/lineage.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/lineage.py @@ -37,7 +37,10 @@ ) from datahub.ingestion.source.bigquery_v2.bigquery_config import BigQueryV2Config from datahub.ingestion.source.bigquery_v2.bigquery_report import BigQueryV2Report -from datahub.ingestion.source.bigquery_v2.bigquery_schema
import BigQuerySchemaApi +from datahub.ingestion.source.bigquery_v2.bigquery_schema import ( + BigQuerySchemaApi, + BigqueryTableSnapshot, +) from datahub.ingestion.source.bigquery_v2.common import BQ_DATETIME_FORMAT from datahub.ingestion.source.bigquery_v2.queries import ( BQ_FILTER_RULE_TEMPLATE_V2_LINEAGE, @@ -198,6 +201,28 @@ def make_lineage_edges_from_parsing_result( return list(table_edges.values()) +def make_lineage_edge_for_snapshot( + snapshot: BigqueryTableSnapshot, +) -> Optional[LineageEdge]: + if snapshot.base_table_identifier: + base_table_name = str( + BigQueryTableRef.from_bigquery_table(snapshot.base_table_identifier) + ) + return LineageEdge( + table=base_table_name, + column_mapping=frozenset( + LineageEdgeColumnMapping( + out_column=column.field_path, + in_columns=frozenset([column.field_path]), + ) + for column in snapshot.columns + ), + auditStamp=datetime.now(timezone.utc), + type=DatasetLineageTypeClass.TRANSFORMED, + ) + return None + + class BigqueryLineageExtractor: def __init__( self, @@ -256,27 +281,35 @@ def get_lineage_workunits( sql_parser_schema_resolver: SchemaResolver, view_refs_by_project: Dict[str, Set[str]], view_definitions: FileBackedDict[str], + snapshot_refs_by_project: Dict[str, Set[str]], + snapshots_by_ref: FileBackedDict[BigqueryTableSnapshot], table_refs: Set[str], ) -> Iterable[MetadataWorkUnit]: if not self._should_ingest_lineage(): return - views_skip_audit_log_lineage: Set[str] = set() - if self.config.lineage_parse_view_ddl: - view_lineage: Dict[str, Set[LineageEdge]] = {} - for project in projects: + datasets_skip_audit_log_lineage: Set[str] = set() + dataset_lineage: Dict[str, Set[LineageEdge]] = {} + for project in projects: + self.populate_snapshot_lineage( + dataset_lineage, + snapshot_refs_by_project[project], + snapshots_by_ref, + ) + + if self.config.lineage_parse_view_ddl: self.populate_view_lineage_with_sql_parsing( - view_lineage, + dataset_lineage, view_refs_by_project[project], view_definitions, 
sql_parser_schema_resolver, project, ) - views_skip_audit_log_lineage.update(view_lineage.keys()) - for lineage_key in view_lineage.keys(): - yield from self.gen_lineage_workunits_for_table( - view_lineage, BigQueryTableRef.from_string_name(lineage_key) - ) + datasets_skip_audit_log_lineage.update(dataset_lineage.keys()) + for lineage_key in dataset_lineage.keys(): + yield from self.gen_lineage_workunits_for_table( + dataset_lineage, BigQueryTableRef.from_string_name(lineage_key) + ) if self.config.use_exported_bigquery_audit_metadata: projects = ["*"] # project_id not used when using exported metadata @@ -286,7 +319,7 @@ def get_lineage_workunits( yield from self.generate_lineage( project, sql_parser_schema_resolver, - views_skip_audit_log_lineage, + datasets_skip_audit_log_lineage, table_refs, ) @@ -300,7 +333,7 @@ def generate_lineage( self, project_id: str, sql_parser_schema_resolver: SchemaResolver, - views_skip_audit_log_lineage: Set[str], + datasets_skip_audit_log_lineage: Set[str], table_refs: Set[str], ) -> Iterable[MetadataWorkUnit]: logger.info(f"Generate lineage for {project_id}") @@ -338,7 +371,7 @@ def generate_lineage( # as they may contain indirectly referenced tables. 
if ( lineage_key not in table_refs - or lineage_key in views_skip_audit_log_lineage + or lineage_key in datasets_skip_audit_log_lineage ): continue @@ -387,6 +420,17 @@ def populate_view_lineage_with_sql_parsing( ) ) + def populate_snapshot_lineage( + self, + snapshot_lineage: Dict[str, Set[LineageEdge]], + snapshot_refs: Set[str], + snapshots_by_ref: FileBackedDict[BigqueryTableSnapshot], + ) -> None: + for snapshot in snapshot_refs: + lineage_edge = make_lineage_edge_for_snapshot(snapshots_by_ref[snapshot]) + if lineage_edge: + snapshot_lineage[snapshot] = {lineage_edge} + def gen_lineage_workunits_for_table( self, lineage: Dict[str, Set[LineageEdge]], table_ref: BigQueryTableRef ) -> Iterable[MetadataWorkUnit]: diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/queries.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/queries.py index 67fcc33cdf218..86971fce36a53 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/queries.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/queries.py @@ -157,6 +157,62 @@ class BigqueryQuery: table_name ASC """ + snapshots_for_dataset: str = f""" +SELECT + t.table_catalog as table_catalog, + t.table_schema as table_schema, + t.table_name as table_name, + t.table_type as table_type, + t.creation_time as created, + t.is_insertable_into, + t.ddl, + t.snapshot_time_ms as snapshot_time, + t.base_table_catalog, + t.base_table_schema, + t.base_table_name, + ts.last_modified_time as last_altered, + tos.OPTION_VALUE as comment, + ts.row_count, + ts.size_bytes +FROM + `{{project_id}}`.`{{dataset_name}}`.INFORMATION_SCHEMA.TABLES t + join `{{project_id}}`.`{{dataset_name}}`.__TABLES__ as ts on ts.table_id = t.TABLE_NAME + left join `{{project_id}}`.`{{dataset_name}}`.INFORMATION_SCHEMA.TABLE_OPTIONS as tos on t.table_schema = tos.table_schema + and t.TABLE_NAME = tos.TABLE_NAME + and tos.OPTION_NAME = "description" +WHERE + table_type = '{BigqueryTableType.SNAPSHOT}' 
+order by + table_schema ASC, + table_name ASC +""" + + snapshots_for_dataset_without_data_read: str = f""" +SELECT + t.table_catalog as table_catalog, + t.table_schema as table_schema, + t.table_name as table_name, + t.table_type as table_type, + t.creation_time as created, + t.is_insertable_into, + t.ddl, + t.snapshot_time_ms as snapshot_time, + t.base_table_catalog, + t.base_table_schema, + t.base_table_name, + tos.OPTION_VALUE as comment, +FROM + `{{project_id}}`.`{{dataset_name}}`.INFORMATION_SCHEMA.TABLES t + left join `{{project_id}}`.`{{dataset_name}}`.INFORMATION_SCHEMA.TABLE_OPTIONS as tos on t.table_schema = tos.table_schema + and t.TABLE_NAME = tos.TABLE_NAME + and tos.OPTION_NAME = "description" +WHERE + table_type = '{BigqueryTableType.SNAPSHOT}' +order by + table_schema ASC, + table_name ASC +""" + columns_for_dataset: str = """ select c.table_catalog as table_catalog, diff --git a/metadata-ingestion/src/datahub/ingestion/source/common/subtypes.py b/metadata-ingestion/src/datahub/ingestion/source/common/subtypes.py index 741b4789bef21..3296a8fb29354 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/common/subtypes.py +++ b/metadata-ingestion/src/datahub/ingestion/source/common/subtypes.py @@ -15,6 +15,7 @@ class DatasetSubTypes(str, Enum): SALESFORCE_CUSTOM_OBJECT = "Custom Object" SALESFORCE_STANDARD_OBJECT = "Object" POWERBI_DATASET_TABLE = "PowerBI Dataset Table" + BIGQUERY_TABLE_SNAPSHOT = "Bigquery Table Snapshot" # TODO: Create separate entity... 
NOTEBOOK = "Notebook" diff --git a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_core.py b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_core.py index 6fd3c5ba309f9..a2f96264b7f64 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_core.py +++ b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_core.py @@ -81,7 +81,7 @@ def aws_connection_needed_if_s3_uris_present( if (values.get(f) or "").startswith("s3://") ] - if uri_containing_fields and not aws_connection: + if uri_containing_fields and aws_connection is None: raise ValueError( f"Please provide aws_connection configuration, since s3 uris have been provided in fields {uri_containing_fields}" ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/metabase.py b/metadata-ingestion/src/datahub/ingestion/source/metabase.py index 9f09a4322bb5d..d22bfb2b8b52f 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/metabase.py +++ b/metadata-ingestion/src/datahub/ingestion/source/metabase.py @@ -1,3 +1,4 @@ +import logging from datetime import datetime, timezone from functools import lru_cache from typing import Dict, Iterable, List, Optional, Tuple, Union @@ -7,7 +8,6 @@ import requests from pydantic import Field, validator from requests.models import HTTPError -from sqllineage.runner import LineageRunner import datahub.emitter.mce_builder as builder from datahub.configuration.source_common import DatasetLineageProviderConfigBase @@ -42,6 +42,9 @@ OwnershipTypeClass, ) from datahub.utilities import config_clean +from datahub.utilities.sqlglot_lineage import create_lineage_sql_parsed_result + +logger = logging.getLogger(__name__) DATASOURCE_URN_RECURSION_LIMIT = 5 @@ -87,10 +90,17 @@ class MetabaseSource(Source): """ This plugin extracts Charts, dashboards, and associated metadata. This plugin is in beta and has only been tested on PostgreSQL and H2 database. 
- ### Dashboard - [/api/dashboard](https://www.metabase.com/docs/latest/api-documentation.html#dashboard) endpoint is used to - retrieve the following dashboard information. + ### Collection + + [/api/collection](https://www.metabase.com/docs/latest/api/collection) endpoint is used to + retrieve the available collections. + + [/api/collection//items?models=dashboard](https://www.metabase.com/docs/latest/api/collection#get-apicollectioniditems) endpoint is used to retrieve a given collection and list their dashboards. + + ### Dashboard + + [/api/dashboard/](https://www.metabase.com/docs/latest/api/dashboard) endpoint is used to retrieve a given Dashboard and grab its information. - Title and description - Last edited by @@ -184,19 +194,29 @@ def close(self) -> None: def emit_dashboard_mces(self) -> Iterable[MetadataWorkUnit]: try: - dashboard_response = self.session.get( - f"{self.config.connect_uri}/api/dashboard" + collections_response = self.session.get( + f"{self.config.connect_uri}/api/collection/" ) - dashboard_response.raise_for_status() - dashboards = dashboard_response.json() + collections_response.raise_for_status() + collections = collections_response.json() - for dashboard_info in dashboards: - dashboard_snapshot = self.construct_dashboard_from_api_data( - dashboard_info + for collection in collections: + collection_dashboards_response = self.session.get( + f"{self.config.connect_uri}/api/collection/{collection['id']}/items?models=dashboard" ) - if dashboard_snapshot is not None: - mce = MetadataChangeEvent(proposedSnapshot=dashboard_snapshot) - yield MetadataWorkUnit(id=dashboard_snapshot.urn, mce=mce) + collection_dashboards_response.raise_for_status() + collection_dashboards = collection_dashboards_response.json() + + if not collection_dashboards.get("data"): + continue + + for dashboard_info in collection_dashboards.get("data"): + dashboard_snapshot = self.construct_dashboard_from_api_data( + dashboard_info + ) + if dashboard_snapshot is not None: + 
mce = MetadataChangeEvent(proposedSnapshot=dashboard_snapshot) + yield MetadataWorkUnit(id=dashboard_snapshot.urn, mce=mce) except HTTPError as http_error: self.report.report_failure( @@ -225,7 +245,7 @@ def construct_dashboard_from_api_data( dashboard_response.raise_for_status() dashboard_details = dashboard_response.json() except HTTPError as http_error: - self.report.report_failure( + self.report.report_warning( key=f"metabase-dashboard-{dashboard_id}", reason=f"Unable to retrieve dashboard. " f"Reason: {str(http_error)}", ) @@ -251,10 +271,10 @@ def construct_dashboard_from_api_data( ) chart_urns = [] - cards_data = dashboard_details.get("ordered_cards", "{}") + cards_data = dashboard_details.get("dashcards", {}) for card_info in cards_data: chart_urn = builder.make_chart_urn( - self.platform, card_info.get("card_id", "") + self.platform, card_info.get("card").get("id", "") ) chart_urns.append(chart_urn) @@ -293,7 +313,7 @@ def _get_ownership(self, creator_id: int) -> Optional[OwnershipClass]: ) return None # For cases when the error is not 404 but something else - self.report.report_failure( + self.report.report_warning( key=f"metabase-user-{creator_id}", reason=f"Unable to retrieve User info. " f"Reason: {str(http_error)}", ) @@ -348,7 +368,7 @@ def get_card_details_by_id(self, card_id: Union[int, str]) -> dict: card_response.raise_for_status() return card_response.json() except HTTPError as http_error: - self.report.report_failure( + self.report.report_warning( key=f"metabase-card-{card_id}", reason=f"Unable to retrieve Card info. 
" f"Reason: {str(http_error)}", ) @@ -357,7 +377,7 @@ def get_card_details_by_id(self, card_id: Union[int, str]) -> dict: def construct_card_from_api_data(self, card_data: dict) -> Optional[ChartSnapshot]: card_id = card_data.get("id") if card_id is None: - self.report.report_failure( + self.report.report_warning( key="metabase-card", reason=f"Unable to get Card id from card data {str(card_data)}", ) @@ -365,7 +385,7 @@ def construct_card_from_api_data(self, card_data: dict) -> Optional[ChartSnapsho card_details = self.get_card_details_by_id(card_id) if not card_details: - self.report.report_failure( + self.report.report_warning( key=f"metabase-card-{card_id}", reason="Unable to construct Card due to empty card details", ) @@ -482,7 +502,7 @@ def get_datasource_urn( self, card_details: dict, recursion_depth: int = 0 ) -> Optional[List]: if recursion_depth > DATASOURCE_URN_RECURSION_LIMIT: - self.report.report_failure( + self.report.report_warning( key=f"metabase-card-{card_details.get('id')}", reason="Unable to retrieve Card info. Reason: source table recursion depth exceeded", ) @@ -496,14 +516,13 @@ def get_datasource_urn( platform_instance, ) = self.get_datasource_from_id(datasource_id) if not platform: - self.report.report_failure( + self.report.report_warning( key=f"metabase-datasource-{datasource_id}", reason=f"Unable to detect platform for database id {datasource_id}", ) return None query_type = card_details.get("dataset_query", {}).get("type", {}) - source_tables = set() if query_type == "query": source_table_id = ( @@ -525,57 +544,40 @@ def get_datasource_urn( # the question is built directly from table in DB schema_name, table_name = self.get_source_table_from_id(source_table_id) if table_name: - source_tables.add( - f"{database_name + '.' if database_name else ''}{schema_name + '.' 
if schema_name else ''}{table_name}" - ) - else: - try: - raw_query = ( - card_details.get("dataset_query", {}) - .get("native", {}) - .get("query", "") - ) - parser = LineageRunner(raw_query) - - for table in parser.source_tables: - sources = str(table).split(".") - - source_db = sources[-3] if len(sources) > 2 else database_name - source_schema, source_table = sources[-2], sources[-1] - if source_schema == "": - source_schema = ( - database_schema - if database_schema is not None - else str(self.config.default_schema) + name_components = [database_name, schema_name, table_name] + return [ + builder.make_dataset_urn_with_platform_instance( + platform=platform, + name=".".join([v for v in name_components if v]), + platform_instance=platform_instance, + env=self.config.env, ) - - source_tables.add( - f"{source_db + '.' if source_db else ''}{source_schema}.{source_table}" - ) - except Exception as e: - self.report.report_failure( - key="metabase-query", - reason=f"Unable to retrieve lineage from query. 
" - f"Query: {raw_query} " - f"Reason: {str(e)} ", - ) - return None - - if platform == "snowflake": - source_tables = set(i.lower() for i in source_tables) - - # Create dataset URNs - dataset_urn = [ - builder.make_dataset_urn_with_platform_instance( + ] + else: + raw_query = ( + card_details.get("dataset_query", {}).get("native", {}).get("query", "") + ) + result = create_lineage_sql_parsed_result( + query=raw_query, + default_db=database_name, + default_schema=database_schema or self.config.default_schema, platform=platform, - name=name, platform_instance=platform_instance, env=self.config.env, + graph=self.ctx.graph, ) - for name in source_tables - ] + if result.debug_info.table_error: + logger.info( + f"Failed to parse lineage from query {raw_query}: " + f"{result.debug_info.table_error}" + ) + self.report.report_warning( + key="metabase-query", + reason=f"Unable to retrieve lineage from query: {raw_query}", + ) + return result.in_tables - return dataset_urn + return None @lru_cache(maxsize=None) def get_source_table_from_id( @@ -592,10 +594,9 @@ def get_source_table_from_id( return schema, name except HTTPError as http_error: - self.report.report_failure( + self.report.report_warning( key=f"metabase-table-{table_id}", - reason=f"Unable to retrieve source table. " - f"Reason: {str(http_error)}", + reason=f"Unable to retrieve source table. Reason: {str(http_error)}", ) return None, None @@ -641,7 +642,7 @@ def get_datasource_from_id( dataset_response.raise_for_status() dataset_json = dataset_response.json() except HTTPError as http_error: - self.report.report_failure( + self.report.report_warning( key=f"metabase-datasource-{datasource_id}", reason=f"Unable to retrieve Datasource. 
" f"Reason: {str(http_error)}", ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/native_sql_parser.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/native_sql_parser.py index 0afa8e7ff4564..56c9a4abe18ad 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/native_sql_parser.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/native_sql_parser.py @@ -69,8 +69,8 @@ def parse_custom_sql( return sqlglot_l.create_lineage_sql_parsed_result( query=sql_query, - schema=schema, - database=database, + default_schema=schema, + default_db=database, platform=platform, platform_instance=platform_instance, env=env, diff --git a/metadata-ingestion/src/datahub/ingestion/source/redshift/lineage.py b/metadata-ingestion/src/datahub/ingestion/source/redshift/lineage.py index 8135e1d44c102..3efef58737c6e 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/redshift/lineage.py +++ b/metadata-ingestion/src/datahub/ingestion/source/redshift/lineage.py @@ -167,8 +167,8 @@ def _get_sources_from_query( query=query, platform=LineageDatasetPlatform.REDSHIFT.value, platform_instance=self.config.platform_instance, - database=db_name, - schema=str(self.config.default_schema), + default_db=db_name, + default_schema=str(self.config.default_schema), graph=self.context.graph, env=self.config.env, ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/tableau.py b/metadata-ingestion/src/datahub/ingestion/source/tableau.py index 46694dfcc47d1..acdece14a6440 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/tableau.py +++ b/metadata-ingestion/src/datahub/ingestion/source/tableau.py @@ -32,7 +32,6 @@ from urllib3 import Retry import datahub.emitter.mce_builder as builder -import datahub.utilities.sqlglot_lineage as sqlglot_l from datahub.configuration.common import ( AllowDenyPattern, ConfigModel, @@ -144,7 +143,11 @@ ViewPropertiesClass, ) from datahub.utilities import config_clean 
-from datahub.utilities.sqlglot_lineage import ColumnLineageInfo, SqlParsingResult +from datahub.utilities.sqlglot_lineage import ( + ColumnLineageInfo, + SqlParsingResult, + create_lineage_sql_parsed_result, +) from datahub.utilities.urns.dataset_urn import DatasetUrn logger: logging.Logger = logging.getLogger(__name__) @@ -1617,9 +1620,9 @@ def parse_custom_sql( f"Overridden info upstream_db={upstream_db}, platform_instance={platform_instance}, platform={platform}" ) - return sqlglot_l.create_lineage_sql_parsed_result( + return create_lineage_sql_parsed_result( query=query, - database=upstream_db, + default_db=upstream_db, platform=platform, platform_instance=platform_instance, env=env, diff --git a/metadata-ingestion/src/datahub/ingestion/source/tableau_common.py b/metadata-ingestion/src/datahub/ingestion/source/tableau_common.py index a2f460feca388..121b2e257a6ba 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/tableau_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/tableau_common.py @@ -533,6 +533,9 @@ def get_platform(connection_type: str) -> str: platform = "mssql" elif connection_type in ("athena"): platform = "athena" + elif connection_type.endswith("_jdbc"): + # e.g. 
convert trino_jdbc -> trino + platform = connection_type[: -len("_jdbc")] else: platform = connection_type return platform diff --git a/metadata-ingestion/src/datahub/utilities/sqlglot_lineage.py b/metadata-ingestion/src/datahub/utilities/sqlglot_lineage.py index 46ca17609f3ea..abe4f82673777 100644 --- a/metadata-ingestion/src/datahub/utilities/sqlglot_lineage.py +++ b/metadata-ingestion/src/datahub/utilities/sqlglot_lineage.py @@ -1280,35 +1280,35 @@ def replace_cte_refs(node: sqlglot.exp.Expression) -> sqlglot.exp.Expression: def create_lineage_sql_parsed_result( query: str, - database: Optional[str], + default_db: Optional[str], platform: str, platform_instance: Optional[str], env: str, - schema: Optional[str] = None, + default_schema: Optional[str] = None, graph: Optional[DataHubGraph] = None, ) -> SqlParsingResult: - needs_close = False - try: - if graph: - schema_resolver = graph._make_schema_resolver( - platform=platform, - platform_instance=platform_instance, - env=env, - ) - else: - needs_close = True - schema_resolver = SchemaResolver( - platform=platform, - platform_instance=platform_instance, - env=env, - graph=None, - ) + if graph: + needs_close = False + schema_resolver = graph._make_schema_resolver( + platform=platform, + platform_instance=platform_instance, + env=env, + ) + else: + needs_close = True + schema_resolver = SchemaResolver( + platform=platform, + platform_instance=platform_instance, + env=env, + graph=None, + ) + try: return sqlglot_lineage( query, schema_resolver=schema_resolver, - default_db=database, - default_schema=schema, + default_db=default_db, + default_schema=default_schema, ) except Exception as e: return SqlParsingResult.make_from_error(e) diff --git a/metadata-ingestion/tests/integration/metabase/metabase_mces_golden.json b/metadata-ingestion/tests/integration/metabase/metabase_mces_golden.json index 0ba6afbd04fc9..10c1c312a4d1c 100644 --- a/metadata-ingestion/tests/integration/metabase/metabase_mces_golden.json +++ 
b/metadata-ingestion/tests/integration/metabase/metabase_mces_golden.json @@ -25,6 +25,9 @@ }, "chartUrl": "http://localhost:3000/card/1", "inputs": [ + { + "string": "urn:li:dataset:(urn:li:dataPlatform:bigquery,acryl-data.public.customer,PROD)" + }, { "string": "urn:li:dataset:(urn:li:dataPlatform:bigquery,acryl-data.public.payment,PROD)" } @@ -34,7 +37,7 @@ }, { "com.linkedin.pegasus2avro.chart.ChartQuery": { - "rawQuery": "SELECT\\n\\tcustomer.customer_id,\\n\\tfirst_name,\\n\\tlast_name,\\n\\tamount,\\n\\tpayment_date,\\n\\trental_id\\nFROM\\n\\tcustomer\\nINNER JOIN payment \\n ON payment.customer_id = customer.customer_id\\nORDER BY payment_date", + "rawQuery": "SELECT\n\tcustomer.customer_id,\n\tfirst_name,\n\tlast_name,\n\tamount,\n\tpayment_date,\n\trental_id\nFROM\n\tcustomer\nINNER JOIN payment \n ON payment.customer_id = customer.customer_id\nORDER BY payment_date", "type": "SQL" } }, @@ -57,7 +60,8 @@ }, "systemMetadata": { "lastObserved": 1636614000000, - "runId": "metabase-test" + "runId": "metabase-test", + "lastRunId": "no-run-id-provided" } }, { @@ -112,7 +116,8 @@ }, "systemMetadata": { "lastObserved": 1636614000000, - "runId": "metabase-test" + "runId": "metabase-test", + "lastRunId": "no-run-id-provided" } }, { @@ -141,6 +146,9 @@ }, "chartUrl": "http://localhost:3000/card/3", "inputs": [ + { + "string": "urn:li:dataset:(urn:li:dataPlatform:bigquery,acryl-data.public.customer,PROD)" + }, { "string": "urn:li:dataset:(urn:li:dataPlatform:bigquery,acryl-data.public.payment,PROD)" } @@ -167,7 +175,60 @@ }, "systemMetadata": { "lastObserved": 1636614000000, - "runId": "metabase-test" + "runId": "metabase-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DashboardSnapshot": { + "urn": "urn:li:dashboard:(metabase,1)", + "aspects": [ + { + "com.linkedin.pegasus2avro.dashboard.DashboardInfo": { + "customProperties": {}, + "title": "Dashboard 1", + "description": "", + "charts": 
[ + "urn:li:chart:(metabase,1)", + "urn:li:chart:(metabase,2)", + "urn:li:chart:(metabase,3)" + ], + "datasets": [], + "lastModified": { + "created": { + "time": 1705398694904, + "actor": "urn:li:corpuser:admin@metabase.com" + }, + "lastModified": { + "time": 1705398694904, + "actor": "urn:li:corpuser:admin@metabase.com" + } + }, + "dashboardUrl": "http://localhost:3000/dashboard/10" + } + }, + { + "com.linkedin.pegasus2avro.common.Ownership": { + "owners": [ + { + "owner": "urn:li:corpuser:admin@metabase.com", + "type": "DATAOWNER" + } + ], + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1636614000000, + "runId": "metabase-test", + "lastRunId": "no-run-id-provided" } }, { @@ -182,20 +243,21 @@ "description": "", "charts": [ "urn:li:chart:(metabase,1)", - "urn:li:chart:(metabase,2)" + "urn:li:chart:(metabase,2)", + "urn:li:chart:(metabase,3)" ], "datasets": [], "lastModified": { "created": { - "time": 1639417721742, + "time": 1705398694904, "actor": "urn:li:corpuser:admin@metabase.com" }, "lastModified": { - "time": 1639417721742, + "time": 1705398694904, "actor": "urn:li:corpuser:admin@metabase.com" } }, - "dashboardUrl": "http://localhost:3000/dashboard/1" + "dashboardUrl": "http://localhost:3000/dashboard/10" } }, { @@ -217,7 +279,8 @@ }, "systemMetadata": { "lastObserved": 1636614000000, - "runId": "metabase-test" + "runId": "metabase-test", + "lastRunId": "no-run-id-provided" } }, { @@ -232,7 +295,8 @@ }, "systemMetadata": { "lastObserved": 1636614000000, - "runId": "metabase-test" + "runId": "metabase-test", + "lastRunId": "no-run-id-provided" } }, { @@ -247,7 +311,8 @@ }, "systemMetadata": { "lastObserved": 1636614000000, - "runId": "metabase-test" + "runId": "metabase-test", + "lastRunId": "no-run-id-provided" } }, { @@ -262,7 +327,8 @@ }, "systemMetadata": { "lastObserved": 1636614000000, - "runId": "metabase-test" + "runId": "metabase-test", + "lastRunId": 
"no-run-id-provided" } }, { @@ -277,7 +343,8 @@ }, "systemMetadata": { "lastObserved": 1636614000000, - "runId": "metabase-test" + "runId": "metabase-test", + "lastRunId": "no-run-id-provided" } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/metabase/setup/card.json b/metadata-ingestion/tests/integration/metabase/setup/card.json index 83bff66e6c9f3..7ded73d02ad7d 100644 --- a/metadata-ingestion/tests/integration/metabase/setup/card.json +++ b/metadata-ingestion/tests/integration/metabase/setup/card.json @@ -172,7 +172,7 @@ "dataset_query": { "type": "native", "native": { - "query": "SELECT\\n\\tcustomer.customer_id,\\n\\tfirst_name,\\n\\tlast_name,\\n\\tamount,\\n\\tpayment_date,\\n\\trental_id\\nFROM\\n\\tcustomer\\nINNER JOIN payment \\n ON payment.customer_id = customer.customer_id\\nORDER BY payment_date", + "query": "SELECT\n\tcustomer.customer_id,\n\tfirst_name,\n\tlast_name,\n\tamount,\n\tpayment_date,\n\trental_id\nFROM\n\tcustomer\nINNER JOIN payment \n ON payment.customer_id = customer.customer_id\nORDER BY payment_date", "template-tags": {} }, "database": 2 diff --git a/metadata-ingestion/tests/integration/metabase/setup/card_1.json b/metadata-ingestion/tests/integration/metabase/setup/card_1.json index 01e35c5b30844..66c46a72997d0 100644 --- a/metadata-ingestion/tests/integration/metabase/setup/card_1.json +++ b/metadata-ingestion/tests/integration/metabase/setup/card_1.json @@ -177,7 +177,7 @@ "dataset_query": { "type": "native", "native": { - "query": "SELECT\\n\\tcustomer.customer_id,\\n\\tfirst_name,\\n\\tlast_name,\\n\\tamount,\\n\\tpayment_date,\\n\\trental_id\\nFROM\\n\\tcustomer\\nINNER JOIN payment \\n ON payment.customer_id = customer.customer_id\\nORDER BY payment_date", + "query": "SELECT\n\tcustomer.customer_id,\n\tfirst_name,\n\tlast_name,\n\tamount,\n\tpayment_date,\n\trental_id\nFROM\n\tcustomer\nINNER JOIN payment \n ON payment.customer_id = customer.customer_id\nORDER BY payment_date", 
"template-tags": {} }, "database": 2 @@ -198,4 +198,4 @@ "collection": null, "created_at": "2021-12-13T17:46:32.77", "public_uuid": null -} \ No newline at end of file +} diff --git a/metadata-ingestion/tests/integration/metabase/setup/collection_dashboards.json b/metadata-ingestion/tests/integration/metabase/setup/collection_dashboards.json new file mode 100644 index 0000000000000..b602d2dfb7dcd --- /dev/null +++ b/metadata-ingestion/tests/integration/metabase/setup/collection_dashboards.json @@ -0,0 +1 @@ +{"total": 1, "data": [{"description": null, "collection_position": null, "database_id": null, "name": "This is a test", "id": 10, "entity_id": "Q4gEaOmoBkfQX3_gXiH9g", "last-edit-info": {"id": 14, "last_name": "Doe", "first_name": "John", "email": "john.doe@somewhere.com", "timestamp": "2024-01-12T14:55:38.43304Z"}, "model": "dashboard"}], "models": ["dashboard"], "limit": null, "offset": null} diff --git a/metadata-ingestion/tests/integration/metabase/setup/collections.json b/metadata-ingestion/tests/integration/metabase/setup/collections.json new file mode 100644 index 0000000000000..a8a98c4e6d62e --- /dev/null +++ b/metadata-ingestion/tests/integration/metabase/setup/collections.json @@ -0,0 +1 @@ +[{"authority_level": null, "can_write": true, "name": "Our analytics", "effective_ancestors": [], "effective_location": null, "parent_id": null, "id": "root", "is_personal": false}, {"authority_level": null, "description": null, "archived": false, "slug": "john_doe_personal_collection", "can_write": true, "name": "John Doe", "personal_owner_id": 14, "type": null, "id": 150, "entity_id": "kdLA_-CQy4F5lL15k8-TU", "location": "/", "namespace": null, "is_personal": true, "created_at": "2024-01-12T11:51:24.394309Z"}] diff --git a/metadata-ingestion/tests/integration/metabase/setup/dashboard.json b/metadata-ingestion/tests/integration/metabase/setup/dashboard.json deleted file mode 100644 index 095abf1bbdc6d..0000000000000 --- 
a/metadata-ingestion/tests/integration/metabase/setup/dashboard.json +++ /dev/null @@ -1,40 +0,0 @@ -[{ - "description": null, - "archived": false, - "collection_position": null, - "creator": { - "email": "admin@metabase.com", - "first_name": "FirstName", - "last_login": "2021-12-13T18:51:32.999", - "is_qbnewb": true, - "is_superuser": true, - "id": 1, - "last_name": "LastName", - "date_joined": "2021-12-13T07:34:21.806", - "common_name": "FirstName LastName" - }, - "enable_embedding": false, - "collection_id": null, - "show_in_getting_started": false, - "name": "Dashboard 1", - "caveats": null, - "creator_id": 1, - "updated_at": "2021-12-13T17:48:41.735", - "made_public_by_id": null, - "embedding_params": null, - "cache_ttl": null, - "id": 1, - "position": null, - "last-edit-info": { - "id": 1, - "email": "admin@metabase.com", - "first_name": "FirstName", - "last_name": "LastName", - "timestamp": "2021-12-13T17:48:41.742" - }, - "parameters": [], - "favorite": false, - "created_at": "2021-12-13T17:46:48.185", - "public_uuid": null, - "points_of_interest": null -}] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/metabase/setup/dashboard_1.json b/metadata-ingestion/tests/integration/metabase/setup/dashboard_1.json index 0b232cd220045..e968093c43850 100644 --- a/metadata-ingestion/tests/integration/metabase/setup/dashboard_1.json +++ b/metadata-ingestion/tests/integration/metabase/setup/dashboard_1.json @@ -2,332 +2,854 @@ "description": null, "archived": false, "collection_position": null, - "ordered_cards": [{ - "sizeX": 4, - "series": [], - "collection_authority_level": null, - "card": { - "description": null, - "archived": false, - "collection_position": null, - "table_id": null, - "result_metadata": [{ - "name": "customer_id", - "display_name": "customer_id", - "base_type": "type/Integer", - "effective_type": "type/Integer", - "field_ref": ["field", "customer_id", { - "base-type": "type/Integer" - }], - "semantic_type": null, - 
"fingerprint": { - "global": { - "distinct-count": 517, - "nil%": 0.0 + "dashcards": [ + { + "size_x": 12, + "dashboard_tab_id": null, + "series": [], + "action_id": null, + "collection_authority_level": null, + "card": { + "description": null, + "archived": false, + "collection_position": null, + "table_id": null, + "result_metadata": [ + { + "display_name": "EVENT_DATE", + "field_ref": [ + "field", + "EVENT_DATE", + { + "base-type": "type/Date" + } + ], + "name": "EVENT_DATE", + "base_type": "type/Date", + "effective_type": "type/Date", + "semantic_type": null, + "fingerprint": { + "global": { + "distinct-count": 28, + "nil%": 0 + }, + "type": { + "type/DateTime": { + "earliest": "2023-12-04T00:00:00Z", + "latest": "2024-01-15T00:00:00Z" + } + } + } }, - "type": { - "type/Number": { - "min": 1.0, - "q1": 127.95550051624855, - "q3": 457.48181481488376, - "max": 599.0, - "sd": 183.35453319901166, - "avg": 293.316 + { + "display_name": "AND_VIEWERS", + "field_ref": [ + "field", + "AND_VIEWERS", + { + "base-type": "type/Number" + } + ], + "name": "AND_VIEWERS", + "base_type": "type/Number", + "effective_type": "type/Number", + "semantic_type": null, + "fingerprint": { + "global": { + "distinct-count": 28, + "nil%": 0 + }, + "type": { + "type/Number": { + "min": 4720, + "q1": 5083.5, + "q3": 9003, + "max": 10560, + "sd": 2090.2420089751945, + "avg": 6688.214285714285 + } + } } - } - } - }, { - "name": "first_name", - "display_name": "first_name", - "base_type": "type/Text", - "effective_type": "type/Text", - "field_ref": ["field", "first_name", { - "base-type": "type/Text" - }], - "semantic_type": "type/Name", - "fingerprint": { - "global": { - "distinct-count": 509, - "nil%": 0.0 }, - "type": { - "type/Text": { - "percent-json": 0.0, - "percent-url": 0.0, - "percent-email": 0.0, - "percent-state": 0.0035, - "average-length": 5.629 + { + "display_name": "AND_REDACTED", + "field_ref": [ + "field", + "AND_REDACTED", + { + "base-type": "type/Number" + } + ], + "name": 
"AND_REDACTED", + "base_type": "type/Number", + "effective_type": "type/Number", + "semantic_type": null, + "fingerprint": { + "global": { + "distinct-count": 28, + "nil%": 0 + }, + "type": { + "type/Number": { + "min": 948, + "q1": 2019.5, + "q3": 2500.5, + "max": 3180, + "sd": 460.56365857271413, + "avg": 2251.0714285714284 + } + } } - } - } - }, { - "name": "last_name", - "display_name": "last_name", - "base_type": "type/Text", - "effective_type": "type/Text", - "field_ref": ["field", "last_name", { - "base-type": "type/Text" - }], - "semantic_type": "type/Name", - "fingerprint": { - "global": { - "distinct-count": 517, - "nil%": 0.0 }, - "type": { - "type/Text": { - "percent-json": 0.0, - "percent-url": 0.0, - "percent-email": 0.0, - "percent-state": 0.0015, - "average-length": 6.126 + { + "display_name": "AND_REDACTED", + "field_ref": [ + "field", + "AND_REDACTED", + { + "base-type": "type/Number" + } + ], + "name": "AND_REDACTED", + "base_type": "type/Number", + "effective_type": "type/Number", + "semantic_type": null, + "fingerprint": { + "global": { + "distinct-count": 28, + "nil%": 0 + }, + "type": { + "type/Number": { + "min": 3545, + "q1": 10909, + "q3": 13916, + "max": 18861, + "sd": 3132.780684756446, + "avg": 12122.32142857143 + } + } } - } - } - }, { - "name": "amount", - "display_name": "amount", - "base_type": "type/Decimal", - "effective_type": "type/Decimal", - "field_ref": ["field", "amount", { - "base-type": "type/Decimal" - }], - "semantic_type": null, - "fingerprint": { - "global": { - "distinct-count": 11, - "nil%": 0.0 }, - "type": { - "type/Number": { - "min": 0.99, - "q1": 2.399411317392306, - "q3": 5.52734176879965, - "max": 10.99, - "sd": 2.352151368009511, - "avg": 4.1405 + { + "display_name": "IOS_VIEWERS", + "field_ref": [ + "field", + "IOS_VIEWERS", + { + "base-type": "type/Number" + } + ], + "name": "IOS_VIEWERS", + "base_type": "type/Number", + "effective_type": "type/Number", + "semantic_type": null, + "fingerprint": { + 
"global": { + "distinct-count": 28, + "nil%": 0 + }, + "type": { + "type/Number": { + "min": 6477, + "q1": 7481.5, + "q3": 10428.5, + "max": 13182, + "sd": 1948.047456520796, + "avg": 9075.17857142857 + } + } } - } - } - }, { - "name": "payment_date", - "display_name": "payment_date", - "base_type": "type/DateTime", - "effective_type": "type/DateTime", - "field_ref": ["field", "payment_date", { - "base-type": "type/DateTime" - }], - "semantic_type": null, - "fingerprint": { - "global": { - "distinct-count": 1998, - "nil%": 0.0 }, - "type": { - "type/DateTime": { - "earliest": "2007-02-14T21:21:59.996577Z", - "latest": "2007-02-21T19:27:46.996577Z" + { + "display_name": "IOS_REDACTED", + "field_ref": [ + "field", + "IOS_REDACTED", + { + "base-type": "type/Number" + } + ], + "name": "IOS_REDACTED", + "base_type": "type/Number", + "effective_type": "type/Number", + "semantic_type": null, + "fingerprint": { + "global": { + "distinct-count": 28, + "nil%": 0 + }, + "type": { + "type/Number": { + "min": 1470, + "q1": 3020, + "q3": 3806, + "max": 4670, + "sd": 665.7415088559197, + "avg": 3415.8571428571427 + } + } } - } - } - }, { - "name": "rental_id", - "display_name": "rental_id", - "base_type": "type/Integer", - "effective_type": "type/Integer", - "field_ref": ["field", "rental_id", { - "base-type": "type/Integer" - }], - "semantic_type": null, - "fingerprint": { - "global": { - "distinct-count": 2000, - "nil%": 0.0 }, - "type": { - "type/Number": { - "min": 1158.0, - "q1": 1731.7967120913397, - "q3": 2871.359273326854, - "max": 4591.0, - "sd": 660.7468728104022, - "avg": 2303.4565 + { + "display_name": "IOS_REDACTED", + "field_ref": [ + "field", + "IOS_REDACTED", + { + "base-type": "type/Number" + } + ], + "name": "IOS_REDACTED", + "base_type": "type/Number", + "effective_type": "type/Number", + "semantic_type": null, + "fingerprint": { + "global": { + "distinct-count": 28, + "nil%": 0 + }, + "type": { + "type/Number": { + "min": 4872, + "q1": 15019.5, + "q3": 20457, 
+ "max": 27466, + "sd": 4688.492913816769, + "avg": 17683.89285714286 + } + } + } + }, + { + "display_name": "IOS_REDACTED/IOS_VIEWERS", + "field_ref": [ + "field", + "IOS_REDACTED/IOS_VIEWERS", + { + "base-type": "type/Number" + } + ], + "name": "IOS_REDACTED/IOS_VIEWERS", + "base_type": "type/Number", + "effective_type": "type/Number", + "semantic_type": null, + "fingerprint": { + "global": { + "distinct-count": 28, + "nil%": 0 + }, + "type": { + "type/Number": { + "min": 0.662587, + "q1": 1.8403745, + "q3": 2.241517, + "max": 2.576166, + "sd": 0.4488826998266724, + "avg": 1.974007857142857 + } + } + } + }, + { + "display_name": "AND_REDACTED/AND_VIEWERS", + "field_ref": [ + "field", + "AND_REDACTED/AND_VIEWERS", + { + "base-type": "type/Number" + } + ], + "name": "AND_REDACTED/AND_VIEWERS", + "base_type": "type/Number", + "effective_type": "type/Number", + "semantic_type": null, + "fingerprint": { + "global": { + "distinct-count": 28, + "nil%": 0 + }, + "type": { + "type/Number": { + "min": 0.671656, + "q1": 1.3536655, + "q3": 2.5325145, + "max": 3.097553, + "sd": 0.6816847359625038, + "avg": 1.93937275 + } + } + } + }, + { + "display_name": "IOS_REDACTED/IOS_VIEWERS", + "field_ref": [ + "field", + "IOS_REDACTED/IOS_VIEWERS", + { + "base-type": "type/Number" + } + ], + "name": "IOS_REDACTED/IOS_VIEWERS", + "base_type": "type/Number", + "effective_type": "type/Number", + "semantic_type": null, + "fingerprint": { + "global": { + "distinct-count": 28, + "nil%": 0 + }, + "type": { + "type/Number": { + "min": 0.199918, + "q1": 0.34496099999999996, + "q3": 0.4352085, + "max": 0.47286, + "sd": 0.06928869477079941, + "avg": 0.3833206785714286 + } + } + } + }, + { + "display_name": "AND_REDACTED/AND_VIEWERS", + "field_ref": [ + "field", + "AND_REDACTED/AND_VIEWERS", + { + "base-type": "type/Number" + } + ], + "name": "AND_REDACTED/AND_VIEWERS", + "base_type": "type/Number", + "effective_type": "type/Number", + "semantic_type": null, + "fingerprint": { + "global": { + 
"distinct-count": 28, + "nil%": 0 + }, + "type": { + "type/Number": { + "min": 0.179613, + "q1": 0.245343, + "q3": 0.475772, + "max": 0.522253, + "sd": 0.11732033433182058, + "avg": 0.3620892142857142 + } + } } } - } - }], - "database_id": 2, - "enable_embedding": false, - "collection_id": null, - "query_type": "native", - "name": "Customer Payment", - "query_average_duration": 820, - "creator_id": 1, - "moderation_reviews": [], - "updated_at": "2021-12-13T17:48:40.478", - "made_public_by_id": null, - "embedding_params": null, - "cache_ttl": null, - "dataset_query": { - "type": "native", - "native": { - "query": "SELECT\\n\\tcustomer.customer_id,\\n\\tfirst_name,\\n\\tlast_name,\\n\\tamount,\\n\\tpayment_date,\\n\\trental_id\\nFROM\\n\\tcustomer\\nINNER JOIN payment \\n ON payment.customer_id = customer.customer_id\\nORDER BY payment_date", - "template-tags": {} + ], + "can_write": true, + "database_id": 3, + "enable_embedding": false, + "collection_id": 112, + "query_type": "native", + "name": "REDACTED iOS vs. Android", + "query_average_duration": 50982, + "creator_id": 42, + "moderation_reviews": [], + "updated_at": "2024-01-16T13:34:29.916717Z", + "made_public_by_id": null, + "embedding_params": null, + "cache_ttl": null, + "dataset_query": { + "type": "native", + "native": { + "query": "-- 1. Table with redacted search users Android\n-- 2. Table with redacted search users iOS \n-- 3. Redacted from Android redacted\n-- 4. redacted from iOS\n-- 5. Compare the numbers iOS vs. Android\n\n\n-- 1. Table with redacted search users Android (to include date, platform, auth_account_id)\n-- 2. Table with redacted search users iOS (to include date, platform, auth_account_id)\n-- 3. Redacted from Android redacted (to include date, platform, count of redacted)\n-- 4. Redacted from iOS redacted (to include date, plaform, count of redacted)\n-- 5. Compare the numbers iOS vs. 
Android\n\nwith AND_viewers as \n(\nselect event_date, platform, auth_account_id \nfrom TEAMS_PRD.REDACTED.MRT_CURR__MPARTICLE_SCREEN_VIEWS\nwhere screen_name='redacted_search'\nand event_date>'2023-12-01'\nand platform='Android'\nand dayofweekiso(event_date) NOT IN (6,7)\ngroup by event_date, platform, auth_account_id\norder by event_date desc\n), \niOS_viewers as \n(\nselect event_date, platform, auth_account_id \nfrom TEAMS_PRD.REDACTED.MRT_CURR__MPARTICLE_SCREEN_VIEWS\nwhere screen_name='redacted_search'\nand event_date>'2023-12-01'\nand platform='iOS'\nand dayofweekiso(event_date) NOT IN (6,7)\ngroup by event_date, platform, auth_account_id\norder by event_date desc\n), \nAND_redacted as\n(\nselect redacted_ts::date as redacted_date, platform, count(distinct at.auth_account_id) as AND_redacted, count(group_redacted_id) as AND_redacted\nfrom TEAMS_PRD.REDACTED.MRT_CURR__REDACTED_CUSTOMER at\njoin AND_viewers av on av.event_date=at.redacted_ts::date and av.auth_account_id=at.auth_account_id\nwhere instrument_type='REDACTED'\ngroup by 1,2\norder by 1 desc\n), \niOS_redacted as\n(\nselect redacted_ts::date as redacted_date, platform, count(distinct it.auth_account_id) as iOS_redacted, count(group_redacted_id) as iOS_redacted\nfrom TEAMS_PRD.REDACTED.MRT_CURR__REDACTED_CUSTOMER it\njoin iOS_viewers iv on iv.event_date=it.redacted_ts::date and iv.auth_account_id=it.auth_account_id\nwhere instrument_type='REDACTED'\ngroup by 1,2\norder by 1 desc\n)\nselect a.event_date, count(distinct a.auth_account_id) as AND_viewers, AND_redacted, AND_redacted, count(distinct i.auth_account_id) as iOS_viewers, iOS_redacted, iOS_redacted, iOS_redacted/iOS_viewers, AND_redacted/AND_viewers, iOS_redacted/iOS_viewers, AND_redacted/AND_viewers\nfrom AND_VIEWERS a\njoin AND_redacted at\non a.event_date=at.redacted_date\njoin ios_viewers i\non a.event_date=i.event_date\njoin ios_redacted it\non i.event_date=it.redacted_date\ngroup by 1, 3, 4, 6, 7\norder by 1 desc\n\n\n", + 
"template-tags": {} + }, + "database": 3 }, - "database": 2 - }, - "id": 1, - "display": "table", - "visualization_settings": { - "table.pivot_column": "amount", - "table.cell_column": "customer_id" + "id": 1, + "parameter_mappings": [], + "display": "line", + "entity_id": "DhQgvvtTEarZH8yQBlqES", + "collection_preview": true, + "visualization_settings": { + "graph.dimensions": [ + "EVENT_DATE" + ], + "series_settings": { + "IOS_REDACTED/IOS_VIEWERS": { + "axis": "right" + }, + "AND_REDACTED/AND_VIEWERS": { + "axis": "right" + } + }, + "graph.metrics": [ + "IOS_REDACTED/IOS_VIEWERS", + "AND_REDACTED/AND_VIEWERS", + "AND_VIEWERS", + "IOS_VIEWERS" + ] + }, + "metabase_version": "v0.48.3 (80d8323)", + "parameters": [], + "dataset": false, + "created_at": "2024-01-16T09:44:49.407327Z", + "public_uuid": null }, - "created_at": "2021-12-13T17:46:32.77", - "public_uuid": null + "updated_at": "2024-01-16T09:45:45.410379Z", + "col": 0, + "id": 12, + "parameter_mappings": [], + "card_id": 1, + "entity_id": "tA9M9vJlTHG0KxQnvknKW", + "visualization_settings": {}, + "size_y": 6, + "dashboard_id": 1, + "created_at": "2024-01-16T09:45:45.410379Z", + "row": 0 }, - "updated_at": "2021-12-13T17:48:41.68", - "col": 0, - "id": 1, - "parameter_mappings": [], - "card_id": 1, - "visualization_settings": {}, - "dashboard_id": 1, - "created_at": "2021-12-13T17:46:52.278", - "sizeY": 4, - "row": 0 - }, { - "sizeX": 4, - "series": [], - "collection_authority_level": null, - "card": { - "description": null, - "archived": false, - "collection_position": null, - "table_id": 21, - "result_metadata": [{ - "semantic_type": "type/Category", - "coercion_strategy": null, - "name": "rating", - "field_ref": ["field", 131, null], - "effective_type": "type/*", - "id": 131, - "display_name": "Rating", - "fingerprint": { - "global": { - "distinct-count": 5, - "nil%": 0.0 + { + "size_x": 12, + "dashboard_tab_id": null, + "series": [], + "action_id": null, + "collection_authority_level": null, + "card": { + 
"description": null, + "archived": false, + "collection_position": null, + "table_id": null, + "result_metadata": [ + { + "display_name": "CALENDAR_DATE", + "field_ref": [ + "field", + "CALENDAR_DATE", + { + "base-type": "type/Date" + } + ], + "name": "CALENDAR_DATE", + "base_type": "type/Date", + "effective_type": "type/Date", + "semantic_type": null, + "fingerprint": { + "global": { + "distinct-count": 30, + "nil%": 0 + }, + "type": { + "type/DateTime": { + "earliest": "2023-12-17T00:00:00Z", + "latest": "2024-01-15T00:00:00Z" + } + } + } + }, + { + "display_name": "REDACTED", + "field_ref": [ + "field", + "REDACTED", + { + "base-type": "type/Number" + } + ], + "name": "REDACTED", + "base_type": "type/Number", + "effective_type": "type/Number", + "semantic_type": null, + "fingerprint": { + "global": { + "distinct-count": 27, + "nil%": 0 + }, + "type": { + "type/Number": { + "min": 682175, + "q1": 738644, + "q3": 805974, + "max": 847312, + "sd": 46783.99996291344, + "avg": 775505.5666666667 + } + } + } }, - "type": { - "type/Text": { - "percent-json": 0.0, - "percent-url": 0.0, - "percent-email": 0.0, - "percent-state": 0.0, - "average-length": 2.926 + { + "display_name": "REDACTEDRS", + "field_ref": [ + "field", + "REDACTEDRS", + { + "base-type": "type/Number" + } + ], + "name": "REDACTEDRS", + "base_type": "type/Number", + "effective_type": "type/Number", + "semantic_type": null, + "fingerprint": { + "global": { + "distinct-count": 27, + "nil%": 0 + }, + "type": { + "type/Number": { + "min": 46173, + "q1": 47556.94427191, + "q3": 48890, + "max": 50769, + "sd": 1164.9989906758983, + "avg": 48354.8 + } + } + } + }, + { + "display_name": "REDACTED/REDACTEDRS", + "field_ref": [ + "field", + "REDACTED/REDACTEDRS", + { + "base-type": "type/Number" + } + ], + "name": "REDACTED/REDACTEDRS", + "base_type": "type/Number", + "effective_type": "type/Number", + "semantic_type": null, + "fingerprint": { + "global": { + "distinct-count": 27, + "nil%": 0 + }, + "type": { + 
"type/Number": { + "min": 14.706168, + "q1": 15.398378, + "q3": 16.920933, + "max": 17.289964, + "sd": 0.8020030995826715, + "avg": 16.033017833333336 + } + } } } + ], + "can_write": true, + "database_id": 3, + "enable_embedding": false, + "collection_id": 112, + "query_type": "native", + "name": "Redacted redacted per redacted user", + "query_average_duration": 20433, + "creator_id": 1, + "moderation_reviews": [], + "updated_at": "2024-01-16T13:34:29.916788Z", + "made_public_by_id": null, + "embedding_params": null, + "cache_ttl": null, + "dataset_query": { + "type": "native", + "native": { + "query": "with dd as (\nselect distinct calendar_date as calendar_date from TEAMS_PRD.DATA_PLATFORM_MART.MRT__CALENDAR_DATES\nwhere calendar_date>'2022-01-01'\n), \nredacted as\n(\nselect dd.calendar_date, count(distinct auth_account_id) as redacted, max(redacted_ts), min(redacted_ts)\nfrom TEAMS_PRD.REDACTED.MRT_CURR__REDACTED_CUSTOMER t\njoin dd on redacted_ts::date BETWEEN dd.calendar_date-29 and dd.calendar_date\nwhere redacted_type='REGULAR'\nand instrument_type = 'REDACTED'\ngroup by dd.calendar_date\norder by dd.calendar_date desc\n),\nredacted as\n(\nselect dd.calendar_date, count(group_redacted_id) as redacted, max(redacted_ts), min(redacted_ts)\nfrom TEAMS_PRD.REDACTED.MRT_CURR__REDACTED_CUSTOMER t\njoin dd on redacted_ts::date BETWEEN dd.calendar_date-29 and dd.calendar_date\nwhere redacted_type='REGULAR'\nand instrument_type = 'REDACTED'\ngroup by dd.calendar_date\norder by dd.calendar_date desc\n)\nselect dd.calendar_date, redacted, redacted, redacted/redacted\nfrom dd\njoin redacted t on dd.calendar_date=t.calendar_date\njoin redacted tr on dd.calendar_date=tr.calendar_date\ngroup by dd.calendar_date, redacted, redacted, redacted/redacted\norder by dd.calendar_date desc \nlimit 30", + "template-tags": {} + }, + "database": 3 + }, + "id": 2, + "parameter_mappings": [], + "display": "line", + "entity_id": "b1jUcPcQM0XFMuviv4g3K", + "collection_preview": true, + 
"visualization_settings": { + "graph.dimensions": [ + "CALENDAR_DATE" + ], + "series_settings": { + "REDACTEDRS": { + "axis": "right" + } + }, + "graph.metrics": [ + "REDACTED/REDACTEDRS", + "REDACTEDRS" + ] }, - "base_type": "type/PostgresEnum" - }, { - "name": "count", - "display_name": "Count", - "base_type": "type/BigInteger", - "effective_type": "type/BigInteger", - "semantic_type": "type/Quantity", - "field_ref": ["aggregation", 0], - "fingerprint": { - "global": { - "distinct-count": 5, - "nil%": 0.0 + "metabase_version": "v0.48.3 (80d8323)", + "parameters": [], + "dataset": false, + "created_at": "2024-01-16T09:50:09.487369Z", + "public_uuid": null + }, + "updated_at": "2024-01-16T09:50:34.394488Z", + "col": 12, + "id": 1, + "parameter_mappings": [], + "card_id": 2, + "entity_id": "lXypX5aa14HjkN_Im82C2", + "visualization_settings": {}, + "size_y": 6, + "dashboard_id": 1, + "created_at": "2024-01-16T09:50:34.394488Z", + "row": 0 + }, + { + "size_x": 12, + "dashboard_tab_id": null, + "series": [], + "action_id": null, + "collection_authority_level": null, + "card": { + "description": null, + "archived": false, + "collection_position": null, + "table_id": null, + "result_metadata": [ + { + "display_name": "EVENT_DATE", + "field_ref": [ + "field", + "EVENT_DATE", + { + "base-type": "type/Date" + } + ], + "name": "EVENT_DATE", + "base_type": "type/Date", + "effective_type": "type/Date", + "semantic_type": null, + "fingerprint": { + "global": { + "distinct-count": 11, + "nil%": 0 + }, + "type": { + "type/DateTime": { + "earliest": "2024-01-01T00:00:00Z", + "latest": "2024-01-15T00:00:00Z" + } + } + } + }, + { + "display_name": "KNOCKOUT", + "field_ref": [ + "field", + "KNOCKOUT", + { + "base-type": "type/Number" + } + ], + "name": "KNOCKOUT", + "base_type": "type/Number", + "effective_type": "type/Number", + "semantic_type": null, + "fingerprint": { + "global": { + "distinct-count": 11, + "nil%": 0 + }, + "type": { + "type/Number": { + "min": 175, + "q1": 
853.75, + "q3": 1116.75, + "max": 1174, + "sd": 296.0767713709648, + "avg": 916.3636363636364 + } + } + } + }, + { + "display_name": "EXPIRY", + "field_ref": [ + "field", + "EXPIRY", + { + "base-type": "type/Number" + } + ], + "name": "EXPIRY", + "base_type": "type/Number", + "effective_type": "type/Number", + "semantic_type": null, + "fingerprint": { + "global": { + "distinct-count": 10, + "nil%": 0 + }, + "type": { + "type/Number": { + "min": 78, + "q1": 295.5, + "q3": 408.3925271309261, + "max": 431, + "sd": 105.10704500218294, + "avg": 336.90909090909093 + } + } + } }, - "type": { - "type/Number": { - "min": 178.0, - "q1": 190.0, - "q3": 213.25, - "max": 223.0, - "sd": 17.131841699011815, - "avg": 200.0 + { + "display_name": "PRODUCT", + "field_ref": [ + "field", + "PRODUCT", + { + "base-type": "type/Number" + } + ], + "name": "PRODUCT", + "base_type": "type/Number", + "effective_type": "type/Number", + "semantic_type": null, + "fingerprint": { + "global": { + "distinct-count": 9, + "nil%": 0 + }, + "type": { + "type/Number": { + "min": 57, + "q1": 163.75, + "q3": 233, + "max": 255, + "sd": 59.31119777763877, + "avg": 195.27272727272728 + } + } + } + }, + { + "display_name": "ISSUER", + "field_ref": [ + "field", + "ISSUER", + { + "base-type": "type/Number" + } + ], + "name": "ISSUER", + "base_type": "type/Number", + "effective_type": "type/Number", + "semantic_type": null, + "fingerprint": { + "global": { + "distinct-count": 10, + "nil%": 0 + }, + "type": { + "type/Number": { + "min": 43, + "q1": 214, + "q3": 292.25, + "max": 304, + "sd": 79.35879397910594, + "avg": 245.72727272727272 + } + } } } - } - }], - "database_id": 2, - "enable_embedding": false, - "collection_id": null, - "query_type": "query", - "name": "Films, Count, Grouped by Rating, Filtered by Release Year, Sorted by [Unknown Field] descending", - "query_average_duration": 25, - "creator_id": 1, - "moderation_reviews": [], - "updated_at": "2021-12-13T17:48:39.999", - "made_public_by_id": null, - 
"embedding_params": null, - "cache_ttl": null, - "dataset_query": { - "query": { - "source-table": 21, - "breakout": [ - ["field", 131, null] - ], - "aggregation": [ - ["count"] - ], - "order-by": [ - ["desc", ["aggregation", 0]] + ], + "can_write": true, + "database_id": 3, + "enable_embedding": false, + "collection_id": 112, + "query_type": "native", + "name": "Filter popularity", + "query_average_duration": 2830, + "creator_id": 1, + "moderation_reviews": [], + "updated_at": "2024-01-16T13:34:30.128815Z", + "made_public_by_id": null, + "embedding_params": null, + "cache_ttl": null, + "dataset_query": { + "type": "native", + "native": { + "query": "with issuer as\n(\n select event_date, count(*) as issuer_clicks, count(distinct auth_account_id) as issuer\n from TEAMS_PRD.REDACTED.MRT_CURR__MPARTICLE_EVENTS\n where event_name='redacted_search_filter_button_tapped' \n and event_attributes:filter_option::varchar='issuer'\n and event_date>'2023-12-31'\n and platform='Android'\n and dayofweekiso(event_date) NOT IN (6,7)\n and event_attributes:redacted_type::varchar='knock_out_product'\n group by 1\n order by 1 desc\n), expiry as\n(\n select event_date, count(*) as expiry_clicks, count(distinct auth_account_id) as expiry\n from TEAMS_PRD.REDACTED.MRT_CURR__MPARTICLE_EVENTS\n where event_name='redacted_search_filter_button_tapped' \n and event_attributes:filter_option::varchar='expiry'\n and event_date>'2023-12-31'\n and platform='Android'\n and dayofweekiso(event_date) NOT IN (6,7)\n and event_attributes:redacted_type::varchar='knock_out_product'\n group by 1\n order by 1 desc\n), product as\n(\n select event_date, count(*) as product_clicks, count(distinct auth_account_id) as product\n from TEAMS_PRD.REDACTED.MRT_CURR__MPARTICLE_EVENTS\n where event_name='redacted_search_filter_button_tapped' \n and event_attributes:filter_option::varchar='product'\n and event_date>'2023-12-31'\n and platform='Android'\n and dayofweekiso(event_date) NOT IN (6,7)\n and 
event_attributes:redacted_type::varchar='knock_out_product'\n group by 1\n order by 1 desc\n), knockout as \n(\n select event_date, count(*) as knockout_clicks, count(distinct auth_account_id) as knockout\n from TEAMS_PRD.SCHEMA.MRT_CURR__MPARTICLE_EVENTS\n where event_name='redacted_search_filter_button_tapped' \n and event_attributes:filter_option::varchar='knockout'\n and event_date>'2023-12-31'\n and platform='Android'\n and dayofweekiso(event_date) NOT IN (6,7)\n and event_attributes:redacted_type::varchar='knock_out_product'\n group by 1\n order by 1 desc\n)\nselect k.event_date, knockout, expiry, product, issuer\nfrom knockout k\njoin expiry e on k.event_date=e.event_date\njoin issuer i on k.event_date=i.event_date\njoin product p on k.event_date=p.event_date\nwhere k.event_date BigqueryTableSnapshot: + now = datetime.now(tz=timezone.utc) + return BigqueryTableSnapshot( + name="table-snapshot", + created=now - timedelta(days=10), + last_altered=now - timedelta(hours=1), + comment="comment1", + ddl="CREATE SNAPSHOT TABLE 1", + size_in_bytes=None, + rows_count=None, + snapshot_time=now - timedelta(days=10), + base_table_identifier=BigqueryTableIdentifier( + project_id="test-project", + dataset="test-dataset", + table="test-table", + ), + ) + + +@patch.object(BigQuerySchemaApi, "get_query_result") +@patch.object(BigQueryV2Config, "get_bigquery_client") +def test_get_snapshots_for_dataset( + get_bq_client_mock: Mock, + query_mock: Mock, + bigquery_snapshot: BigqueryTableSnapshot, +) -> None: + client_mock = MagicMock() + get_bq_client_mock.return_value = client_mock + assert bigquery_snapshot.last_altered + assert bigquery_snapshot.base_table_identifier + row1 = create_row( + dict( + table_name=bigquery_snapshot.name, + created=bigquery_snapshot.created, + last_altered=bigquery_snapshot.last_altered.timestamp() * 1000, + comment=bigquery_snapshot.comment, + ddl=bigquery_snapshot.ddl, + snapshot_time=bigquery_snapshot.snapshot_time, + table_type="SNAPSHOT", + 
base_table_catalog=bigquery_snapshot.base_table_identifier.project_id, + base_table_schema=bigquery_snapshot.base_table_identifier.dataset, + base_table_name=bigquery_snapshot.base_table_identifier.table, + ) + ) + query_mock.return_value = [row1] + bigquery_data_dictionary = BigQuerySchemaApi( + BigQueryV2Report().schema_api_perf, client_mock + ) + + snapshots = bigquery_data_dictionary.get_snapshots_for_dataset( + project_id="test-project", + dataset_name="test-dataset", + has_data_read=False, + report=BigQueryV2Report(), + ) + assert list(snapshots) == [bigquery_snapshot] + + +@patch.object(BigQueryV2Config, "get_bigquery_client") +def test_gen_snapshot_dataset_workunits(get_bq_client_mock, bigquery_snapshot): + project_id = "test-project" + dataset_name = "test-dataset" + config = BigQueryV2Config.parse_obj( + { + "project_id": project_id, + } + ) + source: BigqueryV2Source = BigqueryV2Source( + config=config, ctx=PipelineContext(run_id="test") + ) + + gen = source.gen_snapshot_dataset_workunits( + bigquery_snapshot, [], project_id, dataset_name + ) + mcp = cast(MetadataChangeProposalWrapper, list(gen)[2].metadata) + dataset_properties = cast(DatasetPropertiesClass, mcp.aspect) + assert dataset_properties.customProperties["snapshot_ddl"] == bigquery_snapshot.ddl + assert dataset_properties.customProperties["snapshot_time"] == str( + bigquery_snapshot.snapshot_time + ) + + @pytest.mark.parametrize( "table_name, expected_table_prefix, expected_shard", [ diff --git a/metadata-ingestion/tests/unit/test_dbt_source.py b/metadata-ingestion/tests/unit/test_dbt_source.py index 0fbe9ecbcc43c..737cf6aca33cc 100644 --- a/metadata-ingestion/tests/unit/test_dbt_source.py +++ b/metadata-ingestion/tests/unit/test_dbt_source.py @@ -1,6 +1,7 @@ from typing import Dict, List, Union from unittest import mock +import pytest from pydantic import ValidationError from datahub.emitter import mce_builder @@ -180,14 +181,12 @@ def test_dbt_entity_emission_configuration(): 
"target_platform": "dummy_platform", "entities_enabled": {"models": "Only", "seeds": "Only"}, } - try: + with pytest.raises( + ValidationError, + match="Cannot have more than 1 type of entity emission set to ONLY", + ): DBTCoreConfig.parse_obj(config_dict) - except ValidationError as ve: - assert len(ve.errors()) == 1 - assert ( - "Cannot have more than 1 type of entity emission set to ONLY" - in ve.errors()[0]["msg"] - ) + # valid config config_dict = { "manifest_path": "dummy_path", @@ -198,6 +197,26 @@ def test_dbt_entity_emission_configuration(): DBTCoreConfig.parse_obj(config_dict) +def test_dbt_s3_config(): + # test missing aws config + config_dict: dict = { + "manifest_path": "s3://dummy_path", + "catalog_path": "s3://dummy_path", + "target_platform": "dummy_platform", + } + with pytest.raises(ValidationError, match="provide aws_connection"): + DBTCoreConfig.parse_obj(config_dict) + + # valid config + config_dict = { + "manifest_path": "s3://dummy_path", + "catalog_path": "s3://dummy_path", + "target_platform": "dummy_platform", + "aws_connection": {}, + } + DBTCoreConfig.parse_obj(config_dict) + + def test_default_convert_column_urns_to_lowercase(): config_dict = { "manifest_path": "dummy_path", diff --git a/metadata-integration/java/spark-lineage/src/test/java/datahub/spark/TestSparkJobsLineage.java b/metadata-integration/java/spark-lineage/src/test/java/datahub/spark/TestSparkJobsLineage.java index fa896814d16f6..a4eb035b0abce 100644 --- a/metadata-integration/java/spark-lineage/src/test/java/datahub/spark/TestSparkJobsLineage.java +++ b/metadata-integration/java/spark-lineage/src/test/java/datahub/spark/TestSparkJobsLineage.java @@ -136,6 +136,7 @@ public static void resetBaseExpectations() { .respond(HttpResponse.response().withStatusCode(200)); } + @BeforeClass public static void init() { mockServer = startClientAndServer(GMS_PORT); resetBaseExpectations(); @@ -219,8 +220,12 @@ private static void clear() { @AfterClass public static void tearDown() 
throws Exception { - spark.stop(); - mockServer.stop(); + if (spark != null) { + spark.stop(); + } + if (mockServer != null) { + mockServer.stop(); + } } private static void check(List expected, List actual) { diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESBrowseDAO.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESBrowseDAO.java index 3c71a2dfd9180..0a9a9fbbad086 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESBrowseDAO.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESBrowseDAO.java @@ -19,6 +19,7 @@ import com.linkedin.metadata.config.search.SearchConfiguration; import com.linkedin.metadata.config.search.custom.CustomSearchConfiguration; import com.linkedin.metadata.models.EntitySpec; +import com.linkedin.metadata.models.annotation.SearchableAnnotation; import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.metadata.query.filter.Filter; import com.linkedin.metadata.search.elasticsearch.query.request.SearchRequestHandler; @@ -33,6 +34,7 @@ import java.util.Collections; import java.util.List; import java.util.Map; +import java.util.Set; import java.util.stream.Collectors; import javax.annotation.Nonnull; import javax.annotation.Nullable; @@ -554,7 +556,8 @@ private QueryBuilder buildQueryStringV2( queryBuilder.filter(QueryBuilders.rangeQuery(BROWSE_PATH_V2_DEPTH).gt(browseDepthVal)); - queryBuilder.filter(SearchRequestHandler.getFilterQuery(filter)); + queryBuilder.filter( + SearchRequestHandler.getFilterQuery(filter, entitySpec.getSearchableFieldTypes())); return queryBuilder; } @@ -580,7 +583,18 @@ private QueryBuilder buildQueryStringBrowseAcrossEntities( queryBuilder.filter(QueryBuilders.rangeQuery(BROWSE_PATH_V2_DEPTH).gt(browseDepthVal)); - queryBuilder.filter(SearchRequestHandler.getFilterQuery(filter)); + Map> searchableFields = + entitySpecs.stream() + .flatMap(entitySpec -> 
entitySpec.getSearchableFieldTypes().entrySet().stream()) + .collect( + Collectors.toMap( + Map.Entry::getKey, + Map.Entry::getValue, + (set1, set2) -> { + set1.addAll(set2); + return set1; + })); + queryBuilder.filter(SearchRequestHandler.getFilterQuery(filter, searchableFields)); return queryBuilder; } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESSearchDAO.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESSearchDAO.java index 0eb44edfb11de..7de2770626ae3 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESSearchDAO.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESSearchDAO.java @@ -78,7 +78,7 @@ public long docCount(@Nonnull String entityName) { EntitySpec entitySpec = entityRegistry.getEntitySpec(entityName); CountRequest countRequest = new CountRequest(indexConvention.getIndexName(entitySpec)) - .query(SearchRequestHandler.getFilterQuery(null)); + .query(SearchRequestHandler.getFilterQuery(null, entitySpec.getSearchableFieldTypes())); try (Timer.Context ignored = MetricUtils.timer(this.getClass(), "docCount").time()) { return client.count(countRequest, RequestOptions.DEFAULT).getCount(); } catch (IOException e) { @@ -315,9 +315,17 @@ public Map aggregateByValue( @Nonnull String field, @Nullable Filter requestParams, int limit) { + List entitySpecs; + if (entityNames == null || entityNames.isEmpty()) { + entitySpecs = new ArrayList<>(entityRegistry.getEntitySpecs().values()); + } else { + entitySpecs = + entityNames.stream().map(entityRegistry::getEntitySpec).collect(Collectors.toList()); + } final SearchRequest searchRequest = - SearchRequestHandler.getAggregationRequest( - field, transformFilterForEntities(requestParams, indexConvention), limit); + SearchRequestHandler.getBuilder(entitySpecs, searchConfiguration, customSearchConfiguration) + .getAggregationRequest( + field, 
transformFilterForEntities(requestParams, indexConvention), limit); if (entityNames == null) { String indexName = indexConvention.getAllEntityIndicesPattern(); searchRequest.indices(indexName); diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/AutocompleteRequestHandler.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/AutocompleteRequestHandler.java index cdcdae2f3d311..3835032247874 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/AutocompleteRequestHandler.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/AutocompleteRequestHandler.java @@ -14,6 +14,7 @@ import com.linkedin.metadata.query.filter.Filter; import com.linkedin.metadata.search.utils.ESUtils; import java.net.URISyntaxException; +import java.util.Collections; import java.util.HashSet; import java.util.LinkedHashSet; import java.util.List; @@ -40,19 +41,35 @@ public class AutocompleteRequestHandler { private final List _defaultAutocompleteFields; + private final Map> searchableFieldTypes; private static final Map AUTOCOMPLETE_QUERY_BUILDER_BY_ENTITY_NAME = new ConcurrentHashMap<>(); public AutocompleteRequestHandler(@Nonnull EntitySpec entitySpec) { + List fieldSpecs = entitySpec.getSearchableFieldSpecs(); _defaultAutocompleteFields = Stream.concat( - entitySpec.getSearchableFieldSpecs().stream() + fieldSpecs.stream() .map(SearchableFieldSpec::getSearchableAnnotation) .filter(SearchableAnnotation::isEnableAutocomplete) .map(SearchableAnnotation::getFieldName), Stream.of("urn")) .collect(Collectors.toList()); + searchableFieldTypes = + fieldSpecs.stream() + .collect( + Collectors.toMap( + searchableFieldSpec -> + searchableFieldSpec.getSearchableAnnotation().getFieldName(), + searchableFieldSpec -> + new HashSet<>( + Collections.singleton( + searchableFieldSpec.getSearchableAnnotation().getFieldType())), + (set1, set2) -> { + 
set1.addAll(set2); + return set1; + })); } public static AutocompleteRequestHandler getBuilder(@Nonnull EntitySpec entitySpec) { @@ -66,7 +83,7 @@ public SearchRequest getSearchRequest( SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder(); searchSourceBuilder.size(limit); searchSourceBuilder.query(getQuery(input, field)); - searchSourceBuilder.postFilter(ESUtils.buildFilterQuery(filter, false)); + searchSourceBuilder.postFilter(ESUtils.buildFilterQuery(filter, false, searchableFieldTypes)); searchSourceBuilder.highlighter(getHighlights(field)); searchRequest.source(searchSourceBuilder); return searchRequest; diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandler.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandler.java index c5a5ade216bf7..277e15e1334d5 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandler.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandler.java @@ -97,6 +97,7 @@ public class SearchRequestHandler { private final SearchConfiguration _configs; private final SearchQueryBuilder _searchQueryBuilder; private final AggregationQueryBuilder _aggregationQueryBuilder; + private final Map> searchableFieldTypes; private SearchRequestHandler( @Nonnull EntitySpec entitySpec, @@ -121,6 +122,17 @@ private SearchRequestHandler( _searchQueryBuilder = new SearchQueryBuilder(configs, customSearchConfiguration); _aggregationQueryBuilder = new AggregationQueryBuilder(configs, annotations); _configs = configs; + searchableFieldTypes = + _entitySpecs.stream() + .flatMap(entitySpec -> entitySpec.getSearchableFieldTypes().entrySet().stream()) + .collect( + Collectors.toMap( + Map.Entry::getKey, + Map.Entry::getValue, + (set1, set2) -> { + set1.addAll(set2); + return set1; + })); } public static SearchRequestHandler 
getBuilder( @@ -169,8 +181,14 @@ private BinaryOperator mapMerger() { }; } - public static BoolQueryBuilder getFilterQuery(@Nullable Filter filter) { - BoolQueryBuilder filterQuery = ESUtils.buildFilterQuery(filter, false); + public BoolQueryBuilder getFilterQuery(@Nullable Filter filter) { + return getFilterQuery(filter, searchableFieldTypes); + } + + public static BoolQueryBuilder getFilterQuery( + @Nullable Filter filter, + Map> searchableFieldTypes) { + BoolQueryBuilder filterQuery = ESUtils.buildFilterQuery(filter, false, searchableFieldTypes); return filterSoftDeletedByDefault(filter, filterQuery); } @@ -354,7 +372,7 @@ public SearchRequest getFilterRequest( * @return {@link SearchRequest} that contains the aggregation query */ @Nonnull - public static SearchRequest getAggregationRequest( + public SearchRequest getAggregationRequest( @Nonnull String field, @Nullable Filter filter, int limit) { SearchRequest searchRequest = new SearchRequest(); BoolQueryBuilder filterQuery = getFilterQuery(filter); diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/utils/ESUtils.java b/metadata-io/src/main/java/com/linkedin/metadata/search/utils/ESUtils.java index aa854149de43a..4d74bfb66b8db 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/utils/ESUtils.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/utils/ESUtils.java @@ -7,7 +7,6 @@ import static com.linkedin.metadata.search.utils.SearchUtils.isUrn; import com.google.common.collect.ImmutableList; -import com.google.common.collect.ImmutableSet; import com.linkedin.metadata.models.EntitySpec; import com.linkedin.metadata.models.SearchableFieldSpec; import com.linkedin.metadata.models.StructuredPropertyUtils; @@ -18,11 +17,13 @@ import com.linkedin.metadata.query.filter.Filter; import com.linkedin.metadata.query.filter.SortCriterion; import java.util.Arrays; +import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; import 
java.util.Optional; import java.util.Set; +import java.util.stream.Collectors; import javax.annotation.Nonnull; import javax.annotation.Nullable; import lombok.extern.slf4j.Slf4j; @@ -32,6 +33,7 @@ import org.opensearch.index.query.BoolQueryBuilder; import org.opensearch.index.query.QueryBuilder; import org.opensearch.index.query.QueryBuilders; +import org.opensearch.index.query.RangeQueryBuilder; import org.opensearch.search.builder.PointInTimeBuilder; import org.opensearch.search.builder.SearchSourceBuilder; import org.opensearch.search.sort.FieldSortBuilder; @@ -76,6 +78,13 @@ public class ESUtils { SearchableAnnotation.FieldType.BROWSE_PATH_V2, SearchableAnnotation.FieldType.URN, SearchableAnnotation.FieldType.URN_PARTIAL); + + public static final Set RANGE_QUERY_CONDITIONS = + Set.of( + Condition.GREATER_THAN, + Condition.GREATER_THAN_OR_EQUAL_TO, + Condition.LESS_THAN, + Condition.LESS_THAN_OR_EQUAL_TO); public static final String ENTITY_NAME_FIELD = "_entityName"; public static final String NAME_SUGGESTION = "nameSuggestion"; @@ -100,9 +109,6 @@ public class ESUtils { } }; - // TODO - This has been expanded for has* in another branch - public static final Set BOOLEAN_FIELDS = ImmutableSet.of("removed"); - /* * Refer to https://www.elastic.co/guide/en/elasticsearch/reference/current/regexp-syntax.html for list of reserved * characters in an Elasticsearch regular expression. 
@@ -123,7 +129,10 @@ private ESUtils() {} * @return built filter query */ @Nonnull - public static BoolQueryBuilder buildFilterQuery(@Nullable Filter filter, boolean isTimeseries) { + public static BoolQueryBuilder buildFilterQuery( + @Nullable Filter filter, + boolean isTimeseries, + final Map> searchableFieldTypes) { BoolQueryBuilder finalQueryBuilder = QueryBuilders.boolQuery(); if (filter == null) { return finalQueryBuilder; @@ -134,7 +143,8 @@ public static BoolQueryBuilder buildFilterQuery(@Nullable Filter filter, boolean .getOr() .forEach( or -> - finalQueryBuilder.should(ESUtils.buildConjunctiveFilterQuery(or, isTimeseries))); + finalQueryBuilder.should( + ESUtils.buildConjunctiveFilterQuery(or, isTimeseries, searchableFieldTypes))); } else if (filter.getCriteria() != null) { // Otherwise, build boolean query from the deprecated "criteria" field. log.warn("Received query Filter with a deprecated field 'criteria'. Use 'or' instead."); @@ -146,7 +156,8 @@ public static BoolQueryBuilder buildFilterQuery(@Nullable Filter filter, boolean if (!criterion.getValue().trim().isEmpty() || criterion.hasValues() || criterion.getCondition() == Condition.IS_NULL) { - andQueryBuilder.must(getQueryBuilderFromCriterion(criterion, isTimeseries)); + andQueryBuilder.must( + getQueryBuilderFromCriterion(criterion, isTimeseries, searchableFieldTypes)); } }); finalQueryBuilder.should(andQueryBuilder); @@ -156,7 +167,9 @@ public static BoolQueryBuilder buildFilterQuery(@Nullable Filter filter, boolean @Nonnull public static BoolQueryBuilder buildConjunctiveFilterQuery( - @Nonnull ConjunctiveCriterion conjunctiveCriterion, boolean isTimeseries) { + @Nonnull ConjunctiveCriterion conjunctiveCriterion, + boolean isTimeseries, + Map> searchableFieldTypes) { final BoolQueryBuilder andQueryBuilder = new BoolQueryBuilder(); conjunctiveCriterion .getAnd() @@ -167,9 +180,11 @@ public static BoolQueryBuilder buildConjunctiveFilterQuery( || criterion.hasValues()) { if (!criterion.isNegated()) 
{ // `filter` instead of `must` (enables caching and bypasses scoring) - andQueryBuilder.filter(getQueryBuilderFromCriterion(criterion, isTimeseries)); + andQueryBuilder.filter( + getQueryBuilderFromCriterion(criterion, isTimeseries, searchableFieldTypes)); } else { - andQueryBuilder.mustNot(getQueryBuilderFromCriterion(criterion, isTimeseries)); + andQueryBuilder.mustNot( + getQueryBuilderFromCriterion(criterion, isTimeseries, searchableFieldTypes)); } } }); @@ -205,7 +220,9 @@ public static BoolQueryBuilder buildConjunctiveFilterQuery( */ @Nonnull public static QueryBuilder getQueryBuilderFromCriterion( - @Nonnull final Criterion criterion, boolean isTimeseries) { + @Nonnull final Criterion criterion, + boolean isTimeseries, + final Map> searchableFieldTypes) { final String fieldName = toFacetField(criterion.getField()); if (fieldName.startsWith(STRUCTURED_PROPERTY_MAPPING_FIELD)) { criterion.setField(fieldName); @@ -224,10 +241,11 @@ public static QueryBuilder getQueryBuilderFromCriterion( if (maybeFieldToExpand.isPresent()) { return getQueryBuilderFromCriterionForFieldToExpand( - maybeFieldToExpand.get(), criterion, isTimeseries); + maybeFieldToExpand.get(), criterion, isTimeseries, searchableFieldTypes); } - return getQueryBuilderFromCriterionForSingleField(criterion, isTimeseries); + return getQueryBuilderFromCriterionForSingleField( + criterion, isTimeseries, searchableFieldTypes); } public static String getElasticTypeForFieldType(SearchableAnnotation.FieldType fieldType) { @@ -378,7 +396,7 @@ public static String toFacetField(@Nonnull final String filterField) { @Nonnull public static String toKeywordField( - @Nonnull final String filterField, @Nonnull final boolean skipKeywordSuffix) { + @Nonnull final String filterField, final boolean skipKeywordSuffix) { return skipKeywordSuffix || KEYWORD_FIELDS.contains(filterField) || PATH_HIERARCHY_FIELDS.contains(filterField) @@ -428,7 +446,8 @@ public static void setSearchAfter( private static QueryBuilder 
getQueryBuilderFromCriterionForFieldToExpand( @Nonnull final List fields, @Nonnull final Criterion criterion, - final boolean isTimeseries) { + final boolean isTimeseries, + final Map> searchableFieldTypes) { final BoolQueryBuilder orQueryBuilder = new BoolQueryBuilder(); for (String field : fields) { Criterion criterionToQuery = new Criterion(); @@ -442,14 +461,17 @@ private static QueryBuilder getQueryBuilderFromCriterionForFieldToExpand( } criterionToQuery.setField(toKeywordField(field, isTimeseries)); orQueryBuilder.should( - getQueryBuilderFromCriterionForSingleField(criterionToQuery, isTimeseries)); + getQueryBuilderFromCriterionForSingleField( + criterionToQuery, isTimeseries, searchableFieldTypes)); } return orQueryBuilder; } @Nonnull private static QueryBuilder getQueryBuilderFromCriterionForSingleField( - @Nonnull Criterion criterion, @Nonnull boolean isTimeseries) { + @Nonnull Criterion criterion, + boolean isTimeseries, + final Map> searchableFieldTypes) { final Condition condition = criterion.getCondition(); final String fieldName = toFacetField(criterion.getField()); @@ -463,24 +485,11 @@ private static QueryBuilder getQueryBuilderFromCriterionForSingleField( .queryName(fieldName); } else if (criterion.hasValues() || criterion.hasValue()) { if (condition == Condition.EQUAL) { - return buildEqualsConditionFromCriterion(fieldName, criterion, isTimeseries); - // TODO: Support multi-match on the following operators (using new 'values' field) - } else if (condition == Condition.GREATER_THAN) { - return QueryBuilders.rangeQuery(criterion.getField()) - .gt(criterion.getValue().trim()) - .queryName(fieldName); - } else if (condition == Condition.GREATER_THAN_OR_EQUAL_TO) { - return QueryBuilders.rangeQuery(criterion.getField()) - .gte(criterion.getValue().trim()) - .queryName(fieldName); - } else if (condition == Condition.LESS_THAN) { - return QueryBuilders.rangeQuery(criterion.getField()) - .lt(criterion.getValue().trim()) - .queryName(fieldName); - } else 
if (condition == Condition.LESS_THAN_OR_EQUAL_TO) { - return QueryBuilders.rangeQuery(criterion.getField()) - .lte(criterion.getValue().trim()) - .queryName(fieldName); + return buildEqualsConditionFromCriterion( + fieldName, criterion, isTimeseries, searchableFieldTypes); + } else if (RANGE_QUERY_CONDITIONS.contains(condition)) { + return buildRangeQueryFromCriterion( + criterion, fieldName, searchableFieldTypes, condition, isTimeseries); } else if (condition == Condition.CONTAIN) { return QueryBuilders.wildcardQuery( toKeywordField(criterion.getField(), isTimeseries), @@ -504,13 +513,15 @@ private static QueryBuilder getQueryBuilderFromCriterionForSingleField( private static QueryBuilder buildEqualsConditionFromCriterion( @Nonnull final String fieldName, @Nonnull final Criterion criterion, - final boolean isTimeseries) { + final boolean isTimeseries, + final Map> searchableFieldTypes) { /* * If the newer 'values' field of Criterion.pdl is set, then we * handle using the following code to allow multi-match. */ if (!criterion.getValues().isEmpty()) { - return buildEqualsConditionFromCriterionWithValues(fieldName, criterion, isTimeseries); + return buildEqualsConditionFromCriterionWithValues( + fieldName, criterion, isTimeseries, searchableFieldTypes); } /* * Otherwise, we are likely using the deprecated 'value' field. @@ -526,21 +537,91 @@ private static QueryBuilder buildEqualsConditionFromCriterion( private static QueryBuilder buildEqualsConditionFromCriterionWithValues( @Nonnull final String fieldName, @Nonnull final Criterion criterion, - final boolean isTimeseries) { - if (BOOLEAN_FIELDS.contains(fieldName) && criterion.getValues().size() == 1) { - // Handle special-cased Boolean fields. - // here we special case boolean fields we recognize the names of and hard-cast - // the first provided value to a boolean to do the comparison. - // Ideally, we should detect the type of the field from the entity-registry in order - // to determine how to cast. 
+ final boolean isTimeseries, + final Map> searchableFieldTypes) { + Set fieldTypes = getFieldTypes(searchableFieldTypes, fieldName); + if (fieldTypes.size() > 1) { + log.warn( + "Multiple field types for field name {}, determining best fit for set: {}", + fieldName, + fieldTypes); + } + if (fieldTypes.contains(BOOLEAN_FIELD_TYPE) && criterion.getValues().size() == 1) { return QueryBuilders.termQuery(fieldName, Boolean.parseBoolean(criterion.getValues().get(0))) .queryName(fieldName); + } else if (fieldTypes.contains(LONG_FIELD_TYPE) || fieldTypes.contains(DATE_FIELD_TYPE)) { + List longValues = + criterion.getValues().stream().map(Long::parseLong).collect(Collectors.toList()); + return QueryBuilders.termsQuery(fieldName, longValues).queryName(fieldName); + } else if (fieldTypes.contains(DOUBLE_FIELD_TYPE)) { + List doubleValues = + criterion.getValues().stream().map(Double::parseDouble).collect(Collectors.toList()); + return QueryBuilders.termsQuery(fieldName, doubleValues).queryName(fieldName); } return QueryBuilders.termsQuery( toKeywordField(criterion.getField(), isTimeseries), criterion.getValues()) .queryName(fieldName); } + private static Set getFieldTypes( + Map> searchableFields, String fieldName) { + Set fieldTypes = + searchableFields.getOrDefault(fieldName, Collections.emptySet()); + Set finalFieldTypes = + fieldTypes.stream().map(ESUtils::getElasticTypeForFieldType).collect(Collectors.toSet()); + if (fieldTypes.size() > 1) { + log.warn( + "Multiple field types for field name {}, determining best fit for set: {}", + fieldName, + fieldTypes); + } + return finalFieldTypes; + } + + private static RangeQueryBuilder buildRangeQueryFromCriterion( + Criterion criterion, + String fieldName, + Map> searchableFieldTypes, + Condition condition, + boolean isTimeseries) { + Set fieldTypes = getFieldTypes(searchableFieldTypes, fieldName); + + // Determine criterion value, range query only accepts single value so take first value in + // values if multiple + String 
criterionValueString; + if (!criterion.getValues().isEmpty()) { + criterionValueString = criterion.getValues().get(0).trim(); + } else { + criterionValueString = criterion.getValue().trim(); + } + Object criterionValue; + String documentFieldName; + if (fieldTypes.contains(BOOLEAN_FIELD_TYPE)) { + criterionValue = Boolean.parseBoolean(criterionValueString); + documentFieldName = criterion.getField(); + } else if (fieldTypes.contains(LONG_FIELD_TYPE) || fieldTypes.contains(DATE_FIELD_TYPE)) { + criterionValue = Long.parseLong(criterionValueString); + documentFieldName = criterion.getField(); + } else if (fieldTypes.contains(DOUBLE_FIELD_TYPE)) { + criterionValue = Double.parseDouble(criterionValueString); + documentFieldName = criterion.getField(); + } else { + criterionValue = criterionValueString; + documentFieldName = toKeywordField(criterion.getField(), isTimeseries); + } + + // Set up QueryBuilder based on condition + if (condition == Condition.GREATER_THAN) { + return QueryBuilders.rangeQuery(documentFieldName).gt(criterionValue).queryName(fieldName); + } else if (condition == Condition.GREATER_THAN_OR_EQUAL_TO) { + return QueryBuilders.rangeQuery(documentFieldName).gte(criterionValue).queryName(fieldName); + } else if (condition == Condition.LESS_THAN) { + return QueryBuilders.rangeQuery(documentFieldName).lt(criterionValue).queryName(fieldName); + } else /*if (condition == Condition.LESS_THAN_OR_EQUAL_TO)*/ { + return QueryBuilders.rangeQuery(documentFieldName).lte(criterionValue).queryName(fieldName); + } + } + /** * Builds an instance of {@link QueryBuilder} representing an EQUALS condition which was created * using the deprecated 'value' field of Criterion.pdl model. 
diff --git a/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/ElasticSearchTimeseriesAspectService.java b/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/ElasticSearchTimeseriesAspectService.java index a2b36b7d8ddb8..cb06dc75c70bc 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/ElasticSearchTimeseriesAspectService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/ElasticSearchTimeseriesAspectService.java @@ -14,6 +14,7 @@ import com.linkedin.metadata.aspect.EnvelopedAspect; import com.linkedin.metadata.models.AspectSpec; import com.linkedin.metadata.models.EntitySpec; +import com.linkedin.metadata.models.annotation.SearchableAnnotation; import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.metadata.query.filter.Condition; import com.linkedin.metadata.query.filter.Criterion; @@ -290,7 +291,12 @@ public long countByFilter( @Nullable final Filter filter) { final String indexName = _indexConvention.getTimeseriesAspectIndexName(entityName, aspectName); final BoolQueryBuilder filterQueryBuilder = - QueryBuilders.boolQuery().must(ESUtils.buildFilterQuery(filter, true)); + QueryBuilders.boolQuery() + .must( + ESUtils.buildFilterQuery( + filter, + true, + _entityRegistry.getEntitySpec(entityName).getSearchableFieldTypes())); CountRequest countRequest = new CountRequest(); countRequest.query(filterQueryBuilder); countRequest.indices(indexName); @@ -313,8 +319,11 @@ public List getAspectValues( @Nullable final Integer limit, @Nullable final Filter filter, @Nullable final SortCriterion sort) { + Map> searchableFieldTypes = + _entityRegistry.getEntitySpec(entityName).getSearchableFieldTypes(); final BoolQueryBuilder filterQueryBuilder = - QueryBuilders.boolQuery().must(ESUtils.buildFilterQuery(filter, true)); + QueryBuilders.boolQuery() + .must(ESUtils.buildFilterQuery(filter, true, searchableFieldTypes)); 
filterQueryBuilder.must(QueryBuilders.matchQuery("urn", urn.toString())); // NOTE: We are interested only in the un-exploded rows as only they carry the `event` payload. filterQueryBuilder.mustNot(QueryBuilders.termQuery(MappingsBuilder.IS_EXPLODED_FIELD, true)); @@ -324,7 +333,8 @@ public List getAspectValues( .setField(MappingsBuilder.TIMESTAMP_MILLIS_FIELD) .setCondition(Condition.GREATER_THAN_OR_EQUAL_TO) .setValue(startTimeMillis.toString()); - filterQueryBuilder.must(ESUtils.getQueryBuilderFromCriterion(startTimeCriterion, true)); + filterQueryBuilder.must( + ESUtils.getQueryBuilderFromCriterion(startTimeCriterion, true, searchableFieldTypes)); } if (endTimeMillis != null) { Criterion endTimeCriterion = @@ -332,7 +342,8 @@ public List getAspectValues( .setField(MappingsBuilder.TIMESTAMP_MILLIS_FIELD) .setCondition(Condition.LESS_THAN_OR_EQUAL_TO) .setValue(endTimeMillis.toString()); - filterQueryBuilder.must(ESUtils.getQueryBuilderFromCriterion(endTimeCriterion, true)); + filterQueryBuilder.must( + ESUtils.getQueryBuilderFromCriterion(endTimeCriterion, true, searchableFieldTypes)); } final SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder(); searchSourceBuilder.query(filterQueryBuilder); @@ -400,7 +411,9 @@ public GenericTable getAggregatedStats( public DeleteAspectValuesResult deleteAspectValues( @Nonnull String entityName, @Nonnull String aspectName, @Nonnull Filter filter) { final String indexName = _indexConvention.getTimeseriesAspectIndexName(entityName, aspectName); - final BoolQueryBuilder filterQueryBuilder = ESUtils.buildFilterQuery(filter, true); + final BoolQueryBuilder filterQueryBuilder = + ESUtils.buildFilterQuery( + filter, true, _entityRegistry.getEntitySpec(entityName).getSearchableFieldTypes()); final Optional result = _bulkProcessor @@ -426,7 +439,9 @@ public String deleteAspectValuesAsync( @Nonnull Filter filter, @Nonnull BatchWriteOperationsOptions options) { final String indexName = 
_indexConvention.getTimeseriesAspectIndexName(entityName, aspectName); - final BoolQueryBuilder filterQueryBuilder = ESUtils.buildFilterQuery(filter, true); + final BoolQueryBuilder filterQueryBuilder = + ESUtils.buildFilterQuery( + filter, true, _entityRegistry.getEntitySpec(entityName).getSearchableFieldTypes()); final int batchSize = options.getBatchSize() > 0 ? options.getBatchSize() : DEFAULT_LIMIT; TimeValue timeout = options.getTimeoutSeconds() > 0 @@ -450,7 +465,9 @@ public String reindexAsync( @Nonnull Filter filter, @Nonnull BatchWriteOperationsOptions options) { final String indexName = _indexConvention.getTimeseriesAspectIndexName(entityName, aspectName); - final BoolQueryBuilder filterQueryBuilder = ESUtils.buildFilterQuery(filter, true); + final BoolQueryBuilder filterQueryBuilder = + ESUtils.buildFilterQuery( + filter, true, _entityRegistry.getEntitySpec(entityName).getSearchableFieldTypes()); try { return this.reindexAsync(indexName, filterQueryBuilder, options); } catch (Exception e) { @@ -498,8 +515,12 @@ public TimeseriesScrollResult scrollAspects( int count, @Nullable Long startTimeMillis, @Nullable Long endTimeMillis) { + + Map> searchableFieldTypes = + _entityRegistry.getEntitySpec(entityName).getSearchableFieldTypes(); final BoolQueryBuilder filterQueryBuilder = - QueryBuilders.boolQuery().filter(ESUtils.buildFilterQuery(filter, true)); + QueryBuilders.boolQuery() + .filter(ESUtils.buildFilterQuery(filter, true, searchableFieldTypes)); if (startTimeMillis != null) { Criterion startTimeCriterion = @@ -507,7 +528,8 @@ public TimeseriesScrollResult scrollAspects( .setField(MappingsBuilder.TIMESTAMP_MILLIS_FIELD) .setCondition(Condition.GREATER_THAN_OR_EQUAL_TO) .setValue(startTimeMillis.toString()); - filterQueryBuilder.filter(ESUtils.getQueryBuilderFromCriterion(startTimeCriterion, true)); + filterQueryBuilder.filter( + ESUtils.getQueryBuilderFromCriterion(startTimeCriterion, true, searchableFieldTypes)); } if (endTimeMillis != null) { 
Criterion endTimeCriterion = @@ -515,7 +537,8 @@ public TimeseriesScrollResult scrollAspects( .setField(MappingsBuilder.TIMESTAMP_MILLIS_FIELD) .setCondition(Condition.LESS_THAN_OR_EQUAL_TO) .setValue(endTimeMillis.toString()); - filterQueryBuilder.filter(ESUtils.getQueryBuilderFromCriterion(endTimeCriterion, true)); + filterQueryBuilder.filter( + ESUtils.getQueryBuilderFromCriterion(endTimeCriterion, true, searchableFieldTypes)); } SearchResponse response = @@ -537,7 +560,7 @@ public TimeseriesScrollResult scrollAspects( } private SearchResponse executeScrollSearchQuery( - @Nonnull final String entityNname, + @Nonnull final String entityName, @Nonnull final String aspectName, @Nonnull final QueryBuilder query, @Nonnull List sortCriterion, @@ -560,7 +583,7 @@ private SearchResponse executeScrollSearchQuery( searchRequest.source(searchSourceBuilder); ESUtils.setSearchAfter(searchSourceBuilder, sort, null, null); - searchRequest.indices(_indexConvention.getTimeseriesAspectIndexName(entityNname, aspectName)); + searchRequest.indices(_indexConvention.getTimeseriesAspectIndexName(entityName, aspectName)); try (Timer.Context ignored = MetricUtils.timer(this.getClass(), "scrollAspects_search").time()) { diff --git a/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/query/ESAggregatedStatsDAO.java b/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/query/ESAggregatedStatsDAO.java index 539e5dfbaa1d0..580888e54b700 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/query/ESAggregatedStatsDAO.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/query/ESAggregatedStatsDAO.java @@ -377,7 +377,9 @@ public GenericTable getAggregatedStats( @Nullable GroupingBucket[] groupingBuckets) { // Setup the filter query builder using the input filter provided. 
- final BoolQueryBuilder filterQueryBuilder = ESUtils.buildFilterQuery(filter, true); + final BoolQueryBuilder filterQueryBuilder = + ESUtils.buildFilterQuery( + filter, true, _entityRegistry.getEntitySpec(entityName).getSearchableFieldTypes()); AspectSpec aspectSpec = getTimeseriesAspectSpec(entityName, aspectName); // Build and attach the grouping aggregations diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/fixtures/GoldenTestBase.java b/metadata-io/src/test/java/com/linkedin/metadata/search/fixtures/GoldenTestBase.java index d2aef982750bd..4c125065deb4d 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/fixtures/GoldenTestBase.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/fixtures/GoldenTestBase.java @@ -1,18 +1,27 @@ package com.linkedin.metadata.search.fixtures; +import static com.linkedin.metadata.Constants.*; import static io.datahubproject.test.search.SearchTestUtils.searchAcrossCustomEntities; import static io.datahubproject.test.search.SearchTestUtils.searchAcrossEntities; -import static org.testng.Assert.assertTrue; +import static org.testng.Assert.*; import static org.testng.AssertJUnit.assertNotNull; +import com.google.common.collect.ImmutableList; import com.linkedin.common.urn.Urn; +import com.linkedin.data.template.StringArray; import com.linkedin.datahub.graphql.generated.EntityType; import com.linkedin.datahub.graphql.types.entitytype.EntityTypeMapper; import com.linkedin.metadata.models.registry.EntityRegistry; +import com.linkedin.metadata.query.filter.ConjunctiveCriterion; +import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray; +import com.linkedin.metadata.query.filter.Criterion; +import com.linkedin.metadata.query.filter.CriterionArray; +import com.linkedin.metadata.query.filter.Filter; import com.linkedin.metadata.search.MatchedFieldArray; import com.linkedin.metadata.search.SearchEntityArray; import com.linkedin.metadata.search.SearchResult; import 
com.linkedin.metadata.search.SearchService; +import java.util.Collections; import java.util.List; import java.util.stream.Collectors; import java.util.stream.Stream; @@ -169,6 +178,35 @@ public void testNameMatchCustomerOrders() { assertTrue(firstResultScore > secondResultScore); } + @Test + public void testFilterOnCountField() { + assertNotNull(getSearchService()); + Filter filter = + new Filter() + .setOr( + new ConjunctiveCriterionArray( + new ConjunctiveCriterion() + .setAnd( + new CriterionArray( + ImmutableList.of( + new Criterion() + .setField("rowCount") + .setValue("") + .setValues(new StringArray(ImmutableList.of("68")))))))); + SearchResult searchResult = + searchAcrossEntities( + getSearchService(), + "*", + SEARCHABLE_LONGTAIL_ENTITIES, + filter, + Collections.singletonList(DATASET_ENTITY_NAME)); + assertFalse(searchResult.getEntities().isEmpty()); + Urn firstResultUrn = searchResult.getEntities().get(0).getEntity(); + assertEquals( + firstResultUrn.toString(), + "urn:li:dataset:(urn:li:dataPlatform:dbt,long_tail_companions.analytics.dogs_in_movies,PROD)"); + } + /* Tests that should pass but do not yet can be added below here, with the following annotation: @Test(enabled = false) diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/fixtures/SampleDataFixtureTestBase.java b/metadata-io/src/test/java/com/linkedin/metadata/search/fixtures/SampleDataFixtureTestBase.java index a1af2325ee0ed..4742115b16e1b 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/fixtures/SampleDataFixtureTestBase.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/fixtures/SampleDataFixtureTestBase.java @@ -14,8 +14,10 @@ import com.datahub.authentication.Actor; import com.datahub.authentication.ActorType; import com.datahub.authentication.Authentication; +import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.linkedin.common.urn.Urn; +import com.linkedin.data.template.StringArray; 
import com.linkedin.datahub.graphql.generated.AutoCompleteResults; import com.linkedin.datahub.graphql.types.chart.ChartType; import com.linkedin.datahub.graphql.types.container.ContainerType; @@ -45,6 +47,7 @@ import com.linkedin.r2.RemoteInvocationException; import java.io.IOException; import java.util.ArrayList; +import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -64,6 +67,7 @@ import org.opensearch.search.sort.FieldSortBuilder; import org.opensearch.search.sort.SortBuilder; import org.springframework.test.context.testng.AbstractTestNGSpringContextTests; +import org.testng.AssertJUnit; import org.testng.annotations.Test; public abstract class SampleDataFixtureTestBase extends AbstractTestNGSpringContextTests { @@ -1936,6 +1940,56 @@ public void testSortOrdering() { String.format("%s - Expected search results to have at least two results", query)); } + @Test + public void testFilterOnHasValuesField() { + AssertJUnit.assertNotNull(getSearchService()); + Filter filter = + new Filter() + .setOr( + new ConjunctiveCriterionArray( + new ConjunctiveCriterion() + .setAnd( + new CriterionArray( + ImmutableList.of( + new Criterion() + .setField("hasOwners") + .setValue("") + .setValues(new StringArray(ImmutableList.of("true")))))))); + SearchResult searchResult = + searchAcrossEntities( + getSearchService(), + "*", + SEARCHABLE_ENTITIES, + filter, + Collections.singletonList(DATASET_ENTITY_NAME)); + assertEquals(searchResult.getEntities().size(), 8); + } + + @Test + public void testFilterOnNumValuesField() { + AssertJUnit.assertNotNull(getSearchService()); + Filter filter = + new Filter() + .setOr( + new ConjunctiveCriterionArray( + new ConjunctiveCriterion() + .setAnd( + new CriterionArray( + ImmutableList.of( + new Criterion() + .setField("numInputDatasets") + .setValue("") + .setValues(new StringArray(ImmutableList.of("1")))))))); + SearchResult searchResult = + searchAcrossEntities( + getSearchService(), + "*", + 
SEARCHABLE_ENTITIES, + filter, + Collections.singletonList(DATA_JOB_ENTITY_NAME)); + assertEquals(searchResult.getEntities().size(), 4); + } + private Stream getTokens(AnalyzeRequest request) throws IOException { return getSearchClient() diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/indexbuilder/MappingsBuilderTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/indexbuilder/MappingsBuilderTest.java index 6df31b35fecde..8d504c562c99c 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/indexbuilder/MappingsBuilderTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/indexbuilder/MappingsBuilderTest.java @@ -21,7 +21,7 @@ public void testMappingsBuilder() { Map result = MappingsBuilder.getMappings(TestEntitySpecBuilder.getSpec()); assertEquals(result.size(), 1); Map properties = (Map) result.get("properties"); - assertEquals(properties.size(), 20); + assertEquals(properties.size(), 21); assertEquals( properties.get("urn"), ImmutableMap.of( @@ -52,6 +52,7 @@ public void testMappingsBuilder() { assertEquals(properties.get("runId"), ImmutableMap.of("type", "keyword")); assertTrue(properties.containsKey("browsePaths")); assertTrue(properties.containsKey("browsePathV2")); + assertTrue(properties.containsKey("removed")); // KEYWORD Map keyPart3Field = (Map) properties.get("keyPart3"); assertEquals(keyPart3Field.get("type"), "keyword"); diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchRequestHandlerTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchRequestHandlerTest.java index daf2ac58002e0..02c9ea800f0af 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchRequestHandlerTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchRequestHandlerTest.java @@ -614,7 +614,7 @@ public void testBrowsePathQueryFilter() { Filter filter = new Filter(); 
filter.setOr(conjunctiveCriterionArray); - BoolQueryBuilder test = SearchRequestHandler.getFilterQuery(filter); + BoolQueryBuilder test = SearchRequestHandler.getFilterQuery(filter, new HashMap<>()); assertEquals(test.should().size(), 1); diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/utils/ESUtilsTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/utils/ESUtilsTest.java index 980b82194536e..838df98fdce9c 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/utils/ESUtilsTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/utils/ESUtilsTest.java @@ -4,6 +4,7 @@ import com.linkedin.data.template.StringArray; import com.linkedin.metadata.query.filter.Condition; import com.linkedin.metadata.query.filter.Criterion; +import java.util.HashMap; import org.opensearch.index.query.QueryBuilder; import org.testng.Assert; import org.testng.annotations.Test; @@ -21,7 +22,8 @@ public void testGetQueryBuilderFromCriterionEqualsValues() { .setCondition(Condition.EQUAL) .setValues(new StringArray(ImmutableList.of("value1"))); - QueryBuilder result = ESUtils.getQueryBuilderFromCriterion(singleValueCriterion, false); + QueryBuilder result = + ESUtils.getQueryBuilderFromCriterion(singleValueCriterion, false, new HashMap<>()); String expected = "{\n" + " \"terms\" : {\n" @@ -40,7 +42,7 @@ public void testGetQueryBuilderFromCriterionEqualsValues() { .setCondition(Condition.EQUAL) .setValues(new StringArray(ImmutableList.of("value1", "value2"))); - result = ESUtils.getQueryBuilderFromCriterion(multiValueCriterion, false); + result = ESUtils.getQueryBuilderFromCriterion(multiValueCriterion, false, new HashMap<>()); expected = "{\n" + " \"terms\" : {\n" @@ -60,7 +62,7 @@ public void testGetQueryBuilderFromCriterionEqualsValues() { .setCondition(Condition.EQUAL) .setValues(new StringArray(ImmutableList.of("value1", "value2"))); - result = ESUtils.getQueryBuilderFromCriterion(timeseriesField, true); + result = 
ESUtils.getQueryBuilderFromCriterion(timeseriesField, true, new HashMap<>()); expected = "{\n" + " \"terms\" : {\n" @@ -80,7 +82,8 @@ public void testGetQueryBuilderFromCriterionExists() { final Criterion singleValueCriterion = new Criterion().setField("myTestField").setCondition(Condition.EXISTS); - QueryBuilder result = ESUtils.getQueryBuilderFromCriterion(singleValueCriterion, false); + QueryBuilder result = + ESUtils.getQueryBuilderFromCriterion(singleValueCriterion, false, new HashMap<>()); String expected = "{\n" + " \"bool\" : {\n" @@ -103,7 +106,7 @@ public void testGetQueryBuilderFromCriterionExists() { final Criterion timeseriesField = new Criterion().setField("myTestField").setCondition(Condition.EXISTS); - result = ESUtils.getQueryBuilderFromCriterion(timeseriesField, true); + result = ESUtils.getQueryBuilderFromCriterion(timeseriesField, true, new HashMap<>()); expected = "{\n" + " \"bool\" : {\n" @@ -128,7 +131,8 @@ public void testGetQueryBuilderFromCriterionIsNull() { final Criterion singleValueCriterion = new Criterion().setField("myTestField").setCondition(Condition.IS_NULL); - QueryBuilder result = ESUtils.getQueryBuilderFromCriterion(singleValueCriterion, false); + QueryBuilder result = + ESUtils.getQueryBuilderFromCriterion(singleValueCriterion, false, new HashMap<>()); String expected = "{\n" + " \"bool\" : {\n" @@ -151,7 +155,7 @@ public void testGetQueryBuilderFromCriterionIsNull() { final Criterion timeseriesField = new Criterion().setField("myTestField").setCondition(Condition.IS_NULL); - result = ESUtils.getQueryBuilderFromCriterion(timeseriesField, true); + result = ESUtils.getQueryBuilderFromCriterion(timeseriesField, true, new HashMap<>()); expected = "{\n" + " \"bool\" : {\n" @@ -182,7 +186,8 @@ public void testGetQueryBuilderFromCriterionFieldToExpand() { .setValues(new StringArray(ImmutableList.of("value1"))); // Ensure that the query is expanded! 
- QueryBuilder result = ESUtils.getQueryBuilderFromCriterion(singleValueCriterion, false); + QueryBuilder result = + ESUtils.getQueryBuilderFromCriterion(singleValueCriterion, false, new HashMap<>()); String expected = "{\n" + " \"bool\" : {\n" @@ -220,7 +225,7 @@ public void testGetQueryBuilderFromCriterionFieldToExpand() { .setValues(new StringArray(ImmutableList.of("value1", "value2"))); // Ensure that the query is expanded without keyword. - result = ESUtils.getQueryBuilderFromCriterion(timeseriesField, true); + result = ESUtils.getQueryBuilderFromCriterion(timeseriesField, true, new HashMap<>()); expected = "{\n" + " \"bool\" : {\n" @@ -262,7 +267,8 @@ public void testGetQueryBuilderFromStructPropEqualsValue() { .setCondition(Condition.EQUAL) .setValues(new StringArray(ImmutableList.of("value1"))); - QueryBuilder result = ESUtils.getQueryBuilderFromCriterion(singleValueCriterion, false); + QueryBuilder result = + ESUtils.getQueryBuilderFromCriterion(singleValueCriterion, false, new HashMap<>()); String expected = "{\n" + " \"terms\" : {\n" @@ -281,7 +287,8 @@ public void testGetQueryBuilderFromStructPropExists() { final Criterion singleValueCriterion = new Criterion().setField("structuredProperties.ab.fgh.ten").setCondition(Condition.EXISTS); - QueryBuilder result = ESUtils.getQueryBuilderFromCriterion(singleValueCriterion, false); + QueryBuilder result = + ESUtils.getQueryBuilderFromCriterion(singleValueCriterion, false, new HashMap<>()); String expected = "{\n" + " \"bool\" : {\n" @@ -304,7 +311,7 @@ public void testGetQueryBuilderFromStructPropExists() { final Criterion timeseriesField = new Criterion().setField("myTestField").setCondition(Condition.EXISTS); - result = ESUtils.getQueryBuilderFromCriterion(timeseriesField, true); + result = ESUtils.getQueryBuilderFromCriterion(timeseriesField, true, new HashMap<>()); expected = "{\n" + " \"bool\" : {\n" diff --git 
a/metadata-io/src/test/java/com/linkedin/metadata/timeseries/search/TimeseriesAspectServiceTestBase.java b/metadata-io/src/test/java/com/linkedin/metadata/timeseries/search/TimeseriesAspectServiceTestBase.java index 8d7701f6d174f..23ca4a4a4247e 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/timeseries/search/TimeseriesAspectServiceTestBase.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/timeseries/search/TimeseriesAspectServiceTestBase.java @@ -485,6 +485,65 @@ public void testGetAggregatedStatsLatestStatForDay1() { _testEntityProfiles.get(_startTime + 23 * TIME_INCREMENT).getStat().toString()))); } + @Test( + groups = {"getAggregatedStats"}, + dependsOnGroups = {"upsert"}) + public void testGetAggregatedStatsLatestStatForDay1WithValues() { + // Filter is only on the urn + Criterion hasUrnCriterion = + new Criterion().setField("urn").setCondition(Condition.EQUAL).setValue(TEST_URN.toString()); + Criterion startTimeCriterion = + new Criterion() + .setField(ES_FIELD_TIMESTAMP) + .setCondition(Condition.GREATER_THAN_OR_EQUAL_TO) + .setValues(new StringArray(_startTime.toString())) + .setValue(""); + Criterion endTimeCriterion = + new Criterion() + .setField(ES_FIELD_TIMESTAMP) + .setCondition(Condition.LESS_THAN_OR_EQUAL_TO) + .setValues(new StringArray(String.valueOf(_startTime + 23 * TIME_INCREMENT))) + .setValue(""); + + Filter filter = + QueryUtils.getFilterFromCriteria( + ImmutableList.of(hasUrnCriterion, startTimeCriterion, endTimeCriterion)); + + // Aggregate on latest stat value + AggregationSpec latestStatAggregationSpec = + new AggregationSpec().setAggregationType(AggregationType.LATEST).setFieldPath("stat"); + + // Grouping bucket is only timestamp filed. 
+ GroupingBucket timestampBucket = + new GroupingBucket() + .setKey(ES_FIELD_TIMESTAMP) + .setType(GroupingBucketType.DATE_GROUPING_BUCKET) + .setTimeWindowSize(new TimeWindowSize().setMultiple(1).setUnit(CalendarInterval.DAY)); + + GenericTable resultTable = + _elasticSearchTimeseriesAspectService.getAggregatedStats( + ENTITY_NAME, + ASPECT_NAME, + new AggregationSpec[] {latestStatAggregationSpec}, + filter, + new GroupingBucket[] {timestampBucket}); + // Validate column names + assertEquals( + resultTable.getColumnNames(), + new StringArray(ES_FIELD_TIMESTAMP, "latest_" + ES_FIELD_STAT)); + // Validate column types + assertEquals(resultTable.getColumnTypes(), new StringArray("long", "long")); + // Validate rows + assertNotNull(resultTable.getRows()); + assertEquals(resultTable.getRows().size(), 1); + assertEquals( + resultTable.getRows(), + new StringArrayArray( + new StringArray( + _startTime.toString(), + _testEntityProfiles.get(_startTime + 23 * TIME_INCREMENT).getStat().toString()))); + } + @Test( groups = {"getAggregatedStats"}, dependsOnGroups = {"upsert"}) diff --git a/metadata-io/src/test/java/io/datahubproject/test/search/SearchTestUtils.java b/metadata-io/src/test/java/io/datahubproject/test/search/SearchTestUtils.java index a22a774065852..f3689f9b5d04a 100644 --- a/metadata-io/src/test/java/io/datahubproject/test/search/SearchTestUtils.java +++ b/metadata-io/src/test/java/io/datahubproject/test/search/SearchTestUtils.java @@ -15,6 +15,7 @@ import com.linkedin.datahub.graphql.types.entitytype.EntityTypeMapper; import com.linkedin.metadata.graph.LineageDirection; import com.linkedin.metadata.query.SearchFlags; +import com.linkedin.metadata.query.filter.Filter; import com.linkedin.metadata.search.LineageSearchResult; import com.linkedin.metadata.search.LineageSearchService; import com.linkedin.metadata.search.ScrollResult; @@ -70,6 +71,23 @@ public static SearchResult searchAcrossEntities( facets); } + public static SearchResult searchAcrossEntities( + 
SearchService searchService, + String query, + @Nullable List facets, + Filter filter, + List entityNames) { + return searchService.searchAcrossEntities( + entityNames, + query, + filter, + null, + 0, + 100, + new SearchFlags().setFulltext(true).setSkipCache(true), + facets); + } + public static SearchResult searchAcrossCustomEntities( SearchService searchService, String query, List searchableEntities) { return searchService.searchAcrossEntities( diff --git a/metadata-io/src/test/resources/elasticsearch/long_tail/datasetindex_v2.json.gz b/metadata-io/src/test/resources/elasticsearch/long_tail/datasetindex_v2.json.gz index dd48fe240cdf2..5a412ff4b14e0 100644 Binary files a/metadata-io/src/test/resources/elasticsearch/long_tail/datasetindex_v2.json.gz and b/metadata-io/src/test/resources/elasticsearch/long_tail/datasetindex_v2.json.gz differ diff --git a/metadata-models/src/main/pegasus/com/linkedin/settings/global/OidcSettings.pdl b/metadata-models/src/main/pegasus/com/linkedin/settings/global/OidcSettings.pdl index d5b23c28cb227..f925505c8e54f 100644 --- a/metadata-models/src/main/pegasus/com/linkedin/settings/global/OidcSettings.pdl +++ b/metadata-models/src/main/pegasus/com/linkedin/settings/global/OidcSettings.pdl @@ -90,7 +90,12 @@ record OidcSettings { extractJwtAccessTokenClaims: optional boolean /** - * ADVANCED. Which jws algorithm to use. + * ADVANCED. Which jws algorithm to use. Unused. */ preferredJwsAlgorithm: optional string -} \ No newline at end of file + + /** + * ADVANCED. Which jws algorithm to use. 
+ */ + preferredJwsAlgorithm2: optional string +} diff --git a/metadata-service/auth-servlet-impl/build.gradle b/metadata-service/auth-servlet-impl/build.gradle index b8310bbd4ebc0..29e452472358b 100644 --- a/metadata-service/auth-servlet-impl/build.gradle +++ b/metadata-service/auth-servlet-impl/build.gradle @@ -18,4 +18,12 @@ dependencies { compileOnly externalDependency.lombok annotationProcessor externalDependency.lombok + + testImplementation externalDependency.testng + testImplementation externalDependency.springBootTest +} + +test { + testLogging.showStandardStreams = true + testLogging.exceptionFormat = 'full' } diff --git a/metadata-service/auth-servlet-impl/src/main/java/com/datahub/auth/authentication/AuthServiceController.java b/metadata-service/auth-servlet-impl/src/main/java/com/datahub/auth/authentication/AuthServiceController.java index 430ed2d236219..fc283b7e986bb 100644 --- a/metadata-service/auth-servlet-impl/src/main/java/com/datahub/auth/authentication/AuthServiceController.java +++ b/metadata-service/auth-servlet-impl/src/main/java/com/datahub/auth/authentication/AuthServiceController.java @@ -72,7 +72,9 @@ public class AuthServiceController { private static final String USE_NONCE = "useNonce"; private static final String READ_TIMEOUT = "readTimeout"; private static final String EXTRACT_JWT_ACCESS_TOKEN_CLAIMS = "extractJwtAccessTokenClaims"; + // Retained for backwards compatibility private static final String PREFERRED_JWS_ALGORITHM = "preferredJwsAlgorithm"; + private static final String PREFERRED_JWS_ALGORITHM_2 = "preferredJwsAlgorithm2"; @Inject StatelessTokenService _statelessTokenService; @@ -514,8 +516,8 @@ private void buildOidcSettingsResponse(JSONObject json, final OidcSettings oidcS if (oidcSettings.hasExtractJwtAccessTokenClaims()) { json.put(EXTRACT_JWT_ACCESS_TOKEN_CLAIMS, oidcSettings.isExtractJwtAccessTokenClaims()); } - if (oidcSettings.hasPreferredJwsAlgorithm()) { - json.put(PREFERRED_JWS_ALGORITHM, 
oidcSettings.getPreferredJwsAlgorithm()); + if (oidcSettings.hasPreferredJwsAlgorithm2()) { + json.put(PREFERRED_JWS_ALGORITHM, oidcSettings.getPreferredJwsAlgorithm2()); } } } diff --git a/metadata-service/auth-servlet-impl/src/test/java/com/datahub/auth/authentication/AuthServiceControllerTest.java b/metadata-service/auth-servlet-impl/src/test/java/com/datahub/auth/authentication/AuthServiceControllerTest.java new file mode 100644 index 0000000000000..bb305ae16900c --- /dev/null +++ b/metadata-service/auth-servlet-impl/src/test/java/com/datahub/auth/authentication/AuthServiceControllerTest.java @@ -0,0 +1,96 @@ +package com.datahub.auth.authentication; + +import static com.linkedin.metadata.Constants.GLOBAL_SETTINGS_INFO_ASPECT_NAME; +import static com.linkedin.metadata.Constants.GLOBAL_SETTINGS_URN; +import static org.mockito.Mockito.when; +import static org.testng.Assert.*; + +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.linkedin.data.schema.annotation.PathSpecBasedSchemaAnnotationVisitor; +import com.linkedin.metadata.entity.EntityService; +import com.linkedin.settings.global.GlobalSettingsInfo; +import com.linkedin.settings.global.OidcSettings; +import com.linkedin.settings.global.SsoSettings; +import java.io.IOException; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.boot.test.context.SpringBootTest; +import org.springframework.context.annotation.ComponentScan; +import org.springframework.context.annotation.Import; +import org.springframework.http.HttpStatus; +import org.springframework.http.ResponseEntity; +import org.springframework.test.context.testng.AbstractTestNGSpringContextTests; +import org.springframework.web.servlet.DispatcherServlet; +import org.testng.annotations.BeforeTest; +import org.testng.annotations.Test; + +@SpringBootTest(classes = {DispatcherServlet.class}) +@ComponentScan(basePackages = {"com.datahub.auth.authentication"}) 
+@Import({AuthServiceTestConfiguration.class}) +public class AuthServiceControllerTest extends AbstractTestNGSpringContextTests { + @BeforeTest + public void disableAssert() { + PathSpecBasedSchemaAnnotationVisitor.class + .getClassLoader() + .setClassAssertionStatus(PathSpecBasedSchemaAnnotationVisitor.class.getName(), false); + } + + @Autowired private AuthServiceController authServiceController; + @Autowired private EntityService mockEntityService; + + private final String PREFERRED_JWS_ALGORITHM = "preferredJwsAlgorithm"; + + @Test + public void initTest() { + assertNotNull(authServiceController); + assertNotNull(mockEntityService); + } + + @Test + public void oldPreferredJwsAlgorithmIsNotReturned() throws IOException { + OidcSettings mockOidcSettings = + new OidcSettings() + .setEnabled(true) + .setClientId("1") + .setClientSecret("2") + .setDiscoveryUri("http://localhost") + .setPreferredJwsAlgorithm("test"); + SsoSettings mockSsoSettings = + new SsoSettings().setBaseUrl("http://localhost").setOidcSettings(mockOidcSettings); + GlobalSettingsInfo mockGlobalSettingsInfo = new GlobalSettingsInfo().setSso(mockSsoSettings); + + when(mockEntityService.getLatestAspect(GLOBAL_SETTINGS_URN, GLOBAL_SETTINGS_INFO_ASPECT_NAME)) + .thenReturn(mockGlobalSettingsInfo); + + ResponseEntity httpResponse = authServiceController.getSsoSettings(null).join(); + assertEquals(httpResponse.getStatusCode(), HttpStatus.OK); + + JsonNode jsonNode = new ObjectMapper().readTree(httpResponse.getBody()); + assertFalse(jsonNode.has(PREFERRED_JWS_ALGORITHM)); + } + + @Test + public void newPreferredJwsAlgorithmIsReturned() throws IOException { + OidcSettings mockOidcSettings = + new OidcSettings() + .setEnabled(true) + .setClientId("1") + .setClientSecret("2") + .setDiscoveryUri("http://localhost") + .setPreferredJwsAlgorithm("jws1") + .setPreferredJwsAlgorithm2("jws2"); + SsoSettings mockSsoSettings = + new SsoSettings().setBaseUrl("http://localhost").setOidcSettings(mockOidcSettings); + 
GlobalSettingsInfo mockGlobalSettingsInfo = new GlobalSettingsInfo().setSso(mockSsoSettings); + + when(mockEntityService.getLatestAspect(GLOBAL_SETTINGS_URN, GLOBAL_SETTINGS_INFO_ASPECT_NAME)) + .thenReturn(mockGlobalSettingsInfo); + + ResponseEntity httpResponse = authServiceController.getSsoSettings(null).join(); + assertEquals(httpResponse.getStatusCode(), HttpStatus.OK); + + JsonNode jsonNode = new ObjectMapper().readTree(httpResponse.getBody()); + assertTrue(jsonNode.has(PREFERRED_JWS_ALGORITHM)); + assertEquals(jsonNode.get(PREFERRED_JWS_ALGORITHM).asText(), "jws2"); + } +} diff --git a/metadata-service/auth-servlet-impl/src/test/java/com/datahub/auth/authentication/AuthServiceTestConfiguration.java b/metadata-service/auth-servlet-impl/src/test/java/com/datahub/auth/authentication/AuthServiceTestConfiguration.java new file mode 100644 index 0000000000000..428f14e67d137 --- /dev/null +++ b/metadata-service/auth-servlet-impl/src/test/java/com/datahub/auth/authentication/AuthServiceTestConfiguration.java @@ -0,0 +1,32 @@ +package com.datahub.auth.authentication; + +import com.datahub.authentication.Authentication; +import com.datahub.authentication.invite.InviteTokenService; +import com.datahub.authentication.token.StatelessTokenService; +import com.datahub.authentication.user.NativeUserService; +import com.datahub.telemetry.TrackingService; +import com.linkedin.gms.factory.config.ConfigurationProvider; +import com.linkedin.metadata.entity.EntityService; +import com.linkedin.metadata.secret.SecretService; +import org.springframework.boot.test.context.TestConfiguration; +import org.springframework.boot.test.mock.mockito.MockBean; + +@TestConfiguration +public class AuthServiceTestConfiguration { + @MockBean StatelessTokenService _statelessTokenService; + + @MockBean Authentication _systemAuthentication; + + @MockBean(name = "configurationProvider") + ConfigurationProvider _configProvider; + + @MockBean NativeUserService _nativeUserService; + + @MockBean 
EntityService _entityService; + + @MockBean SecretService _secretService; + + @MockBean InviteTokenService _inviteTokenService; + + @MockBean TrackingService _trackingService; +} diff --git a/smoke-test/tests/cypress/cypress/e2e/glossary/glossary.js b/smoke-test/tests/cypress/cypress/e2e/glossary/glossary.js index dbc4e1db72943..b0e24d5346fea 100644 --- a/smoke-test/tests/cypress/cypress/e2e/glossary/glossary.js +++ b/smoke-test/tests/cypress/cypress/e2e/glossary/glossary.js @@ -1,6 +1,6 @@ const urn = "urn:li:dataset:(urn:li:dataPlatform:hive,cypress_logging_events,PROD)"; const datasetName = "cypress_logging_events"; -const glossaryTerm = "CypressGlosssaryTerm"; +const glossaryTerm = "CypressGlossaryTerm"; const glossaryTermGroup = "CypressGlossaryGroup"; describe("glossary", () => { @@ -8,9 +8,9 @@ describe("glossary", () => { cy.loginWithCredentials(); cy.goToGlossaryList(); cy.clickOptionWithText("Add Term"); - cy.addViaModal(glossaryTerm, "Create Glossary Term", glossaryTerm); + cy.addViaModal(glossaryTerm, "Create Glossary Term", glossaryTerm, "glossary-entity-modal-create-button"); cy.clickOptionWithText("Add Term Group"); - cy.addViaModal(glossaryTermGroup, "Create Term Group", glossaryTermGroup); + cy.addViaModal(glossaryTermGroup, "Create Term Group", glossaryTermGroup, "glossary-entity-modal-create-button"); cy.addTermToDataset(urn, datasetName, glossaryTerm); cy.waitTextVisible(glossaryTerm) cy.goToGlossaryList(); diff --git a/smoke-test/tests/cypress/cypress/e2e/login/login.js b/smoke-test/tests/cypress/cypress/e2e/login/login.js index 309eedb10b6da..cfeb2619593ff 100644 --- a/smoke-test/tests/cypress/cypress/e2e/login/login.js +++ b/smoke-test/tests/cypress/cypress/e2e/login/login.js @@ -1,8 +1,8 @@ describe('login', () => { it('logs in', () => { cy.visit('/'); - cy.get('input[data-testid=username]').type(Cypress.env('ADMIN_USERNAME')); - cy.get('input[data-testid=password]').type(Cypress.env('ADMIN_PASSWORD')); + 
cy.get('input[data-testid=username]').type('datahub'); + cy.get('input[data-testid=password]').type('datahub'); cy.contains('Sign In').click(); cy.contains('Welcome back, ' + Cypress.env('ADMIN_DISPLAYNAME')); }); diff --git a/smoke-test/tests/cypress/cypress/e2e/mutations/edit_documentation.js b/smoke-test/tests/cypress/cypress/e2e/mutations/edit_documentation.js index 5f9758a35ca0e..c6d2b205250e0 100644 --- a/smoke-test/tests/cypress/cypress/e2e/mutations/edit_documentation.js +++ b/smoke-test/tests/cypress/cypress/e2e/mutations/edit_documentation.js @@ -78,17 +78,18 @@ describe("edit documentation and link to dataset", () => { cy.visit( "/dataset/urn:li:dataset:(urn:li:dataPlatform:hive,SampleCypressHiveDataset,PROD)/Schema" ); - cy.get("tbody [data-icon='edit']").first().click({ force: true }); + cy.clickOptionWithText("field_foo"); + cy.clickOptionWithTestId("edit-field-description"); cy.waitTextVisible("Update description"); cy.waitTextVisible("Foo field description has changed"); - cy.focused().clear().wait(1000); + cy.getWithTestId("description-editor").clear().wait(1000); cy.focused().type(documentation_edited); cy.clickOptionWithTestId("description-modal-update-button"); cy.waitTextVisible("Updated!"); cy.waitTextVisible(documentation_edited); cy.waitTextVisible("(edited)"); - cy.get("tbody [data-icon='edit']").first().click({ force: true }); - cy.focused().clear().wait(1000); + cy.clickOptionWithTestId("edit-field-description"); + cy.getWithTestId("description-editor").clear().wait(1000); cy.focused().type("Foo field description has changed"); cy.clickOptionWithTestId("description-modal-update-button"); cy.waitTextVisible("Updated!"); diff --git a/smoke-test/tests/cypress/cypress/e2e/mutations/managed_ingestion.js b/smoke-test/tests/cypress/cypress/e2e/mutations/managed_ingestion.js index 05f94c94bfe2a..c355aaabc336a 100644 --- a/smoke-test/tests/cypress/cypress/e2e/mutations/managed_ingestion.js +++ 
b/smoke-test/tests/cypress/cypress/e2e/mutations/managed_ingestion.js @@ -26,7 +26,7 @@ describe("run managed ingestion", () => { cy.enterTextInTestId('source-name-input', testName) cy.clickOptionWithText("Advanced") cy.enterTextInTestId('cli-version-input', cli_version) - cy.clickOptionWithText("Save & Run") + cy.clickOptionWithTextToScrollintoView("Save & Run") cy.waitTextVisible(testName) cy.contains(testName).parent().within(() => { diff --git a/smoke-test/tests/cypress/cypress/e2e/mutations/mutations.js b/smoke-test/tests/cypress/cypress/e2e/mutations/mutations.js index 1baa33901724f..7f8a4e4f8f335 100644 --- a/smoke-test/tests/cypress/cypress/e2e/mutations/mutations.js +++ b/smoke-test/tests/cypress/cypress/e2e/mutations/mutations.js @@ -77,7 +77,7 @@ describe("mutations", () => { cy.login(); cy.viewport(2000, 800); cy.goToDataset("urn:li:dataset:(urn:li:dataPlatform:hive,cypress_logging_events,PROD)", "cypress_logging_events"); - cy.mouseover('[data-testid="schema-field-event_name-tags"]'); + cy.clickOptionWithText("event_name"); cy.get('[data-testid="schema-field-event_name-tags"]').within(() => cy.contains("Add Tag").click() ); @@ -116,7 +116,8 @@ describe("mutations", () => { // verify dataset shows up in search now cy.contains("of 1 result").click(); cy.contains("cypress_logging_events").click(); - cy.get('[data-testid="tag-CypressTestAddTag2"]').within(() => + cy.clickOptionWithText("event_name"); + cy.get('[data-testid="schema-field-event_name-tags"]').within(() => cy .get("span[aria-label=close]") .trigger("mouseover", { force: true }) @@ -134,10 +135,7 @@ describe("mutations", () => { // make space for the glossary term column cy.viewport(2000, 800); cy.goToDataset("urn:li:dataset:(urn:li:dataPlatform:hive,cypress_logging_events,PROD)", "cypress_logging_events"); - cy.get('[data-testid="schema-field-event_name-terms"]').trigger( - "mouseover", - { force: true } - ); + cy.clickOptionWithText("event_name"); 
cy.get('[data-testid="schema-field-event_name-terms"]').within(() => cy.contains("Add Term").click({ force: true }) ); @@ -146,9 +144,12 @@ describe("mutations", () => { cy.contains("CypressTerm"); - cy.get( - 'a[href="/glossaryTerm/urn:li:glossaryTerm:CypressNode.CypressTerm"]' - ).within(() => cy.get("span[aria-label=close]").click({ force: true })); + cy.get('[data-testid="schema-field-event_name-terms"]').within(() => + cy + .get("span[aria-label=close]") + .trigger("mouseover", { force: true }) + .click({ force: true }) + ); cy.contains("Yes").click({ force: true }); cy.contains("CypressTerm").should("not.exist"); diff --git a/smoke-test/tests/cypress/cypress/e2e/schema_blame/schema_blame.js b/smoke-test/tests/cypress/cypress/e2e/schema_blame/schema_blame.js index 6e282b5249636..1ce1fbe900172 100644 --- a/smoke-test/tests/cypress/cypress/e2e/schema_blame/schema_blame.js +++ b/smoke-test/tests/cypress/cypress/e2e/schema_blame/schema_blame.js @@ -14,6 +14,7 @@ describe('schema blame', () => { cy.contains('field_bar').should('not.exist'); cy.contains('Foo field description has changed'); cy.contains('Baz field description'); + cy.clickOptionWithText("field_foo"); cy.get('[data-testid="schema-field-field_foo-tags"]').contains('Legacy'); // Make sure the schema blame is accurate @@ -41,6 +42,7 @@ describe('schema blame', () => { cy.contains('field_baz').should('not.exist'); cy.contains('Foo field description'); cy.contains('Bar field description'); + cy.clickOptionWithText("field_foo"); cy.get('[data-testid="schema-field-field_foo-tags"]').contains('Legacy').should('not.exist'); // Make sure the schema blame is accurate diff --git a/smoke-test/tests/cypress/cypress/support/commands.js b/smoke-test/tests/cypress/cypress/support/commands.js index ba5600b79f5f6..51b06a24c1921 100644 --- a/smoke-test/tests/cypress/cypress/support/commands.js +++ b/smoke-test/tests/cypress/cypress/support/commands.js @@ -183,10 +183,10 @@ Cypress.Commands.add("addViaFormModal", (text, 
modelHeader) => { cy.get(".ant-modal-footer > button:nth-child(2)").click(); }); -Cypress.Commands.add("addViaModal", (text, modelHeader,value) => { +Cypress.Commands.add("addViaModal", (text, modelHeader, value, dataTestId) => { cy.waitTextVisible(modelHeader); cy.get(".ant-input-affix-wrapper > input[type='text']").first().type(text); - cy.get(".ant-modal-footer > button:nth-child(2)").click(); + cy.get('[data-testid="' + dataTestId + '"]').click(); cy.contains(value).should('be.visible'); }); @@ -218,6 +218,10 @@ Cypress.Commands.add( 'multiSelect', (within_data_id , text) => { cy.clickOptionWithText(text); }); +Cypress.Commands.add("getWithTestId", (id) => { + return cy.get(selectorWithtestId(id)); +}); + Cypress.Commands.add("enterTextInTestId", (id, text) => { cy.get(selectorWithtestId(id)).type(text); }) diff --git a/test-models/src/main/pegasus/com/datahub/test/TestEntityInfo.pdl b/test-models/src/main/pegasus/com/datahub/test/TestEntityInfo.pdl index 3b8aa4f39f7b7..d1daa7b8d4593 100644 --- a/test-models/src/main/pegasus/com/datahub/test/TestEntityInfo.pdl +++ b/test-models/src/main/pegasus/com/datahub/test/TestEntityInfo.pdl @@ -97,4 +97,10 @@ record TestEntityInfo includes CustomProperties { "fieldType": "DOUBLE" } doubleField: optional double + + @Searchable = { + "fieldName": "removed", + "fieldType": "BOOLEAN" + } + removed: optional boolean }