
Commit

Merge branch 'datahub-project:master' into master
anshbansal authored Aug 1, 2024
2 parents 51a020c + 66ecfae commit 1ad6aad
Showing 76 changed files with 3,501 additions and 288 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/airflow-plugin.yml
@@ -74,7 +74,7 @@ jobs:
      - name: pip freeze show list installed
        if: always()
        run: source metadata-ingestion-modules/airflow-plugin/venv/bin/activate && pip freeze
      - uses: actions/upload-artifact@v4
      - uses: actions/upload-artifact@v3
        if: ${{ always() && matrix.python-version == '3.10' && matrix.extra_pip_requirements == 'apache-airflow>=2.7.0' }}
        with:
          name: Test Results (Airflow Plugin ${{ matrix.python-version}})
@@ -98,7 +98,7 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Upload
        uses: actions/upload-artifact@v4
        uses: actions/upload-artifact@v3
        with:
          name: Event File
          path: ${{ github.event_path }}
4 changes: 2 additions & 2 deletions .github/workflows/build-and-test.yml
@@ -99,7 +99,7 @@ jobs:
        if: ${{ matrix.command == 'except_metadata_ingestion' && needs.setup.outputs.backend_change == 'true' }}
        run: |
          ./gradlew -PjavaClassVersionDefault=8 :metadata-integration:java:spark-lineage:compileJava
      - uses: actions/upload-artifact@v4
      - uses: actions/upload-artifact@v3
        if: always()
        with:
          name: Test Results (build)
@@ -128,7 +128,7 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Upload
        uses: actions/upload-artifact@v4
        uses: actions/upload-artifact@v3
        with:
          name: Event File
          path: ${{ github.event_path }}
4 changes: 2 additions & 2 deletions .github/workflows/dagster-plugin.yml
@@ -56,7 +56,7 @@ jobs:
      - name: pip freeze show list installed
        if: always()
        run: source metadata-ingestion-modules/dagster-plugin/venv/bin/activate && pip freeze
      - uses: actions/upload-artifact@v4
      - uses: actions/upload-artifact@v3
        if: ${{ always() && matrix.python-version == '3.10' && matrix.extraPythonRequirement == 'dagster>=1.3.3' }}
        with:
          name: Test Results (dagster Plugin ${{ matrix.python-version}})
@@ -79,7 +79,7 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Upload
        uses: actions/upload-artifact@v4
        uses: actions/upload-artifact@v3
        with:
          name: Event File
          path: ${{ github.event_path }}
6 changes: 3 additions & 3 deletions .github/workflows/docker-unified.yml
@@ -1023,18 +1023,18 @@ jobs:
          docker logs datahub-datahub-frontend-react-1 >& frontend-${{ matrix.test_strategy }}.log || true
          docker logs datahub-upgrade-1 >& upgrade-${{ matrix.test_strategy }}.log || true
      - name: Upload logs
        uses: actions/upload-artifact@v4
        uses: actions/upload-artifact@v3
        if: failure()
        with:
          name: docker logs
          path: "*.log"
      - name: Upload screenshots
        uses: actions/upload-artifact@v4
        uses: actions/upload-artifact@v3
        if: failure()
        with:
          name: cypress-snapshots-${{ matrix.test_strategy }}
          path: smoke-test/tests/cypress/cypress/screenshots/
      - uses: actions/upload-artifact@v4
      - uses: actions/upload-artifact@v3
        if: always()
        with:
          name: Test Results (smoke tests) ${{ matrix.test_strategy }}
4 changes: 2 additions & 2 deletions .github/workflows/metadata-ingestion.yml
@@ -83,7 +83,7 @@ jobs:
          df -hl
          docker image ls
          docker system df
      - uses: actions/upload-artifact@v4
      - uses: actions/upload-artifact@v3
        with:
          name: Test Results (metadata ingestion ${{ matrix.python-version }})
          path: |
@@ -106,7 +106,7 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Upload
        uses: actions/upload-artifact@v4
        uses: actions/upload-artifact@v3
        with:
          name: Event File
          path: ${{ github.event_path }}
4 changes: 2 additions & 2 deletions .github/workflows/metadata-io.yml
@@ -62,7 +62,7 @@ jobs:
      - name: Gradle build (and test)
        run: |
          ./gradlew :metadata-io:test
      - uses: actions/upload-artifact@v4
      - uses: actions/upload-artifact@v3
        if: always()
        with:
          name: Test Results (metadata-io)
@@ -78,7 +78,7 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Upload
        uses: actions/upload-artifact@v4
        uses: actions/upload-artifact@v3
        with:
          name: Event File
          path: ${{ github.event_path }}
4 changes: 2 additions & 2 deletions .github/workflows/spark-smoke-test.yml
@@ -69,14 +69,14 @@ jobs:
          docker logs elasticsearch >& elasticsearch-${{ matrix.test_strategy }}.log || true
          docker logs datahub-frontend-react >& frontend-${{ matrix.test_strategy }}.log || true
      - name: Upload logs
        uses: actions/upload-artifact@v4
        uses: actions/upload-artifact@v3
        if: failure()
        with:
          name: docker logs
          path: |
            "**/build/container-logs/*.log"
            "*.log"
      - uses: actions/upload-artifact@v4
      - uses: actions/upload-artifact@v3
        if: always()
        with:
          name: Test Results (smoke tests)
15 changes: 8 additions & 7 deletions docker/datahub-frontend/Dockerfile
@@ -25,24 +25,25 @@ RUN apk --no-cache --update-cache --available upgrade \

ENV LD_LIBRARY_PATH="/lib:/lib64"

FROM base as prod-install
FROM base as unpack

COPY ./datahub-frontend.zip /
RUN unzip datahub-frontend.zip -d /datahub-frontend \
    && mv /datahub-frontend/main/* /datahub-frontend \
    && rmdir /datahub-frontend/main \
    && rm datahub-frontend.zip
RUN unzip datahub-frontend.zip -d /tmp/out \
    && mv /tmp/out/main /datahub-frontend
COPY ./docker/monitoring/client-prometheus-config.yaml /datahub-frontend/
RUN chown -R datahub:datahub /datahub-frontend && chmod 755 /datahub-frontend

FROM base as prod-install

COPY --from=unpack /datahub-frontend/ /datahub-frontend/

FROM base as dev-install
# Dummy stage for development. Assumes code is built on your machine and mounted to this image.
# See this excellent thread https://github.com/docker/cli/issues/1134
VOLUME [ "/datahub-frontend" ]

FROM ${APP_ENV}-install as final
COPY ./docker/datahub-frontend/start.sh /
RUN chown datahub:datahub /start.sh && chmod 755 /start.sh
COPY --chown=datahub:datahub --chmod=755 ./docker/datahub-frontend/start.sh /
USER datahub

ARG SERVER_PORT=9002
16 changes: 8 additions & 8 deletions docker/datahub-ingestion-base/Dockerfile
@@ -85,18 +85,18 @@ RUN apt-get update && apt-get install -y -qq \
RUN if [ $(arch) = "x86_64" ]; then \
    mkdir /opt/oracle && \
    cd /opt/oracle && \
    wget --no-verbose -c https://download.oracle.com/otn_software/linux/instantclient/216000/instantclient-basic-linux.x64-21.6.0.0.0dbru.zip && \
    unzip instantclient-basic-linux.x64-21.6.0.0.0dbru.zip && \
    rm instantclient-basic-linux.x64-21.6.0.0.0dbru.zip && \
    sh -c "echo /opt/oracle/instantclient_21_6 > /etc/ld.so.conf.d/oracle-instantclient.conf" && \
    wget --no-verbose -c https://download.oracle.com/otn_software/linux/instantclient/2115000/instantclient-basic-linux.x64-21.15.0.0.0dbru.zip && \
    unzip instantclient-basic-linux.x64-21.15.0.0.0dbru.zip && \
    rm instantclient-basic-linux.x64-21.15.0.0.0dbru.zip && \
    sh -c "echo /opt/oracle/instantclient_21_15 > /etc/ld.so.conf.d/oracle-instantclient.conf" && \
    ldconfig; \
    else \
    mkdir /opt/oracle && \
    cd /opt/oracle && \
    wget --no-verbose -c https://download.oracle.com/otn_software/linux/instantclient/191000/instantclient-basic-linux.arm64-19.10.0.0.0dbru.zip && \
    unzip instantclient-basic-linux.arm64-19.10.0.0.0dbru.zip && \
    rm instantclient-basic-linux.arm64-19.10.0.0.0dbru.zip && \
    sh -c "echo /opt/oracle/instantclient_19_10 > /etc/ld.so.conf.d/oracle-instantclient.conf" && \
    wget --no-verbose -c https://download.oracle.com/otn_software/linux/instantclient/1923000/instantclient-basic-linux.arm64-19.23.0.0.0dbru.zip && \
    unzip instantclient-basic-linux.arm64-19.23.0.0.0dbru.zip && \
    rm instantclient-basic-linux.arm64-19.23.0.0.0dbru.zip && \
    sh -c "echo /opt/oracle/instantclient_19_23 > /etc/ld.so.conf.d/oracle-instantclient.conf" && \
    ldconfig; \
    fi;

2 changes: 1 addition & 1 deletion docker/datahub-ingestion/Dockerfile
@@ -1,7 +1,7 @@
# Defining environment
ARG APP_ENV=full
ARG BASE_IMAGE=acryldata/datahub-ingestion-base
ARG DOCKER_VERSION=head
ARG DOCKER_VERSION=head-full
ARG DEBIAN_REPO_URL=https://deb.debian.org/debian
ARG PIP_MIRROR_URL=https://pypi.python.org/simple

24 changes: 24 additions & 0 deletions docker/kafka-setup/env_to_properties.py
@@ -0,0 +1,24 @@
import os
import re
import sys


def env_to_properties(env_prefix: str, properties_file: str):
    pattern = re.compile('(?<=[^_])_(?=[^_])')
    props = {}

    for (env_name, val) in os.environ.items():
        if env_name.startswith(env_prefix):
            raw_name = env_name[len(env_prefix):].lower()
            prop_dot = '.'.join(pattern.split(raw_name))
            props[prop_dot] = val

    with open(properties_file, 'a') as f:
        for k, v in props.items():
            f.writelines(f'{k}={v}\n')


if __name__ == '__main__':
    env_prefix = sys.argv[1]
    properties_file = sys.argv[2]
    env_to_properties(env_prefix, properties_file)
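
Illustrative note (not part of the commit): the new helper maps any environment variable carrying the given prefix to a dotted property name and appends it to the target file, which is what lets kafka-setup.sh below replace its per-protocol if-blocks with a single call. A minimal sketch, with made-up variable values and output path:

```python
# Sketch only: the variable values and output path are hypothetical.
# Assumes env_to_properties.py is importable from the working directory.
import os

from env_to_properties import env_to_properties

os.environ["KAFKA_PROPERTIES_SECURITY_PROTOCOL"] = "SASL_SSL"
os.environ["KAFKA_PROPERTIES_SASL_MECHANISM"] = "PLAIN"

env_to_properties("KAFKA_PROPERTIES_", "/tmp/connection.properties")

# /tmp/connection.properties now ends with lines such as:
#   security.protocol=SASL_SSL
#   sasl.mechanism=PLAIN
```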
40 changes: 1 addition & 39 deletions docker/kafka-setup/kafka-setup.sh
@@ -10,46 +10,8 @@ fi
. kafka-config.sh

echo "bootstrap.servers=$KAFKA_BOOTSTRAP_SERVER" > $CONNECTION_PROPERTIES_PATH
echo "security.protocol=$KAFKA_PROPERTIES_SECURITY_PROTOCOL" >> $CONNECTION_PROPERTIES_PATH

## Add support for SASL_PLAINTEXT
if [[ $KAFKA_PROPERTIES_SECURITY_PROTOCOL == "SASL_PLAINTEXT" ]]; then
echo "sasl.mechanism=$KAFKA_PROPERTIES_SASL_MECHANISM" >> $CONNECTION_PROPERTIES_PATH
echo "sasl.jaas.config=$KAFKA_PROPERTIES_SASL_JAAS_CONFIG" >> $CONNECTION_PROPERTIES_PATH
echo "sasl.kerberos.service.name=$KAFKA_PROPERTIES_SASL_KERBEROS_SERVICE_NAME" >> $CONNECTION_PROPERTIES_PATH
fi

## Add support for SASL_SSL
if [[ $KAFKA_PROPERTIES_SECURITY_PROTOCOL == "SASL_SSL" ]]; then
echo "sasl.jaas.config=$KAFKA_PROPERTIES_SASL_JAAS_CONFIG" >> $CONNECTION_PROPERTIES_PATH
echo "sasl.mechanism=$KAFKA_PROPERTIES_SASL_MECHANISM" >> $CONNECTION_PROPERTIES_PATH
fi

if [[ $KAFKA_PROPERTIES_SECURITY_PROTOCOL == "SSL" ]]; then
if [[ -n $KAFKA_PROPERTIES_SSL_KEYSTORE_LOCATION ]]; then
echo "ssl.keystore.location=$KAFKA_PROPERTIES_SSL_KEYSTORE_LOCATION" >> $CONNECTION_PROPERTIES_PATH
echo "ssl.keystore.password=$KAFKA_PROPERTIES_SSL_KEYSTORE_PASSWORD" >> $CONNECTION_PROPERTIES_PATH
echo "ssl.key.password=$KAFKA_PROPERTIES_SSL_KEY_PASSWORD" >> $CONNECTION_PROPERTIES_PATH
if [[ -n $KAFKA_PROPERTIES_SSL_KEYSTORE_TYPE ]]; then
echo "ssl.keystore.type=$KAFKA_PROPERTIES_SSL_KEYSTORE_TYPE" >> $CONNECTION_PROPERTIES_PATH
fi
fi
if [[ -n $KAFKA_PROPERTIES_SSL_TRUSTSTORE_LOCATION ]]; then
echo "ssl.truststore.location=$KAFKA_PROPERTIES_SSL_TRUSTSTORE_LOCATION" >> $CONNECTION_PROPERTIES_PATH
if [[ $KAFKA_PROPERTIES_SSL_TRUSTSTORE_TYPE != "PEM" ]]; then
echo "ssl.truststore.password=$KAFKA_PROPERTIES_SSL_TRUSTSTORE_PASSWORD" >> $CONNECTION_PROPERTIES_PATH
fi
if [[ -n $KAFKA_PROPERTIES_SSL_TRUSTSTORE_TYPE ]]; then
echo "ssl.truststore.type=$KAFKA_PROPERTIES_SSL_TRUSTSTORE_TYPE" >> $CONNECTION_PROPERTIES_PATH
fi
fi
echo "ssl.endpoint.identification.algorithm=$KAFKA_PROPERTIES_SSL_ENDPOINT_IDENTIFICATION_ALGORITHM" >> $CONNECTION_PROPERTIES_PATH
fi

# Add support for SASL_CLIENT_CALLBACK_HANDLER_CLASS
if [[ -n "$KAFKA_PROPERTIES_SASL_CLIENT_CALLBACK_HANDLER_CLASS" ]]; then
echo "sasl.client.callback.handler.class=$KAFKA_PROPERTIES_SASL_CLIENT_CALLBACK_HANDLER_CLASS" >> $CONNECTION_PROPERTIES_PATH
fi
python env_to_properties.py KAFKA_PROPERTIES_ $CONNECTION_PROPERTIES_PATH

# cub kafka-ready -c $CONNECTION_PROPERTIES_PATH -b $KAFKA_BOOTSTRAP_SERVER 1 180
. kafka-ready.sh
8 changes: 4 additions & 4 deletions docs/actions/guides/developing-a-transformer.md
@@ -23,7 +23,7 @@ print the configuration that is provided when it is created, and print any Event
```python
# custom_transformer.py
from datahub_actions.transform.transformer import Transformer
from datahub_actions.event.event import EventEnvelope
from datahub_actions.event.event_envelope import EventEnvelope
from datahub_actions.pipeline.pipeline_context import PipelineContext
from typing import Optional

@@ -75,7 +75,7 @@ Next, install the package
pip install -e .
```

inside the module. (alt.`python setup.py`).
inside the module. (alt.`python setup.py`).

Once we have done this, our class will be referencable via `custom_transformer_example.custom_transformer:CustomTransformer`.

@@ -96,7 +96,7 @@ source:
  connection:
    bootstrap: ${KAFKA_BOOTSTRAP_SERVER:-localhost:9092}
    schema_registry_url: ${SCHEMA_REGISTRY_URL:-http://localhost:8081}
transform:
transform:
  - type: "custom_transformer_example.custom_transformer:CustomTransformer"
    config:
      # Some sample configuration which should be printed on create.
@@ -130,4 +130,4 @@ it without defining the full module path.
Prerequisites to consideration for inclusion in the core Transformer library include

- **Testing** Define unit tests for your Transformer
- **Deduplication** Confirm that no existing Transformer serves the same purpose, or can be easily extended to serve the same purpose
- **Deduplication** Confirm that no existing Transformer serves the same purpose, or can be easily extended to serve the same purpose
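
Illustrative note (not part of the diff): a minimal transformer using the corrected import path might look roughly like the sketch below. It assumes the `Transformer` interface described in this guide (a `create` factory plus a `transform` method), so treat the exact signatures as an approximation rather than the library's definitive API.

```python
# custom_transformer.py (sketch; assumes the Transformer interface described above)
from typing import Optional

from datahub_actions.event.event_envelope import EventEnvelope  # corrected module path
from datahub_actions.pipeline.pipeline_context import PipelineContext
from datahub_actions.transform.transformer import Transformer


class CustomTransformer(Transformer):
    def __init__(self, config: Optional[dict], ctx: PipelineContext):
        # Print the configuration provided when the transformer is created.
        print(f"Creating CustomTransformer with config: {config}")
        self.config = config

    @classmethod
    def create(cls, config_dict: Optional[dict], ctx: PipelineContext) -> "Transformer":
        return cls(config_dict, ctx)

    def transform(self, env_event: EventEnvelope) -> Optional[EventEnvelope]:
        # Print each event that flows through, then pass it along unchanged.
        print(f"Received event: {env_event}")
        return env_event
```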
2 changes: 1 addition & 1 deletion metadata-ingestion/setup.py
@@ -454,7 +454,7 @@
    },
    # FIXME: I don't think tableau uses sqllineage anymore so we should be able
    # to remove that dependency.
    "tableau": {"tableauserverclient>=0.17.0"} | sqllineage_lib | sqlglot_lib,
    "tableau": {"tableauserverclient>=0.24.0"} | sqllineage_lib | sqlglot_lib,
    "teradata": sql_common
    | usage_common
    | sqlglot_lib
1 change: 1 addition & 0 deletions metadata-ingestion/src/datahub/cli/get_cli.py
@@ -56,6 +56,7 @@ def urn(ctx: Any, urn: Optional[str], aspect: List[str], details: bool) -> None:
                entity_urn=urn,
                aspects=aspect,
                typed=False,
                details=details,
            ),
            sort_keys=True,
            indent=2,
@@ -1,3 +1,4 @@
import logging
from typing import Iterable, Optional

from pydantic.fields import Field
@@ -18,6 +19,8 @@
from datahub.specific.dashboard import DashboardPatchBuilder
from datahub.specific.dataset import DatasetPatchBuilder

logger = logging.getLogger(__name__)


def convert_upstream_lineage_to_patch(
    urn: str,
@@ -48,6 +51,20 @@ def convert_chart_info_to_patch(
        for inputEdge in aspect.inputEdges:
            patch_builder.add_input_edge(inputEdge)

    patch_builder.set_chart_url(aspect.chartUrl).set_external_url(
        aspect.externalUrl
    ).set_type(aspect.type).set_title(aspect.title).set_access(
        aspect.access
    ).set_last_modified(
        aspect.lastModified
    ).set_last_refreshed(
        aspect.lastRefreshed
    ).set_description(
        aspect.description
    ).add_inputs(
        aspect.inputs
    )

    values = patch_builder.build()
    if values:
        mcp = next(iter(values))
@@ -76,8 +93,36 @@ def convert_dashboard_info_to_patch(
        for chartEdge in aspect.chartEdges:
            patch_builder.add_chart_edge(chartEdge)

    if aspect.title:
        patch_builder.set_title(aspect.title)

    if aspect.description:
        patch_builder.set_description(aspect.description)

    if aspect.charts:
        patch_builder.add_charts(aspect.charts)

    if aspect.dashboardUrl:
        patch_builder.set_dashboard_url(aspect.dashboardUrl)

    if aspect.datasets:
        patch_builder.add_datasets(aspect.datasets)

    if aspect.access:
        patch_builder.set_access(aspect.access)

    if aspect.lastRefreshed:
        patch_builder.set_last_refreshed(aspect.lastRefreshed)

    if aspect.lastModified:
        patch_builder.set_last_modified(last_modified=aspect.lastModified)

    values = patch_builder.build()

    if values:
        logger.debug(
            f"Generating patch DashboardInfo MetadataWorkUnit for dashboard {aspect.title}"
        )
        mcp = next(iter(values))
        return MetadataWorkUnit(
            id=MetadataWorkUnit.generate_workunit_id(mcp), mcp_raw=mcp
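
Illustrative note (not part of the commit): the conditional calls above mean the resulting patch only touches fields that are actually present on the aspect, instead of overwriting absent ones. A rough sketch of driving the same builder directly, with an invented URN and values:

```python
# Sketch only: the URN and field values are invented for illustration.
from datahub.specific.dashboard import DashboardPatchBuilder

patch_builder = DashboardPatchBuilder("urn:li:dashboard:(looker,example_dashboard)")

# Only fields that are explicitly set end up in the patch; anything left unset
# is not overwritten on the server, mirroring the conditional logic above.
patch_builder.set_title("Example dashboard")
patch_builder.set_dashboard_url("https://dashboards.example.com/1")

for mcp in patch_builder.build():
    print(mcp.entityUrn, mcp.aspectName, mcp.changeType)
```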
