From 8cd620c168197e5a8ea7e34ff33cfa7eb6a5983f Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Thu, 17 Nov 2022 19:12:55 -0500 Subject: [PATCH 1/4] ci(ingest): test with python 3.11 --- .github/workflows/metadata-ingestion.yml | 4 ++-- metadata-ingestion-modules/airflow-plugin/setup.py | 1 + metadata-ingestion/setup.py | 1 + 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/workflows/metadata-ingestion.yml b/.github/workflows/metadata-ingestion.yml index 043a32a4ce06f..bc3903b40648f 100644 --- a/.github/workflows/metadata-ingestion.yml +++ b/.github/workflows/metadata-ingestion.yml @@ -29,7 +29,7 @@ jobs: # DATAHUB_LOOKML_GIT_TEST_SSH_KEY: ${{ secrets.DATAHUB_LOOKML_GIT_TEST_SSH_KEY }} strategy: matrix: - python-version: ["3.7", "3.10"] + python-version: ["3.7", "3.11"] command: [ "lint", @@ -41,7 +41,7 @@ jobs: include: - python-version: "3.7" extraPythonRequirement: "sqlalchemy==1.3.24 apache-airflow~=2.2.0" - - python-version: "3.10" + - python-version: "3.11" extraPythonRequirement: "sqlalchemy~=1.4.0 apache-airflow>=2.4.0" fail-fast: false steps: diff --git a/metadata-ingestion-modules/airflow-plugin/setup.py b/metadata-ingestion-modules/airflow-plugin/setup.py index 8c6338c114d88..0200dbf852c30 100644 --- a/metadata-ingestion-modules/airflow-plugin/setup.py +++ b/metadata-ingestion-modules/airflow-plugin/setup.py @@ -104,6 +104,7 @@ def get_long_description(): "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", "Intended Audience :: Developers", "Intended Audience :: Information Technology", "Intended Audience :: System Administrators", diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py index c0e1d66bed48c..2967fa884fcbe 100644 --- a/metadata-ingestion/setup.py +++ b/metadata-ingestion/setup.py @@ -577,6 +577,7 @@ def get_long_description(): "Programming Language :: Python :: 3.8", "Programming Language 
:: Python :: 3.9", "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", "Intended Audience :: Developers", "Intended Audience :: Information Technology", "Intended Audience :: System Administrators", From ec638d302a61b80127f5d4b94283ee802544a535 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Wed, 7 Jun 2023 08:25:25 -0700 Subject: [PATCH 2/4] loosen version --- metadata-ingestion/build.gradle | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metadata-ingestion/build.gradle b/metadata-ingestion/build.gradle index a1769df968fef..bb48de783e66d 100644 --- a/metadata-ingestion/build.gradle +++ b/metadata-ingestion/build.gradle @@ -17,7 +17,7 @@ def get_coverage_arg(test_name) { task checkPythonVersion(type: Exec) { commandLine python_executable, '-c', - 'import sys; assert (3, 11) > sys.version_info >= (3, 7), f"Python version {sys.version_info[:2]} not allowed"' + 'import sys; assert sys.version_info >= (3, 7), f"Python version {sys.version_info[:2]} not allowed"' } task environmentSetup(type: Exec, dependsOn: checkPythonVersion) { From 745e12470b6960af6407321017a971a24b963fee Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Fri, 27 Oct 2023 17:51:22 -0700 Subject: [PATCH 3/4] fix StrEnum --- .../api/entities/dataprocess/dataprocess_instance.py | 4 ++-- .../src/datahub/configuration/time_window_config.py | 3 ++- .../datahub/ingestion/glossary/datahub_classifier.py | 4 ++-- .../src/datahub/ingestion/graph/client.py | 4 ++-- .../src/datahub/ingestion/source/common/subtypes.py | 10 +++++----- .../src/datahub/ingestion/source/kafka.py | 4 ++-- .../ingestion/source/looker/looker_query_model.py | 6 +----- .../datahub/ingestion/source/snowflake/constants.py | 8 ++++---- .../ingestion/source/snowflake/snowflake_config.py | 4 ++-- .../src/datahub/ingestion/source/sql/presto_on_hive.py | 4 ++-- metadata-ingestion/tests/performance/data_model.py | 5 +++-- 11 files changed, 27 insertions(+), 29 deletions(-) diff --git 
a/metadata-ingestion/src/datahub/api/entities/dataprocess/dataprocess_instance.py b/metadata-ingestion/src/datahub/api/entities/dataprocess/dataprocess_instance.py index cf6080c7072e6..fd55430dde706 100644 --- a/metadata-ingestion/src/datahub/api/entities/dataprocess/dataprocess_instance.py +++ b/metadata-ingestion/src/datahub/api/entities/dataprocess/dataprocess_instance.py @@ -1,6 +1,5 @@ import time from dataclasses import dataclass, field -from enum import Enum from typing import Callable, Dict, Iterable, List, Optional, Union, cast from datahub.api.entities.datajob import DataFlow, DataJob @@ -22,6 +21,7 @@ DataProcessTypeClass, StatusClass, ) +from datahub.utilities.str_enum import StrEnum from datahub.utilities.urns.data_flow_urn import DataFlowUrn from datahub.utilities.urns.data_job_urn import DataJobUrn from datahub.utilities.urns.data_process_instance_urn import DataProcessInstanceUrn @@ -34,7 +34,7 @@ class DataProcessInstanceKey(DatahubKey): id: str -class InstanceRunResult(str, Enum): +class InstanceRunResult(StrEnum): SUCCESS = RunResultType.SUCCESS SKIPPED = RunResultType.SKIPPED FAILURE = RunResultType.FAILURE diff --git a/metadata-ingestion/src/datahub/configuration/time_window_config.py b/metadata-ingestion/src/datahub/configuration/time_window_config.py index 15de7470e4d82..d621be79522ad 100644 --- a/metadata-ingestion/src/datahub/configuration/time_window_config.py +++ b/metadata-ingestion/src/datahub/configuration/time_window_config.py @@ -9,10 +9,11 @@ from datahub.configuration.common import ConfigModel from datahub.configuration.datetimes import parse_absolute_time, parse_relative_timespan from datahub.metadata.schema_classes import CalendarIntervalClass +from datahub.utilities.str_enum import StrEnum @enum.unique -class BucketDuration(str, enum.Enum): +class BucketDuration(StrEnum): DAY = CalendarIntervalClass.DAY HOUR = CalendarIntervalClass.HOUR diff --git a/metadata-ingestion/src/datahub/ingestion/glossary/datahub_classifier.py 
b/metadata-ingestion/src/datahub/ingestion/glossary/datahub_classifier.py index 1f2b7f5689ea3..9314da909f055 100644 --- a/metadata-ingestion/src/datahub/ingestion/glossary/datahub_classifier.py +++ b/metadata-ingestion/src/datahub/ingestion/glossary/datahub_classifier.py @@ -1,4 +1,3 @@ -from enum import Enum from typing import Any, Dict, List, Optional from datahub_classify.helper_classes import ColumnInfo @@ -9,6 +8,7 @@ from datahub.configuration.common import ConfigModel from datahub.ingestion.glossary.classifier import Classifier +from datahub.utilities.str_enum import StrEnum class NameFactorConfig(ConfigModel): @@ -32,7 +32,7 @@ class DataTypeFactorConfig(ConfigModel): ) -class ValuePredictionType(str, Enum): +class ValuePredictionType(StrEnum): REGEX = "regex" LIBRARY = "library" diff --git a/metadata-ingestion/src/datahub/ingestion/graph/client.py b/metadata-ingestion/src/datahub/ingestion/graph/client.py index ccff677c3a471..3ee8d52651c20 100644 --- a/metadata-ingestion/src/datahub/ingestion/graph/client.py +++ b/metadata-ingestion/src/datahub/ingestion/graph/client.py @@ -1,4 +1,3 @@ -import enum import functools import json import logging @@ -44,6 +43,7 @@ TelemetryClientIdClass, ) from datahub.utilities.perf_timer import PerfTimer +from datahub.utilities.str_enum import StrEnum from datahub.utilities.urns.urn import Urn, guess_entity_type if TYPE_CHECKING: @@ -802,7 +802,7 @@ def execute_graphql(self, query: str, variables: Optional[Dict] = None) -> Dict: return result["data"] - class RelationshipDirection(str, enum.Enum): + class RelationshipDirection(StrEnum): # FIXME: Upgrade to enum.StrEnum when we drop support for Python 3.10 INCOMING = "INCOMING" diff --git a/metadata-ingestion/src/datahub/ingestion/source/common/subtypes.py b/metadata-ingestion/src/datahub/ingestion/source/common/subtypes.py index 741b4789bef21..0244069c1f978 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/common/subtypes.py +++ 
b/metadata-ingestion/src/datahub/ingestion/source/common/subtypes.py @@ -1,7 +1,7 @@ -from enum import Enum +from datahub.utilities.str_enum import StrEnum -class DatasetSubTypes(str, Enum): +class DatasetSubTypes(StrEnum): # Generic SubTypes TABLE = "Table" VIEW = "View" @@ -20,7 +20,7 @@ class DatasetSubTypes(str, Enum): NOTEBOOK = "Notebook" -class DatasetContainerSubTypes(str, Enum): +class DatasetContainerSubTypes(StrEnum): # Generic SubTypes DATABASE = "Database" SCHEMA = "Schema" @@ -34,13 +34,13 @@ class DatasetContainerSubTypes(str, Enum): GCS_BUCKET = "GCS bucket" -class BIContainerSubTypes(str, Enum): +class BIContainerSubTypes(StrEnum): LOOKER_FOLDER = "Folder" TABLEAU_WORKBOOK = "Workbook" POWERBI_WORKSPACE = "Workspace" POWERBI_DATASET = "PowerBI Dataset" -class BIAssetSubTypes(str, Enum): +class BIAssetSubTypes(StrEnum): # Generic SubTypes REPORT = "Report" diff --git a/metadata-ingestion/src/datahub/ingestion/source/kafka.py b/metadata-ingestion/src/datahub/ingestion/source/kafka.py index 23770ff3cf812..2b8226a67bee3 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/kafka.py +++ b/metadata-ingestion/src/datahub/ingestion/source/kafka.py @@ -2,7 +2,6 @@ import json import logging from dataclasses import dataclass, field -from enum import Enum from typing import Any, Dict, Iterable, List, Optional, Type, cast import avro.schema @@ -66,11 +65,12 @@ ) from datahub.utilities.mapping import Constants, OperationProcessor from datahub.utilities.registries.domain_registry import DomainRegistry +from datahub.utilities.str_enum import StrEnum logger = logging.getLogger(__name__) -class KafkaTopicConfigKeys(str, Enum): +class KafkaTopicConfigKeys(StrEnum): MIN_INSYNC_REPLICAS_CONFIG = "min.insync.replicas" RETENTION_SIZE_CONFIG = "retention.bytes" RETENTION_TIME_CONFIG = "retention.ms" diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_query_model.py 
b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_query_model.py index b3002828ceeff..7ed46c8f7084c 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_query_model.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_query_model.py @@ -1,13 +1,9 @@ from dataclasses import dataclass, field -from enum import Enum from typing import Dict, List, cast from looker_sdk.sdk.api40.models import WriteQuery - -# Enum whose value is string and compatible with dictionary having string value as key -class StrEnum(str, Enum): - pass +from datahub.utilities.str_enum import StrEnum class LookerModel(StrEnum): diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/constants.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/constants.py index 6f5e22e39d0c8..9ffe89d8f1c27 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/constants.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/constants.py @@ -1,7 +1,7 @@ -from enum import Enum +from datahub.utilities.str_enum import StrEnum -class SnowflakeCloudProvider(str, Enum): +class SnowflakeCloudProvider(StrEnum): AWS = "aws" GCP = "gcp" AZURE = "azure" @@ -10,7 +10,7 @@ class SnowflakeCloudProvider(str, Enum): SNOWFLAKE_DEFAULT_CLOUD = SnowflakeCloudProvider.AWS -class SnowflakeEdition(str, Enum): +class SnowflakeEdition(StrEnum): STANDARD = "Standard" # We use this to represent Enterprise Edition or higher @@ -44,7 +44,7 @@ class SnowflakeEdition(str, Enum): # We will always compare with lowercase # Complete list for objectDomain - https://docs.snowflake.com/en/sql-reference/account-usage/access_history.html -class SnowflakeObjectDomain(str, Enum): +class SnowflakeObjectDomain(StrEnum): TABLE = "table" EXTERNAL_TABLE = "external table" VIEW = "view" diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_config.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_config.py 
index 032bdef178fdf..b820d8239283f 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_config.py @@ -1,7 +1,6 @@ import logging from collections import defaultdict from dataclasses import dataclass -from enum import Enum from typing import Dict, List, Optional, Set, cast from pydantic import Field, SecretStr, root_validator, validator @@ -24,6 +23,7 @@ ) from datahub.ingestion.source_config.usage.snowflake_usage import SnowflakeUsageConfig from datahub.utilities.global_warning_util import add_global_warning +from datahub.utilities.str_enum import StrEnum logger = logging.Logger(__name__) @@ -40,7 +40,7 @@ ] -class TagOption(str, Enum): +class TagOption(StrEnum): with_lineage = "with_lineage" without_lineage = "without_lineage" skip = "skip" diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/presto_on_hive.py b/metadata-ingestion/src/datahub/ingestion/source/sql/presto_on_hive.py index ceb9ecacb25d2..fe916cae6df1d 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/presto_on_hive.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/presto_on_hive.py @@ -2,7 +2,6 @@ import json import logging from collections import namedtuple -from enum import Enum from itertools import groupby from typing import Any, Dict, Iterable, List, Optional, Tuple, Union @@ -61,13 +60,14 @@ ViewPropertiesClass, ) from datahub.utilities.hive_schema_to_avro import get_schema_fields_for_hive_column +from datahub.utilities.str_enum import StrEnum logger: logging.Logger = logging.getLogger(__name__) TableKey = namedtuple("TableKey", ["schema", "table"]) -class PrestoOnHiveConfigMode(str, Enum): +class PrestoOnHiveConfigMode(StrEnum): hive: str = "hive" # noqa: F811 presto: str = "presto" presto_on_hive: str = "presto-on-hive" diff --git a/metadata-ingestion/tests/performance/data_model.py b/metadata-ingestion/tests/performance/data_model.py index 
9425fa827070e..2a1da6a7917a2 100644 --- a/metadata-ingestion/tests/performance/data_model.py +++ b/metadata-ingestion/tests/performance/data_model.py @@ -1,10 +1,11 @@ from dataclasses import dataclass from datetime import datetime -from enum import Enum from typing import Dict, List, Optional from typing_extensions import Literal +from datahub.utilities.str_enum import StrEnum + StatementType = Literal[ # SELECT + values from OperationTypeClass "SELECT", "INSERT", @@ -24,7 +25,7 @@ class Container: parent: Optional["Container"] = None -class ColumnType(str, Enum): +class ColumnType(StrEnum): # Can add types that take parameters in the future INTEGER = "INTEGER" From b94b6d008b27699f6784cadb7990b63c4c1efd74 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Sun, 29 Oct 2023 21:06:36 -0700 Subject: [PATCH 4/4] add missing file --- .../src/datahub/utilities/str_enum.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 metadata-ingestion/src/datahub/utilities/str_enum.py diff --git a/metadata-ingestion/src/datahub/utilities/str_enum.py b/metadata-ingestion/src/datahub/utilities/str_enum.py new file mode 100644 index 0000000000000..b8392f40770e4 --- /dev/null +++ b/metadata-ingestion/src/datahub/utilities/str_enum.py @@ -0,0 +1,14 @@ +from enum import Enum + + +class StrEnum(str, Enum): + """Enum of str members whose str() is the member's value.""" + + # This is required for compatibility with Python 3.11+, which changed the + # behavior of enums in format() and f-strings. + # Once we're using only Python 3.11+, we can replace this with enum.StrEnum. + # See https://blog.pecar.me/python-enum for more details. + + def __str__(self) -> str: + """Return the member's value as a plain string.""" + return str(self.value)