Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ci(ingest): test with python 3.11 #6485

Closed
wants to merge 15 commits into from
Closed
4 changes: 2 additions & 2 deletions .github/workflows/metadata-ingestion.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ jobs:
# DATAHUB_LOOKML_GIT_TEST_SSH_KEY: ${{ secrets.DATAHUB_LOOKML_GIT_TEST_SSH_KEY }}
strategy:
matrix:
python-version: ["3.7", "3.10"]
python-version: ["3.7", "3.11"]
command:
[
"testQuick",
Expand All @@ -41,7 +41,7 @@ jobs:
]
include:
- python-version: "3.7"
- python-version: "3.10"
- python-version: "3.11"
fail-fast: false
steps:
- uses: actions/checkout@v3
Expand Down
1 change: 1 addition & 0 deletions metadata-ingestion-modules/airflow-plugin/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,7 @@ def get_long_description():
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Intended Audience :: Developers",
"Intended Audience :: Information Technology",
"Intended Audience :: System Administrators",
Expand Down
2 changes: 1 addition & 1 deletion metadata-ingestion/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ def get_coverage_arg(test_name) {

task checkPythonVersion(type: Exec) {
commandLine python_executable, '-c',
'import sys; assert (3, 11) > sys.version_info >= (3, 7), f"Python version {sys.version_info[:2]} not allowed"'
'import sys; assert sys.version_info >= (3, 7), f"Python version {sys.version_info[:2]} not allowed"'
}

task environmentSetup(type: Exec, dependsOn: checkPythonVersion) {
Expand Down
1 change: 1 addition & 0 deletions metadata-ingestion/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -699,6 +699,7 @@
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Intended Audience :: Developers",
"Intended Audience :: Information Technology",
"Intended Audience :: System Administrators",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import time
from dataclasses import dataclass, field
from enum import Enum
from typing import Callable, Dict, Iterable, List, Optional, Union, cast

from datahub.api.entities.datajob import DataFlow, DataJob
Expand All @@ -22,6 +21,7 @@
DataProcessTypeClass,
StatusClass,
)
from datahub.utilities.str_enum import StrEnum
from datahub.utilities.urns.data_flow_urn import DataFlowUrn
from datahub.utilities.urns.data_job_urn import DataJobUrn
from datahub.utilities.urns.data_process_instance_urn import DataProcessInstanceUrn
Expand All @@ -34,7 +34,7 @@ class DataProcessInstanceKey(DatahubKey):
id: str


class InstanceRunResult(str, Enum):
class InstanceRunResult(StrEnum):
SUCCESS = RunResultType.SUCCESS
SKIPPED = RunResultType.SKIPPED
FAILURE = RunResultType.FAILURE
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,11 @@
from datahub.configuration.common import ConfigModel
from datahub.configuration.datetimes import parse_absolute_time, parse_relative_timespan
from datahub.metadata.schema_classes import CalendarIntervalClass
from datahub.utilities.str_enum import StrEnum


@enum.unique
class BucketDuration(str, enum.Enum):
class BucketDuration(StrEnum):
DAY = CalendarIntervalClass.DAY
HOUR = CalendarIntervalClass.HOUR

Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
from enum import Enum
from typing import Any, Dict, List, Optional

from datahub_classify.helper_classes import ColumnInfo
Expand All @@ -9,6 +8,7 @@

from datahub.configuration.common import ConfigModel
from datahub.ingestion.glossary.classifier import Classifier
from datahub.utilities.str_enum import StrEnum


class NameFactorConfig(ConfigModel):
Expand All @@ -32,7 +32,7 @@ class DataTypeFactorConfig(ConfigModel):
)


class ValuePredictionType(str, Enum):
class ValuePredictionType(StrEnum):
REGEX = "regex"
LIBRARY = "library"

Expand Down
4 changes: 2 additions & 2 deletions metadata-ingestion/src/datahub/ingestion/graph/client.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import enum
import functools
import json
import logging
Expand Down Expand Up @@ -44,6 +43,7 @@
TelemetryClientIdClass,
)
from datahub.utilities.perf_timer import PerfTimer
from datahub.utilities.str_enum import StrEnum
from datahub.utilities.urns.urn import Urn, guess_entity_type

if TYPE_CHECKING:
Expand Down Expand Up @@ -802,7 +802,7 @@ def execute_graphql(self, query: str, variables: Optional[Dict] = None) -> Dict:

return result["data"]

class RelationshipDirection(str, enum.Enum):
class RelationshipDirection(StrEnum):
# FIXME: Upgrade to enum.StrEnum when we drop support for Python 3.10

INCOMING = "INCOMING"
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from enum import Enum
from datahub.utilities.str_enum import StrEnum


class DatasetSubTypes(str, Enum):
class DatasetSubTypes(StrEnum):
# Generic SubTypes
TABLE = "Table"
VIEW = "View"
Expand All @@ -20,7 +20,7 @@ class DatasetSubTypes(str, Enum):
NOTEBOOK = "Notebook"


class DatasetContainerSubTypes(str, Enum):
class DatasetContainerSubTypes(StrEnum):
# Generic SubTypes
DATABASE = "Database"
SCHEMA = "Schema"
Expand All @@ -34,13 +34,13 @@ class DatasetContainerSubTypes(str, Enum):
GCS_BUCKET = "GCS bucket"


class BIContainerSubTypes(str, Enum):
class BIContainerSubTypes(StrEnum):
LOOKER_FOLDER = "Folder"
TABLEAU_WORKBOOK = "Workbook"
POWERBI_WORKSPACE = "Workspace"
POWERBI_DATASET = "PowerBI Dataset"


class BIAssetSubTypes(str, Enum):
class BIAssetSubTypes(StrEnum):
# Generic SubTypes
REPORT = "Report"
4 changes: 2 additions & 2 deletions metadata-ingestion/src/datahub/ingestion/source/kafka.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
import json
import logging
from dataclasses import dataclass, field
from enum import Enum
from typing import Any, Dict, Iterable, List, Optional, Type, cast

import avro.schema
Expand Down Expand Up @@ -66,11 +65,12 @@
)
from datahub.utilities.mapping import Constants, OperationProcessor
from datahub.utilities.registries.domain_registry import DomainRegistry
from datahub.utilities.str_enum import StrEnum

logger = logging.getLogger(__name__)


class KafkaTopicConfigKeys(str, Enum):
class KafkaTopicConfigKeys(StrEnum):
MIN_INSYNC_REPLICAS_CONFIG = "min.insync.replicas"
RETENTION_SIZE_CONFIG = "retention.bytes"
RETENTION_TIME_CONFIG = "retention.ms"
Expand Down
Original file line number Diff line number Diff line change
@@ -1,13 +1,9 @@
from dataclasses import dataclass, field
from enum import Enum
from typing import Dict, List, cast

from looker_sdk.sdk.api40.models import WriteQuery


# Enum whose value is string and compatible with dictionary having string value as key
class StrEnum(str, Enum):
pass
from datahub.utilities.str_enum import StrEnum


class LookerModel(StrEnum):
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from enum import Enum
from datahub.utilities.str_enum import StrEnum


class SnowflakeCloudProvider(str, Enum):
class SnowflakeCloudProvider(StrEnum):
AWS = "aws"
GCP = "gcp"
AZURE = "azure"
Expand All @@ -10,7 +10,7 @@ class SnowflakeCloudProvider(str, Enum):
SNOWFLAKE_DEFAULT_CLOUD = SnowflakeCloudProvider.AWS


class SnowflakeEdition(str, Enum):
class SnowflakeEdition(StrEnum):
STANDARD = "Standard"

# We use this to represent Enterprise Edition or higher
Expand Down Expand Up @@ -44,7 +44,7 @@ class SnowflakeEdition(str, Enum):

# We will always compare with lowercase
# Complete list for objectDomain - https://docs.snowflake.com/en/sql-reference/account-usage/access_history.html
class SnowflakeObjectDomain(str, Enum):
class SnowflakeObjectDomain(StrEnum):
TABLE = "table"
EXTERNAL_TABLE = "external table"
VIEW = "view"
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import logging
from collections import defaultdict
from dataclasses import dataclass
from enum import Enum
from typing import Dict, List, Optional, Set, cast

from pydantic import Field, SecretStr, root_validator, validator
Expand All @@ -24,6 +23,7 @@
)
from datahub.ingestion.source_config.usage.snowflake_usage import SnowflakeUsageConfig
from datahub.utilities.global_warning_util import add_global_warning
from datahub.utilities.str_enum import StrEnum

logger = logging.Logger(__name__)

Expand All @@ -40,7 +40,7 @@
]


class TagOption(str, Enum):
class TagOption(StrEnum):
with_lineage = "with_lineage"
without_lineage = "without_lineage"
skip = "skip"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
import json
import logging
from collections import namedtuple
from enum import Enum
from itertools import groupby
from typing import Any, Dict, Iterable, List, Optional, Tuple, Union

Expand Down Expand Up @@ -61,13 +60,14 @@
ViewPropertiesClass,
)
from datahub.utilities.hive_schema_to_avro import get_schema_fields_for_hive_column
from datahub.utilities.str_enum import StrEnum

logger: logging.Logger = logging.getLogger(__name__)

TableKey = namedtuple("TableKey", ["schema", "table"])


class PrestoOnHiveConfigMode(str, Enum):
class PrestoOnHiveConfigMode(StrEnum):
hive: str = "hive" # noqa: F811
presto: str = "presto"
presto_on_hive: str = "presto-on-hive"
Expand Down
14 changes: 14 additions & 0 deletions metadata-ingestion/src/datahub/utilities/str_enum.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
from enum import Enum


class StrEnum(str, Enum):
    """Enum base class whose members are also plain strings.

    Calling ``str()`` on a member yields the member's value.
    """

    # Python 3.11+ changed how enums behave in format() and f-strings, so
    # __str__ is overridden here for compatibility with those versions.
    # When only Python 3.11+ has to be supported, this class can be swapped
    # for the standard library's enum.StrEnum.
    # Background reading: https://blog.pecar.me/python-enum

    def __str__(self) -> str:
        """Render the member as its underlying value."""
        member_value = self.value
        return str(member_value)
5 changes: 3 additions & 2 deletions metadata-ingestion/tests/performance/data_model.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
from dataclasses import dataclass
from datetime import datetime
from enum import Enum
from typing import Dict, List, Optional

from typing_extensions import Literal

from datahub.utilities.str_enum import StrEnum

StatementType = Literal[ # SELECT + values from OperationTypeClass
"SELECT",
"INSERT",
Expand All @@ -24,7 +25,7 @@ class Container:
parent: Optional["Container"] = None


class ColumnType(str, Enum):
class ColumnType(StrEnum):
# Can add types that take parameters in the future

INTEGER = "INTEGER"
Expand Down
Loading