Skip to content

Commit

Permalink
FIX-#5308: Allow custom execution with no known engine. (#5379)
Browse files Browse the repository at this point in the history
Signed-off-by: mvashishtha <mahesh@ponder.io>
  • Loading branch information
mvashishtha authored Dec 12, 2022
1 parent 1590e65 commit 0254080
Show file tree
Hide file tree
Showing 4 changed files with 122 additions and 35 deletions.
36 changes: 36 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -330,6 +330,42 @@ jobs:
- run: python -m pytest modin/test/test_logging.py
- uses: codecov/codecov-action@v2

# CI job: checks that Modin can register and use a custom execution engine
# when none of the known engines' dependencies are installed.
test-no-engine:
  runs-on: ubuntu-latest
  defaults:
    run:
      # Login shell so the conda environment activated by setup-miniconda is picked up.
      shell: bash -l {0}
  steps:
    - uses: actions/checkout@v2
      with:
        fetch-depth: 1
    - name: Cache conda
      uses: actions/cache@v3
      with:
        path: |
          ~/conda_pkgs_dir
          ~/.cache/pip
        # The hashed path must match `environment-file` below; a path that matches
        # no file hashes to an empty string, so the cache key would never change
        # when the dependencies change.
        key:
          ${{ runner.os }}-conda-${{ hashFiles('requirements/requirements-no-engine.yml') }}
    - uses: conda-incubator/setup-miniconda@v2
      with:
        miniforge-variant: Mambaforge
        miniforge-version: latest
        use-mamba: true
        activate-environment: modin
        environment-file: requirements/requirements-no-engine.yml
        # Quoted so YAML keeps the version as a string rather than parsing it as a float.
        python-version: "3.8"
        channel-priority: strict
        # we set use-only-tar-bz2 to false in order for conda to properly find new packages to be installed
        # for more info see https://github.com/conda-incubator/setup-miniconda/issues/264
        use-only-tar-bz2: false
    - name: Conda environment
      run: |
        conda info
        conda list
    - run: python -m pytest modin/core/execution/dispatching/factories/test/test_dispatcher.py::test_add_option
    - uses: codecov/codecov-action@v2

test-defaults:
needs: [lint-flake8, lint-black, lint-mypy, test-api, test-headers]
runs-on: ubuntu-latest
Expand Down
7 changes: 6 additions & 1 deletion modin/config/envvars.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,8 @@ class Engine(EnvironmentVariable, type=str):
"Python",
} # engines that don't require initialization, useful for unit tests

has_custom_engine = False

@classmethod
def _get_default(cls) -> str:
"""
Expand All @@ -98,7 +100,9 @@ def _get_default(cls) -> str:
MIN_UNIDIST_VERSION,
)

if IsDebug.get():
# If there's a custom engine, we don't need to check for any engine
# dependencies. Return the default "Python" engine.
if IsDebug.get() or cls.has_custom_engine:
return "Python"
try:
import ray
Expand Down Expand Up @@ -160,6 +164,7 @@ def _get_default(cls) -> str:
def add_option(cls, choice: Any) -> Any:
choice = super().add_option(choice)
cls.NOINIT_ENGINES.add(choice)
cls.has_custom_engine = True
return choice


Expand Down
66 changes: 32 additions & 34 deletions modin/core/execution/dispatching/factories/dispatcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,9 @@ class FactoryDispatcher(object):
@classmethod
def get_factory(cls) -> factories.BaseFactory:
"""Get current factory."""
# mostly for testing
if cls.__factory is None:
Engine.subscribe(cls._update_factory)
StorageFormat.subscribe(cls._update_factory)
return cls.__factory

@classmethod
Expand Down Expand Up @@ -155,152 +157,148 @@ def _update_factory(cls, _):
@classmethod
@_inherit_docstrings(factories.BaseFactory._from_pandas)
def from_pandas(cls, df):
return cls.__factory._from_pandas(df)
return cls.get_factory()._from_pandas(df)

@classmethod
@_inherit_docstrings(factories.BaseFactory._from_arrow)
def from_arrow(cls, at):
return cls.__factory._from_arrow(at)
return cls.get_factory()._from_arrow(at)

@classmethod
@_inherit_docstrings(factories.BaseFactory._from_non_pandas)
def from_non_pandas(cls, *args, **kwargs):
return cls.__factory._from_non_pandas(*args, **kwargs)
return cls.get_factory()._from_non_pandas(*args, **kwargs)

@classmethod
@_inherit_docstrings(factories.BaseFactory._from_dataframe)
def from_dataframe(cls, *args, **kwargs):
return cls.__factory._from_dataframe(*args, **kwargs)
return cls.get_factory()._from_dataframe(*args, **kwargs)

@classmethod
@_inherit_docstrings(factories.BaseFactory._read_parquet)
def read_parquet(cls, **kwargs):
return cls.__factory._read_parquet(**kwargs)
return cls.get_factory()._read_parquet(**kwargs)

@classmethod
@_inherit_docstrings(factories.BaseFactory._read_csv)
def read_csv(cls, **kwargs):
return cls.__factory._read_csv(**kwargs)
return cls.get_factory()._read_csv(**kwargs)

@classmethod
@_inherit_docstrings(factories.ExperimentalPandasOnRayFactory._read_csv_glob)
def read_csv_glob(cls, **kwargs):
return cls.__factory._read_csv_glob(**kwargs)
return cls.get_factory()._read_csv_glob(**kwargs)

@classmethod
@_inherit_docstrings(
factories.ExperimentalPandasOnRayFactory._read_pickle_distributed
)
def read_pickle_distributed(cls, **kwargs):
return cls.__factory._read_pickle_distributed(**kwargs)
return cls.get_factory()._read_pickle_distributed(**kwargs)

@classmethod
@_inherit_docstrings(factories.BaseFactory._read_json)
def read_json(cls, **kwargs):
return cls.__factory._read_json(**kwargs)
return cls.get_factory()._read_json(**kwargs)

@classmethod
@_inherit_docstrings(factories.BaseFactory._read_gbq)
def read_gbq(cls, **kwargs):
return cls.__factory._read_gbq(**kwargs)
return cls.get_factory()._read_gbq(**kwargs)

@classmethod
@_inherit_docstrings(factories.BaseFactory._read_html)
def read_html(cls, **kwargs):
return cls.__factory._read_html(**kwargs)
return cls.get_factory()._read_html(**kwargs)

@classmethod
@_inherit_docstrings(factories.BaseFactory._read_clipboard)
def read_clipboard(cls, **kwargs):
return cls.__factory._read_clipboard(**kwargs)
return cls.get_factory()._read_clipboard(**kwargs)

@classmethod
@_inherit_docstrings(factories.BaseFactory._read_excel)
def read_excel(cls, **kwargs):
return cls.__factory._read_excel(**kwargs)
return cls.get_factory()._read_excel(**kwargs)

@classmethod
@_inherit_docstrings(factories.BaseFactory._read_hdf)
def read_hdf(cls, **kwargs):
return cls.__factory._read_hdf(**kwargs)
return cls.get_factory()._read_hdf(**kwargs)

@classmethod
@_inherit_docstrings(factories.BaseFactory._read_feather)
def read_feather(cls, **kwargs):
return cls.__factory._read_feather(**kwargs)
return cls.get_factory()._read_feather(**kwargs)

@classmethod
@_inherit_docstrings(factories.BaseFactory._read_stata)
def read_stata(cls, **kwargs):
return cls.__factory._read_stata(**kwargs)
return cls.get_factory()._read_stata(**kwargs)

@classmethod
@_inherit_docstrings(factories.BaseFactory._read_sas)
def read_sas(cls, **kwargs): # pragma: no cover
return cls.__factory._read_sas(**kwargs)
return cls.get_factory()._read_sas(**kwargs)

@classmethod
@_inherit_docstrings(factories.BaseFactory._read_pickle)
def read_pickle(cls, **kwargs):
return cls.__factory._read_pickle(**kwargs)
return cls.get_factory()._read_pickle(**kwargs)

@classmethod
@_inherit_docstrings(factories.BaseFactory._read_sql)
def read_sql(cls, **kwargs):
return cls.__factory._read_sql(**kwargs)
return cls.get_factory()._read_sql(**kwargs)

@classmethod
@_inherit_docstrings(factories.BaseFactory._read_fwf)
def read_fwf(cls, **kwargs):
return cls.__factory._read_fwf(**kwargs)
return cls.get_factory()._read_fwf(**kwargs)

@classmethod
@_inherit_docstrings(factories.BaseFactory._read_sql_table)
def read_sql_table(cls, **kwargs):
return cls.__factory._read_sql_table(**kwargs)
return cls.get_factory()._read_sql_table(**kwargs)

@classmethod
@_inherit_docstrings(factories.BaseFactory._read_sql_query)
def read_sql_query(cls, **kwargs):
return cls.__factory._read_sql_query(**kwargs)
return cls.get_factory()._read_sql_query(**kwargs)

@classmethod
@_inherit_docstrings(factories.BaseFactory._read_spss)
def read_spss(cls, **kwargs):
return cls.__factory._read_spss(**kwargs)
return cls.get_factory()._read_spss(**kwargs)

@classmethod
@_inherit_docstrings(factories.BaseFactory._to_sql)
def to_sql(cls, *args, **kwargs):
return cls.__factory._to_sql(*args, **kwargs)
return cls.get_factory()._to_sql(*args, **kwargs)

@classmethod
@_inherit_docstrings(factories.BaseFactory._to_pickle)
def to_pickle(cls, *args, **kwargs):
return cls.__factory._to_pickle(*args, **kwargs)
return cls.get_factory()._to_pickle(*args, **kwargs)

@classmethod
@_inherit_docstrings(
factories.ExperimentalPandasOnRayFactory._to_pickle_distributed
)
def to_pickle_distributed(cls, *args, **kwargs):
return cls.__factory._to_pickle_distributed(*args, **kwargs)
return cls.get_factory()._to_pickle_distributed(*args, **kwargs)

@classmethod
@_inherit_docstrings(factories.ExperimentalPandasOnRayFactory._read_custom_text)
def read_custom_text(cls, **kwargs):
return cls.__factory._read_custom_text(**kwargs)
return cls.get_factory()._read_custom_text(**kwargs)

@classmethod
@_inherit_docstrings(factories.BaseFactory._to_csv)
def to_csv(cls, *args, **kwargs):
return cls.__factory._to_csv(*args, **kwargs)
return cls.get_factory()._to_csv(*args, **kwargs)

@classmethod
@_inherit_docstrings(factories.BaseFactory._to_parquet)
def to_parquet(cls, *args, **kwargs):
return cls.__factory._to_parquet(*args, **kwargs)


Engine.subscribe(FactoryDispatcher._update_factory)
StorageFormat.subscribe(FactoryDispatcher._update_factory)
return cls.get_factory()._to_parquet(*args, **kwargs)
48 changes: 48 additions & 0 deletions requirements/requirements-no-engine.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
channels:
- conda-forge
dependencies:
- pandas==1.5.2
- numpy>=1.18.5
- pyarrow>=4.0.1
- fsspec
- xarray
- Jinja2
- scipy
- pip
- s3fs>=2021.8
- feather-format
- lxml
- openpyxl
- xlrd
- matplotlib
- sqlalchemy>=1.4.0
- pandas-gbq
- pytables
- msgpack-python
- psutil
- pytest>=6.0.1
- pytest-benchmark
- pytest-cov>=2.10.1
- pytest-xdist>=2.1.0
- coverage
- pygithub
- rpyc==4.1.5
- cloudpickle
- boto3
# TODO(https://github.com/modin-project/modin/issues/5194): Uncap xgboost
# when we use collective instead of rabit.
- xgboost>=1.7.1,<2.0.0
- tqdm
- pip:
# Fixes breaking ipywidgets changes, but hasn't been released yet.
- git+https://github.com/modin-project/modin-spreadsheet.git@49ffd89f683f54c311867d602c55443fb11bf2a5
- git+https://github.com/airspeed-velocity/asv.git@ef016e233cb9a0b19d517135104f49e0a3c380e9
# no conda package for windows
- connectorx>=0.2.6a4
- black
# TODO: remove when flake8 5.x stabilizes and appears in both pip and conda-forge; see GH-#4745
- flake8<5
- flake8-no-implicit-concat
- flake8-print
# The `numpydoc` version should match the version installed in the `lint-pydocstyle` job of the CI.
- numpydoc==1.1.0

0 comments on commit 0254080

Please sign in to comment.