Skip to content

Commit

Permalink
FIX-modin-project#7134: Use a separate docstring class for BasePandasDataset. (modin-project#7353)
Browse files Browse the repository at this point in the history

Signed-off-by: sfc-gh-mvashishtha <mahesh.vashishtha@snowflake.com>
  • Loading branch information
sfc-gh-mvashishtha authored and arunjose696 committed Aug 1, 2024
1 parent 621f49e commit 3014693
Show file tree
Hide file tree
Showing 8 changed files with 51 additions and 3 deletions.
18 changes: 18 additions & 0 deletions modin/config/envvars.py
Original file line number Diff line number Diff line change
Expand Up @@ -331,6 +331,24 @@ class CpuCount(EnvironmentVariable, type=int):
"""How many CPU cores to use during initialization of the Modin engine."""

varname = "MODIN_CPUS"

@classmethod
def _put(cls, value: int) -> None:
    """
    Store `value` as the CPU count unless a user has already configured one.

    Parameters
    ----------
    value : int
        Config value to set.

    Notes
    -----
    This method is used to set CpuCount from cluster resources internally
    and should not be called by a user.
    """
    # Only overwrite the config while it still reports its default source,
    # so a value chosen by the user is never clobbered.
    still_default = cls.get_value_source() == ValueSource.DEFAULT
    if still_default:
        cls.put(value)

@classmethod
def _get_default(cls) -> int:
Expand Down
1 change: 1 addition & 0 deletions modin/core/execution/dask/common/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,3 +74,4 @@ def _disable_warnings():

num_cpus = len(client.ncores())
NPartitions._put(num_cpus)
CpuCount._put(num_cpus)
1 change: 1 addition & 0 deletions modin/core/execution/ray/common/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,7 @@ def initialize_ray(

num_cpus = int(ray.cluster_resources()["CPU"])
NPartitions._put(num_cpus)
CpuCount._put(num_cpus)

# TODO(https://github.com/ray-project/ray/issues/28216): remove this
# workaround once Ray gives a better way to suppress task errors.
Expand Down
1 change: 1 addition & 0 deletions modin/core/execution/unidist/common/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ def initialize_unidist():

num_cpus = sum(v["CPU"] for v in unidist.cluster_resources().values())
modin_cfg.NPartitions._put(num_cpus)
modin_cfg.CpuCount._put(num_cpus)


def deserialize(obj): # pragma: no cover
Expand Down
4 changes: 2 additions & 2 deletions modin/tests/config/docs_module/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

from .classes import DataFrame, Series
from .classes import BasePandasDataset, DataFrame, Series
from .functions import read_csv

__all__ = ["DataFrame", "Series", "read_csv"]
__all__ = ["BasePandasDataset", "DataFrame", "Series", "read_csv"]
8 changes: 8 additions & 0 deletions modin/tests/config/docs_module/classes.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,11 @@ class Series:
def isna(self):
"""This is a test of the documentation module for Series."""
return


class BasePandasDataset:
    """This is a test of the documentation module for BasePandasDataSet."""

    # NOTE: the docstrings in this class are compared verbatim by the DocModule
    # override test, so their text (including the "DataSet" spelling) must
    # stay exactly as written.

    def apply(self):
        """This is a test of the documentation module for BasePandasDataSet.apply."""
        return
8 changes: 8 additions & 0 deletions modin/tests/config/test_envvars.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import modin.pandas as pd
from modin.config.envvars import _check_vars
from modin.config.pubsub import _UNSET, ExactStr
from modin.pandas.base import BasePandasDataset


def reset_vars(*vars: tuple[cfg.Parameter]):
Expand Down Expand Up @@ -89,13 +90,20 @@ def test_overrides(self):
cfg.DocModule.put("modin.tests.config.docs_module")

# Test for override
assert BasePandasDataset.__doc__ == (
"This is a test of the documentation module for BasePandasDataSet."
)
assert BasePandasDataset.apply.__doc__ == (
"This is a test of the documentation module for BasePandasDataSet.apply."
)
assert (
pd.DataFrame.apply.__doc__
== "This is a test of the documentation module for DataFrame."
)
# Test for pandas doc when method is not defined on the plugin module
assert pandas.DataFrame.isna.__doc__ in pd.DataFrame.isna.__doc__
assert pandas.DataFrame.isnull.__doc__ in pd.DataFrame.isnull.__doc__
assert BasePandasDataset.astype.__doc__ in pd.DataFrame.astype.__doc__
# Test for override
assert (
pd.Series.isna.__doc__
Expand Down
13 changes: 12 additions & 1 deletion modin/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -462,7 +462,18 @@ def _inherit_docstrings_in_place(
if doc_module != DocModule.default and "pandas" in str(
getattr(parent, "__module__", "")
):
parent = getattr(imported_doc_module, getattr(parent, "__name__", ""), parent)
parent_name = (
# DocModule should use the class BasePandasDataset to override the
# docstrings of BasePandasDataset, even if BasePandasDataset
# normally inherits docstrings from a different `parent`.
"BasePandasDataset"
if getattr(cls_or_func, "__name__", "") == "BasePandasDataset"
# For other classes, override docstrings with the class that has the
# same name as the `parent` class, e.g. DataFrame inherits
# docstrings from doc_module.DataFrame.
else getattr(parent, "__name__", "")
)
parent = getattr(imported_doc_module, parent_name, parent)
if parent != default_parent:
# Reset API link in case the docs are overridden.
apilink = None
Expand Down

0 comments on commit 3014693

Please sign in to comment.