Skip to content

Commit

Permalink
FIX-#7138: Stop reloading modules for custom docstrings. (#7307)
Browse files Browse the repository at this point in the history
Signed-off-by: sfc-gh-mvashishtha <mahesh.vashishtha@snowflake.com>
Co-authored-by: Anatoly Myachev <anatoliimyachev@mail.com>
  • Loading branch information
sfc-gh-mvashishtha and anmyachev authored Jun 11, 2024
1 parent af5ed06 commit 0a06455
Show file tree
Hide file tree
Showing 3 changed files with 178 additions and 119 deletions.
31 changes: 0 additions & 31 deletions modin/config/envvars.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@

"""Module houses Modin configs originated from environment variables."""

import importlib
import os
import secrets
import sys
Expand Down Expand Up @@ -771,36 +770,6 @@ class DocModule(EnvironmentVariable, type=ExactStr):
varname = "MODIN_DOC_MODULE"
default = "pandas"

@classmethod
def put(cls, value: str) -> None:
"""
Assign a value to the DocModule config.
Parameters
----------
value : str
Config value to set.
"""
super().put(value)
# Reload everything to apply the documentation. This is required since the
# docs might already have been created and the implementation will assume
# that the new docs are applied when the config is set. This set of operations
# does this.
import modin.pandas as pd

importlib.reload(pd.accessor)
importlib.reload(pd.base)
importlib.reload(pd.dataframe)
importlib.reload(pd.general)
importlib.reload(pd.groupby)
importlib.reload(pd.io)
importlib.reload(pd.iterator)
importlib.reload(pd.series)
importlib.reload(pd.series_utils)
importlib.reload(pd.utils)
importlib.reload(pd.window)
importlib.reload(pd)


class DaskThreadsPerWorker(EnvironmentVariable, type=int):
"""Number of threads per Dask worker."""
Expand Down
70 changes: 42 additions & 28 deletions modin/tests/config/test_envvars.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@

import os

import pandas
import pytest

import modin.config as cfg
Expand Down Expand Up @@ -79,34 +80,47 @@ def test_custom_help(make_custom_envvar):
assert "custom var" in make_custom_envvar.get_help()


def test_doc_module():
import pandas

import modin.pandas as pd
from modin.config import DocModule

DocModule.put("modin.tests.config.docs_module")

# Test for override
assert (
pd.DataFrame.apply.__doc__
== "This is a test of the documentation module for DataFrame."
)
# Test for pandas doc when method is not defined on the plugin module
assert pandas.DataFrame.isna.__doc__ in pd.DataFrame.isna.__doc__
assert pandas.DataFrame.isnull.__doc__ in pd.DataFrame.isnull.__doc__
# Test for override
assert (
pd.Series.isna.__doc__
== "This is a test of the documentation module for Series."
)
# Test for pandas doc when method is not defined on the plugin module
assert pandas.Series.isnull.__doc__ in pd.Series.isnull.__doc__
assert pandas.Series.apply.__doc__ in pd.Series.apply.__doc__
# Test for override
assert pd.read_csv.__doc__ == "Test override for functions on the module."
# Test for pandas doc when function is not defined on module.
assert pandas.read_table.__doc__ in pd.read_table.__doc__
class TestDocModule:
"""
Test using a module to replace default docstrings.
"""

def test_overrides(self):
cfg.DocModule.put("modin.tests.config.docs_module")

# Test for override
assert (
pd.DataFrame.apply.__doc__
== "This is a test of the documentation module for DataFrame."
)
# Test for pandas doc when method is not defined on the plugin module
assert pandas.DataFrame.isna.__doc__ in pd.DataFrame.isna.__doc__
assert pandas.DataFrame.isnull.__doc__ in pd.DataFrame.isnull.__doc__
# Test for override
assert (
pd.Series.isna.__doc__
== "This is a test of the documentation module for Series."
)
# Test for pandas doc when method is not defined on the plugin module
assert pandas.Series.isnull.__doc__ in pd.Series.isnull.__doc__
assert pandas.Series.apply.__doc__ in pd.Series.apply.__doc__
# Test for override
assert pd.read_csv.__doc__ == "Test override for functions on the module."
# Test for pandas doc when function is not defined on module.
assert pandas.read_table.__doc__ in pd.read_table.__doc__

def test_not_redefining_classes_modin_issue_7138(self):
original_dataframe_class = pd.DataFrame

cfg.DocModule.put("modin.tests.config.docs_module")

# Test for override
assert (
pd.DataFrame.apply.__doc__
== "This is a test of the documentation module for DataFrame."
)

assert pd.DataFrame is original_dataframe_class


@pytest.mark.skipif(cfg.Engine.get() != "Ray", reason="Ray specific test")
Expand Down
196 changes: 136 additions & 60 deletions modin/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -378,6 +378,129 @@ def _replace_doc(
target_obj.__doc__ = doc


# This is a map from objects whose docstrings we are overriding to functions that
# take a DocModule string and override the docstring according to the
# DocModule. When we update DocModule, we can use this map to update all
# inherited docstrings.
_docstring_inheritance_calls: list[Callable[[str], None]] = []


def _documentable_obj(obj: object) -> bool:
"""
Check whether we can replace the docstring of `obj`.
Parameters
----------
obj : object
Object whose docstring we want to replace.
Returns
-------
bool
Whether we can replace the docstring.
"""
return bool(
callable(obj)
and not inspect.isclass(obj)
or (isinstance(obj, property) and obj.fget)
or (isinstance(obj, functools.cached_property))
or (isinstance(obj, (staticmethod, classmethod)) and obj.__func__)
)


def _update_inherited_docstrings(doc_module: DocModule) -> None:
"""
Update all inherited docstrings.
Parameters
----------
doc_module : DocModule
The current DocModule.
"""
_doc_module = doc_module.get()
for doc_inheritance_call in _docstring_inheritance_calls:
doc_inheritance_call(doc_module=_doc_module) # type: ignore[call-arg]


def _inherit_docstrings_in_place(
cls_or_func: Fn,
doc_module: str,
parent: object,
excluded: List[object],
overwrite_existing: bool = False,
apilink: Optional[Union[str, List[str]]] = None,
) -> None:
"""
Replace `cls_or_func` docstrings with `parent` docstrings in place.
Parameters
----------
cls_or_func : Fn
The class or function whose docstrings we need to update.
doc_module : str
The docs module.
parent : object
Parent object from which the decorated object inherits __doc__.
excluded : list, default: []
List of parent objects from which the class does not
inherit docstrings.
overwrite_existing : bool, default: False
Allow overwriting docstrings that already exist in
the decorated class.
apilink : str | List[str], optional
If non-empty, insert the link(s) to pandas API documentation.
Should be the prefix part in the URL template, e.g. "pandas.DataFrame".
"""
# Import the docs module and get the class (e.g. `DataFrame`).
imported_doc_module = importlib.import_module(doc_module)
# Set the default parent so we can use it in case some docs are missing from
# parent module.
default_parent = parent
# Try to get the parent object from the doc module, and if it isn't there,
# get it from parent instead. We only do this if we are overriding pandas
# documentation. We don't touch other docs.
if doc_module != DocModule.default and "pandas" in str(
getattr(parent, "__module__", "")
):
parent = getattr(imported_doc_module, getattr(parent, "__name__", ""), parent)
if parent != default_parent:
# Reset API link in case the docs are overridden.
apilink = None
overwrite_existing = True

if parent not in excluded:
_replace_doc(parent, cls_or_func, overwrite_existing, apilink)

if not isinstance(cls_or_func, types.FunctionType):
seen = set()
for base in cls_or_func.__mro__: # type: ignore[attr-defined]
if base is object:
continue
for attr, obj in base.__dict__.items():
if attr in seen:
continue
seen.add(attr)
# Try to get the attribute from the docs class first, then
# from the default parent (pandas), and if it's not in either,
# set `parent_obj` to `None`.
parent_obj = getattr(parent, attr, getattr(default_parent, attr, None))
if (
parent_obj in excluded
or not _documentable_obj(parent_obj)
or not _documentable_obj(obj)
):
continue

_replace_doc(
parent_obj,
obj,
overwrite_existing,
apilink,
parent_cls=cls_or_func,
attr_name=attr,
)


def _inherit_docstrings(
parent: object,
excluded: List[object] = [],
Expand Down Expand Up @@ -416,73 +539,26 @@ def _inherit_docstrings(
are not defined in target class (but are defined in the ancestor class),
which means that ancestor class attribute docstrings could also change.
"""
# Import the docs module and get the class (e.g. `DataFrame`).
imported_doc_module = importlib.import_module(DocModule.get())
# Set the default parent so we can use it in case some docs are missing from
# parent module.
default_parent = parent
# Try to get the parent object from the doc module, and if it isn't there,
# get it from parent instead. We only do this if we are overriding pandas
# documentation. We don't touch other docs.
if DocModule.get() != DocModule.default and "pandas" in str(
getattr(parent, "__module__", "")
):
parent = getattr(imported_doc_module, getattr(parent, "__name__", ""), parent)
if parent != default_parent:
# Reset API link in case the docs are overridden.
apilink = None
overwrite_existing = True

def _documentable_obj(obj: object) -> bool:
"""Check if `obj` docstring could be patched."""
return bool(
callable(obj)
and not inspect.isclass(obj)
or (isinstance(obj, property) and obj.fget)
or (isinstance(obj, functools.cached_property))
or (isinstance(obj, (staticmethod, classmethod)) and obj.__func__)
)

def decorator(cls_or_func: Fn) -> Fn:
if parent not in excluded:
_replace_doc(parent, cls_or_func, overwrite_existing, apilink)

if not isinstance(cls_or_func, types.FunctionType):
seen = set()
for base in cls_or_func.__mro__: # type: ignore[attr-defined]
if base is object:
continue
for attr, obj in base.__dict__.items():
if attr in seen:
continue
seen.add(attr)
# Try to get the attribute from the docs class first, then
# from the default parent (pandas), and if it's not in either,
# set `parent_obj` to `None`.
parent_obj = getattr(
parent, attr, getattr(default_parent, attr, None)
)
if (
parent_obj in excluded
or not _documentable_obj(parent_obj)
or not _documentable_obj(obj)
):
continue

_replace_doc(
parent_obj,
obj,
overwrite_existing,
apilink,
parent_cls=cls_or_func,
attr_name=attr,
)

inherit_docstring_in_place = functools.partial(
_inherit_docstrings_in_place,
cls_or_func=cls_or_func,
parent=parent,
excluded=excluded,
overwrite_existing=overwrite_existing,
apilink=apilink,
)
inherit_docstring_in_place(doc_module=DocModule.get())
_docstring_inheritance_calls.append(inherit_docstring_in_place)
return cls_or_func

return decorator


DocModule.subscribe(_update_inherited_docstrings)


def expanduser_path_arg(argname: str) -> Callable[[Fn], Fn]:
"""
Decorate a function replacing its path argument with "user-expanded" value.
Expand Down

0 comments on commit 0a06455

Please sign in to comment.