diff --git a/countess/core/config.py b/countess/core/config.py index c81883f..f9929c7 100644 --- a/countess/core/config.py +++ b/countess/core/config.py @@ -70,7 +70,9 @@ def read_config( if key.startswith("_parent."): node.add_parent(nodes_by_name[val]) - node.config = [(key, ast.literal_eval(val), base_dir) for key, val in config_dict.items() if not key.startswith("_")] + node.config = [ + (key, ast.literal_eval(val), base_dir) for key, val in config_dict.items() if not key.startswith("_") + ] nodes_by_name[section_name] = node diff --git a/countess/core/pipeline.py b/countess/core/pipeline.py index 3e4f852..0d236c8 100644 --- a/countess/core/pipeline.py +++ b/countess/core/pipeline.py @@ -1,9 +1,9 @@ from dataclasses import dataclass, field +from itertools import chain from multiprocessing import Process, Queue from os import cpu_count from queue import Empty from typing import Any, Iterable, Optional -from itertools import chain from countess.core.logger import Logger from countess.core.plugins import BasePlugin, FileInputPlugin, ProcessPlugin, get_plugin_classes @@ -96,10 +96,14 @@ def execute(self, logger: Logger, row_limit: Optional[int] = None): elif isinstance(self.plugin, FileInputPlugin): num_files = self.plugin.num_files() row_limit_each_file = row_limit // num_files if row_limit is not None else None - self.result = multi_iterator_map(self.plugin.load_file, range(0, num_files), args=(logger, row_limit_each_file)) + self.result = multi_iterator_map( + self.plugin.load_file, range(0, num_files), args=(logger, row_limit_each_file) + ) elif isinstance(self.plugin, ProcessPlugin): self.plugin.prepare([p.name for p in self.parent_nodes], row_limit) - self.result = chain(self.plugin.collect(self.process_parent_iterables(logger)), self.plugin.finalize(logger)) + self.result = chain( + self.plugin.collect(self.process_parent_iterables(logger)), self.plugin.finalize(logger) + ) if row_limit is not None or len(self.child_nodes) != 1: self.result = list(self.result) @@ -238,7 +242,9 @@ def tidy(self): if len(node.parent_nodes) == 0: stratum[node] = min(stratum[n] for n in node.child_nodes) - 1 else: - stratum[node] = (min(stratum[n] for n in node.child_nodes) + max(stratum[n] for n in node.parent_nodes)) // 2 + stratum[node] = ( + min(stratum[n] for n in node.child_nodes) + max(stratum[n] for n in node.parent_nodes) + ) // 2 max_stratum = max(stratum.values()) diff --git a/countess/core/plugins.py b/countess/core/plugins.py index aa49003..6873b36 100644 --- a/countess/core/plugins.py +++ b/countess/core/plugins.py @@ -444,63 +444,81 @@ def series_to_dataframe(self, series: pd.Series) -> pd.DataFrame: # Six combinations of the five mixins! -class PandasTransformSingleToSinglePlugin(PandasTransformXToSingleMixin, PandasTransformSingleToXMixin, PandasTransformBasePlugin): +class PandasTransformSingleToSinglePlugin( + PandasTransformXToSingleMixin, PandasTransformSingleToXMixin, PandasTransformBasePlugin +): """Transformer which takes a single column and returns a single value""" def process_value(self, value, logger: Logger): raise NotImplementedError(f"{self.__class__}.process_value()") -class PandasTransformSingleToTuplePlugin(PandasTransformXToTupleMixin, PandasTransformSingleToXMixin, PandasTransformBasePlugin): +class PandasTransformSingleToTuplePlugin( + PandasTransformXToTupleMixin, PandasTransformSingleToXMixin, PandasTransformBasePlugin +): """Transformer which takes a single column and returns a tuple of values""" def process_value(self, value, logger: Logger): raise NotImplementedError(f"{self.__class__}.process_value()") -class PandasTransformSingleToDictPlugin(PandasTransformXToDictMixin, PandasTransformSingleToXMixin, PandasTransformBasePlugin): +class PandasTransformSingleToDictPlugin( + PandasTransformXToDictMixin, PandasTransformSingleToXMixin, PandasTransformBasePlugin +): """Transformer which takes a single column and returns a dictionary of values""" def process_value(self, value, logger: Logger): raise NotImplementedError(f"{self.__class__}.process_value()") -class PandasTransformRowToSinglePlugin(PandasTransformXToSingleMixin, PandasTransformRowToXMixin, PandasTransformBasePlugin): +class PandasTransformRowToSinglePlugin( + PandasTransformXToSingleMixin, PandasTransformRowToXMixin, PandasTransformBasePlugin +): """Transformer which takes a whole row and returns a single value""" def process_row(self, row: pd.Series, logger: Logger): raise NotImplementedError(f"{self.__class__}.process_row()") -class PandasTransformRowToTuplePlugin(PandasTransformXToTupleMixin, PandasTransformRowToXMixin, PandasTransformBasePlugin): +class PandasTransformRowToTuplePlugin( + PandasTransformXToTupleMixin, PandasTransformRowToXMixin, PandasTransformBasePlugin +): """Transformer which takes a whole row and returns a tuple of values""" def process_row(self, row: pd.Series, logger: Logger): raise NotImplementedError(f"{self.__class__}.process_row()") -class PandasTransformRowToDictPlugin(PandasTransformXToDictMixin, PandasTransformRowToXMixin, PandasTransformBasePlugin): +class PandasTransformRowToDictPlugin( + PandasTransformXToDictMixin, PandasTransformRowToXMixin, PandasTransformBasePlugin +): """Transformer which takes a whole row and returns a dictionary of values""" def process_row(self, row: pd.Series, logger: Logger): raise NotImplementedError(f"{self.__class__}.process_row()") -class PandasTransformDictToSinglePlugin(PandasTransformXToSingleMixin, PandasTransformDictToXMixin, PandasTransformBasePlugin): +class PandasTransformDictToSinglePlugin( + PandasTransformXToSingleMixin, PandasTransformDictToXMixin, PandasTransformBasePlugin +): """Transformer which takes a whole row and returns a single value""" def process_dict(self, data, logger: Logger): raise NotImplementedError(f"{self.__class__}.process_dict()") -class PandasTransformDictToTuplePlugin(PandasTransformXToTupleMixin, PandasTransformDictToXMixin, PandasTransformBasePlugin): +class PandasTransformDictToTuplePlugin( + PandasTransformXToTupleMixin, PandasTransformDictToXMixin, PandasTransformBasePlugin +): """Transformer which takes a whole row and returns a tuple of values""" def process_dict(self, data, logger: Logger): raise NotImplementedError(f"{self.__class__}.process_dict()") -class PandasTransformDictToDictPlugin(PandasTransformXToDictMixin, PandasTransformDictToXMixin, PandasTransformBasePlugin): +class PandasTransformDictToDictPlugin( + PandasTransformXToDictMixin, PandasTransformDictToXMixin, PandasTransformBasePlugin +): """Transformer which takes a whole row and returns a dictionary of values""" def process_dict(self, data, logger: Logger): @@ -527,8 +545,8 @@ def num_files(self): def load_file(self, file_number: int, logger: Logger, row_limit: Optional[int] = None) -> Iterable[pd.DataFrame]: raise NotImplementedError(f"{self.__class__}.load_file()") -class PandasInputFilesPlugin(PandasInputPlugin): +class PandasInputFilesPlugin(PandasInputPlugin): def num_files(self): return len(self.parameters["files"]) @@ -541,7 +559,6 @@ def read_file_to_dataframe(self, file_params, logger, row_limit=None) -> pd.Data raise NotImplementedError(f"{self.__class__}.read_file_to_dataframe") - class PandasOutputPlugin(PandasProcessPlugin): def process_inputs(self, inputs: Mapping[str, Iterable[pd.DataFrame]], logger: Logger, row_limit: Optional[int]): iterators = set(iter(input) for input in inputs.values()) diff --git a/countess/gui/config.py b/countess/gui/config.py index 82e0b8c..9807243 100644 --- a/countess/gui/config.py +++ b/countess/gui/config.py @@ -93,7 +93,9 @@ def __init__( # pylint: disable=R0912,R0915 if parameter.read_only: self.entry["state"] = tk.DISABLED - elif isinstance(parameter, ArrayParam) and self.level == 0 and not isinstance(parameter.param, TabularMultiParam): + elif ( + isinstance(parameter, ArrayParam) and self.level == 0 and not isinstance(parameter.param, TabularMultiParam) + ): self.entry = tk.Frame(tk_parent) self.entry.columnconfigure(0, weight=1) drc = self.delete_row_callback if not parameter.read_only else None @@ -168,7 +170,11 @@ def update(self): else: self.entry["fg"] = None - if isinstance(self.parameter, ArrayParam) and self.level == 0 and not isinstance(self.parameter.param, TabularMultiParam): + if ( + isinstance(self.parameter, ArrayParam) + and self.level == 0 + and not isinstance(self.parameter.param, TabularMultiParam) + ): self.update_subwrappers_framed(self.parameter.params, self.delete_row_callback) if self.button: self.button.grid(row=len(self.parameter.params) + 1, padx=10) @@ -457,7 +463,9 @@ def update(self) -> None: if key in self.wrapper_cache: self.wrapper_cache[key].update() else: - self.wrapper_cache[key] = ParameterWrapper(self.subframe, parameter, self.change_parameter, level=top_level) + self.wrapper_cache[key] = ParameterWrapper( + self.subframe, parameter, self.change_parameter, level=top_level + ) self.wrapper_cache[key].set_row(n + 1) # Remove any parameter wrappers no longer needed diff --git a/countess/gui/main.py b/countess/gui/main.py index e73036b..ad6b883 100644 --- a/countess/gui/main.py +++ b/countess/gui/main.py @@ -148,7 +148,7 @@ def show_preview_subframe(self): self.preview_subframe.destroy() if all(isinstance(r, (str, bytes)) for r in self.node.result): - text_result = ''.join(self.node.result) + text_result = "".join(self.node.result) self.preview_subframe = tk.Frame(self.frame) self.preview_subframe.rowconfigure(1, weight=1) n_lines = len(text_result.splitlines()) @@ -230,7 +230,9 @@ class ButtonMenu: # pylint: disable=R0903 def __init__(self, tk_parent, buttons): self.frame = tk.Frame(tk_parent) for button_number, (button_label, button_command) in enumerate(buttons): - tk.Button(self.frame, text=button_label, command=button_command).grid(row=0, column=button_number, sticky=tk.EW) + tk.Button(self.frame, text=button_label, command=button_command).grid( + row=0, column=button_number, sticky=tk.EW + ) self.frame.grid(sticky=tk.NSEW) diff --git a/countess/plugins/correlation.py b/countess/plugins/correlation.py index c7efa81..3918fc5 100644 --- a/countess/plugins/correlation.py +++ b/countess/plugins/correlation.py @@ -1,4 +1,4 @@ -from typing import Optional, Iterable +from typing import Iterable, Optional import pandas as pd @@ -22,14 +22,14 @@ class CorrelationPlugin(PandasSimplePlugin): "column1": ColumnChoiceParam("Column 1"), "column2": ColumnChoiceParam("Column 2"), } - columns : list[str] = [] - dataframes : list[pd.DataFrame] = [] + columns: list[str] = [] + dataframes: list[pd.DataFrame] = [] def prepare(self, sources: list[str], row_limit: Optional[int]): assert isinstance(self.parameters["group"], ColumnOrNoneChoiceParam) column1 = self.parameters["column1"].value column2 = self.parameters["column2"].value - self.columns = [ column1, column2 ] + self.columns = [column1, column2] if not self.parameters["group"].is_none(): self.columns.append(self.parameters["group"].value) self.dataframes = [] diff --git a/countess/plugins/data_table.py b/countess/plugins/data_table.py index 2ef82f6..e94fb66 100644 --- a/countess/plugins/data_table.py +++ b/countess/plugins/data_table.py @@ -83,6 +83,8 @@ def load_file(self, file_number: int, logger: Logger, row_limit: Optional[int] = self.fix_columns() values = [] for row in self.parameters["rows"]: - values.append(dict((col["name"].value, row[str(num)].value) for num, col in enumerate(self.parameters["columns"]))) + values.append( + dict((col["name"].value, row[str(num)].value) for num, col in enumerate(self.parameters["columns"])) + ) yield pd.DataFrame(values) diff --git a/countess/plugins/group_by.py b/countess/plugins/group_by.py index 0ffb8da..85dd48e 100644 --- a/countess/plugins/group_by.py +++ b/countess/plugins/group_by.py @@ -1,4 +1,4 @@ -from typing import Iterable, Optional, List +from typing import Iterable, List, Optional import pandas as pd @@ -44,7 +44,7 @@ class GroupByPlugin(PandasSimplePlugin): "join": BooleanParam("Join Back?"), } - dataframes : Optional[List[pd.DataFrame]] = None + dataframes: Optional[List[pd.DataFrame]] = None def prepare(self, *_): self.dataframes = [] diff --git a/countess/plugins/mutagenize.py b/countess/plugins/mutagenize.py index fa74147..cf8b8e1 100644 --- a/countess/plugins/mutagenize.py +++ b/countess/plugins/mutagenize.py @@ -9,7 +9,9 @@ from countess.core.plugins import PandasInputPlugin -def mutagenize(sequence: str, mutate: bool, delete: bool, insert: bool) -> Iterable[tuple[str, int, Optional[str], Optional[str]]]: +def mutagenize( + sequence: str, mutate: bool, delete: bool, insert: bool +) -> Iterable[tuple[str, int, Optional[str], Optional[str]]]: # XXX it'd be faster, but less neat, to include logic for duplicate # removal here instead of producing duplicates and then removing them # later. diff --git a/countess/plugins/pivot.py b/countess/plugins/pivot.py index fc6b745..7aeb347 100644 --- a/countess/plugins/pivot.py +++ b/countess/plugins/pivot.py @@ -17,7 +17,9 @@ class PivotPlugin(PandasSimplePlugin): version = VERSION link = "https://countess-project.github.io/CountESS/plugins/#pivot-tool" - parameters = {"columns": PerColumnArrayParam("Columns", ChoiceParam("Role", choices=["Index", "Pivot", "Expand", "Drop"]))} + parameters = { + "columns": PerColumnArrayParam("Columns", ChoiceParam("Role", choices=["Index", "Pivot", "Expand", "Drop"])) + } def process_dataframe(self, dataframe: pd.DataFrame, logger: Logger) -> pd.DataFrame: assert isinstance(self.parameters["columns"], PerColumnArrayParam) diff --git a/countess/plugins/regex.py b/countess/plugins/regex.py index 356719c..d46df9a 100644 --- a/countess/plugins/regex.py +++ b/countess/plugins/regex.py @@ -125,7 +125,9 @@ def read_file_to_dataframe(self, file_params, logger, row_limit=None): output_parameters = list(self.parameters["output"])[: compiled_re.groups] columns = [p.name.value or f"column_{n+1}" for n, p in enumerate(output_parameters)] - index_columns = [p.name.value or f"column_{n+1}" for n, p in enumerate(output_parameters) if p.index.value] or None + index_columns = [ + p.name.value or f"column_{n+1}" for n, p in enumerate(output_parameters) if p.index.value + ] or None records = [] with open(file_params["filename"].value, "r", encoding="utf-8") as fh: diff --git a/countess/utils/pandas.py b/countess/utils/pandas.py index 458e347..4a1b85d 100644 --- a/countess/utils/pandas.py +++ b/countess/utils/pandas.py @@ -8,7 +8,11 @@ def get_all_indexes(dataframe: pd.DataFrame) -> Dict[str, Any]: if dataframe.index.name: return {str(dataframe.index.name): dataframe.index.dtype} - elif hasattr(dataframe.index, "names") and hasattr(dataframe.index, "dtypes") and dataframe.index.names[0] is not None: + elif ( + hasattr(dataframe.index, "names") + and hasattr(dataframe.index, "dtypes") + and dataframe.index.names[0] is not None + ): return dict(zip(dataframe.index.names, dataframe.index.dtypes)) else: return {} diff --git a/pyproject.toml b/pyproject.toml index 0737924..59a260b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -100,7 +100,7 @@ disable = [ "too-many-public-methods", "unidiomatic-typecheck", ] -max-line-length = 132 +max-line-length = 120 [tool.black] -line-length = 132 +line-length = 120