From 1be6a33de8ca2857cc3bf090ac6c34f2508160e5 Mon Sep 17 00:00:00 2001 From: Nick Moore Date: Thu, 21 Mar 2024 11:10:03 +1100 Subject: [PATCH] regex tool no longer needs to index since column tool exists --- countess/plugins/regex.py | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/countess/plugins/regex.py b/countess/plugins/regex.py index d519b68..f4c4daa 100644 --- a/countess/plugins/regex.py +++ b/countess/plugins/regex.py @@ -36,7 +36,6 @@ class RegexToolPlugin(PandasTransformSingleToTuplePlugin): "Column Type", "string", ), - "index": BooleanParam("Index?"), }, ), ), @@ -68,10 +67,6 @@ def process_dataframe(self, dataframe: pd.DataFrame, logger: Logger) -> Optional except KeyError: pass - index_names = [pp.name.value for pp in self.parameters["output"] if pp.index.value] - if index_names: - df = df.set_index(index_names) - return df def process_value(self, value: str, logger: Logger) -> Optional[Iterable]: @@ -137,7 +132,6 @@ class RegexReaderPlugin(PandasInputFilesPlugin): "Column Type", "string", ), - "index": BooleanParam("Index?", False), }, ), ), @@ -153,9 +147,6 @@ def read_file_to_dataframe(self, file_params, logger, row_limit=None): output_parameters = list(self.parameters["output"])[: compiled_re.groups] columns = [p.name.value or f"column_{n+1}" for n, p in enumerate(output_parameters)] - index_columns = [ - p.name.value or f"column_{n+1}" for n, p in enumerate(output_parameters) if p.index.value - ] or None records = [] with open(file_params["filename"].value, "r", encoding="utf-8") as fh: @@ -171,11 +162,11 @@ def read_file_to_dataframe(self, file_params, logger, row_limit=None): if len(records) >= row_limit or num > 100 * row_limit: break elif len(records) >= 100000: - pdfs.append(pd.DataFrame.from_records(records, columns=columns, index=index_columns)) + pdfs.append(pd.DataFrame.from_records(records, columns=columns)) records = [] if len(records) > 0: - pdfs.append(pd.DataFrame.from_records(records, columns=columns, index=index_columns)) + pdfs.append(pd.DataFrame.from_records(records, columns=columns)) if len(pdfs) == 0: return pd.DataFrame([], columns=columns)