"""Command-line tool ``read_cwa``: parse an Axivity (.cwa) device file and
save the extracted data to disk as a compressed CSV plus a JSON info file.

How to run the script:

```bash
python src/actipy/read_cwa.py data/test.bin

python src/actipy/read_cwa.py data/test.bin -o data/prepared/ -r 30 -g -f 20 -w -c x y z -q
```
"""

import argparse
import json
import os
import time
from pathlib import Path

import numpy as np
import pandas as pd


def main():
    """Parse CLI arguments, read the device file and write the outputs.

    Outputs are written under ``<outdir>/<basename>/`` as
    ``<basename>.csv.gz`` (the data) and ``<basename>-Info.json`` (metadata).
    """
    parser = argparse.ArgumentParser(
        description="A tool to read and extract data from an Axivity (.cwa) device, and save it to a .csv file",
        add_help=True,
    )
    parser.add_argument("filepath", help="Enter file to be processed")
    parser.add_argument("--outdir", "-o", help="Enter folder location to save output files", default="outputs/")
    # -r and -f are read as strings so the user can also pass "None"/"True"/"False";
    # they are converted/validated below.
    parser.add_argument("--resample-hz", "-r", help="Resample rate for output data.", type=str, default=None)
    parser.add_argument("--lowpass-hz", "-f", help="Frequency of low pass filter.", type=str, default=None)
    parser.add_argument("--detect-nonwear", "-w", help="Detect non-wear.", action="store_true")
    parser.add_argument("--calibrate-gravity", "-g", help="Calibrate gravity.", action="store_true")
    parser.add_argument("--output-cols", "-c", help="Restrict output columns to those listed (excluding time index column). Output all columns if falsy.", type=str, nargs="+", default=None)
    parser.add_argument("--quiet", "-q", action="store_true", help="Suppress output.")

    args = parser.parse_args()

    verbose = not args.quiet
    lowpass_hz = validate_lowpass_hz(args.lowpass_hz)
    resample_hz = validate_resample_hz(args.resample_hz)

    # Deferred import: keeps this module importable (e.g. to use/test the
    # validator helpers below) even if the full actipy package cannot load.
    from actipy import read_device

    data, info = read_device(
        args.filepath,
        lowpass_hz=lowpass_hz,
        calibrate_gravity=args.calibrate_gravity,
        detect_nonwear=args.detect_nonwear,
        resample_hz=resample_hz,
        verbose=verbose,
    )

    # Falsy validation result means "keep all columns".
    output_cols = validate_output_cols(args.output_cols, data) or list(data.columns)
    data = data[output_cols]

    # Output paths: group everything under <outdir>/<basename>/
    basename = resolve_path(args.filepath)[1]
    outdir = Path(args.outdir) / basename
    outdir.mkdir(parents=True, exist_ok=True)

    csv_file = outdir / f"{basename}.csv.gz"
    if verbose:
        print("Saving dataframe to disk...", end="\r")
    before = time.perf_counter()
    data.to_csv(csv_file, index=True)
    elapsed_time = time.perf_counter() - before
    if verbose:
        print(f"Saving dataframe to disk... Done! ({elapsed_time:0.2f}s)")
        print(f"Dataframe saved to: {os.path.abspath(csv_file)}")

    info_file = outdir / f"{basename}-Info.json"
    with open(info_file, "w") as f:
        # NpEncoder converts numpy scalars/arrays that json can't serialize.
        json.dump(info, f, ensure_ascii=False, indent=4, cls=NpEncoder)
    if verbose:
        print(f"Info file saved to: {os.path.abspath(info_file)}")


def validate_resample_hz(resample_hz):
    """Convert the --resample-hz CLI string to None, bool, or int.

    Accepts "none"/"" -> None, "true"/"false" -> bool, otherwise an integer.

    :raises ValueError: if the string is none of the above.
    """
    if resample_hz is None or resample_hz.lower() in ["none", ""]:
        return None
    if resample_hz.lower() in ["true"]:
        return True
    if resample_hz.lower() in ["false"]:
        return False
    try:
        resample_hz = int(resample_hz)
    except ValueError:
        raise ValueError("Sample rate must be a number, 'None', 'True', or 'False'.")
    return resample_hz


def validate_lowpass_hz(lowpass_hz):
    """Convert the --lowpass-hz CLI string to None, False, or int.

    Accepts "none"/"" -> None, "false" -> False, otherwise an integer.

    :raises ValueError: if the string is none of the above.
    """
    if lowpass_hz is None or lowpass_hz.lower() in ["none", ""]:
        return None
    if lowpass_hz.lower() in ["false"]:
        return False
    try:
        lowpass_hz = int(lowpass_hz)
    except ValueError:
        raise ValueError("Lowpass hz must be a number, 'None', or 'False'.")
    return lowpass_hz


def validate_output_cols(output_cols, data: pd.DataFrame):
    """Validate the --output-cols list against the dataframe's columns.

    Returns None for falsy inputs (None, [], ["none"], [""], ["false"]),
    otherwise returns the list unchanged.

    :raises ValueError: if a requested column is not in ``data``.
    """
    if output_cols is None or output_cols == []:
        return None
    if len(output_cols) == 1:
        if output_cols[0].lower() in ["none", "", "false"]:
            return None

    for elem in output_cols:
        if elem not in data.columns:
            raise ValueError(
                f"Column {elem} is not a column in the extracted data: {list(data.columns)}."
            )

    return output_cols


def df_to_csv(
    df: pd.DataFrame, filename: str, progress_desc: str = "", verbose: bool = False
):
    """Write ``df`` to ``filename``, optionally showing a progress bar.

    Bug fix: the original referenced ``tqdm`` unconditionally while the
    ``from tqdm import tqdm`` import (and the setup.py dependency) was
    commented out, so ``verbose=True`` raised ``NameError``. tqdm is now
    imported lazily, falling back to a plain single write if unavailable.
    """
    if verbose:
        try:
            from tqdm import tqdm  # optional dependency
        except ImportError:
            df.to_csv(filename, index=True)
            return

        # Write in ~1000 chunks so the progress bar can update.
        chunks = np.array_split(df.index, 1000)

        pbar = tqdm(
            total=len(chunks),
            bar_format="{desc}|{bar}| {percentage:3.0f}% [{elapsed}<{remaining}]",
            desc=progress_desc,
        )

        for chunk, subset in enumerate(chunks):
            if chunk == 0:
                # First chunk writes the header and truncates the file.
                df.loc[subset].to_csv(filename, mode="w")
            else:
                df.loc[subset].to_csv(filename, header=None, mode="a")

            pbar.update(1)

        pbar.close()
    else:
        df.to_csv(filename, index=True)


def resolve_path(path):
    """Return parent folder, file name and file extension.

    The "extension" is the *first* suffix, so compound extensions such as
    "sample.cwa.gz" resolve to ("sample", ".cwa"). Bug fix: the original
    raised IndexError for paths with no extension; now returns ("", name).
    """
    p = Path(path)
    extension = p.suffixes[0] if p.suffixes else ""
    # partition (vs. rsplit) splits only at the first occurrence of the
    # extension, so a stem that happens to contain it is not mangled.
    filename = p.name.partition(extension)[0] if extension else p.name
    dirname = p.parent
    return dirname, filename, extension


class NpEncoder(json.JSONEncoder):
    """JSON encoder that handles numpy scalars/arrays and pandas nulls."""

    def default(self, obj):
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        if pd.isnull(obj):  # handles pandas NAType
            # NOTE(review): serializes as NaN, which is not strict JSON —
            # consumers must parse with a NaN-tolerant reader. Confirm intended.
            return np.nan
        return json.JSONEncoder.default(self, obj)


if __name__ == "__main__":
    main()
--- README.md | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 6726556..c4ffe3e 100644 --- a/README.md +++ b/README.md @@ -35,11 +35,10 @@ $ conda install -c oxwear actipy ## Usage -Process an Axivity3 (.cwa) file: ```python import actipy -data, info = actipy.read_device("sample.cwa.gz", +data, info = actipy.read_device("sample.cwa.gz", # or "sample.gt3x.gz" or "sample.bin.gz" lowpass_hz=20, calibrate_gravity=True, detect_nonwear=True, @@ -85,7 +84,8 @@ info [dict] ``` Refer to the [Data Dictionary](data-dictionary.md) for a comprehensive list of outputs. -If you have a CSV file that you want to process, you can also use the data processing routines from `actipy.processing`: +### Processing a custom CSV file +You can also use the routines in `actipy.processing` to process custom CSV files, or for more fine-grained control: ```python import actipy.processing as P @@ -98,6 +98,17 @@ data, info_resample = P.resample(data, sample_rate) See the [documentation](https://actipy.readthedocs.io/en/latest/) for more. +### Processing from the command line +We also provide a command-line tool to process a device file (currently, only AX3 and AX6 are supported) and output to CSV: +```bash +$ read_cwa sample.cwa.gz -o outputs --lowpass-hz 20 --resample-hz 50 --calibrate-gravity --detect-nonwear +``` + +Outputs: + - "outputs/sample/sample.csv.gz" + - "outputs/sample/sample-Info.json" + + ## Contributing If you would like to contribute to this repository, please check out [CONTRIBUTING.md](https://github.com/OxWearables/actipy/blob/main/CONTRIBUTING.md). We welcome contributions in the form of bug reports, feature requests, and pull requests.