Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Read and extract cwa from command line #43

Merged
merged 2 commits into from
Jul 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 14 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,10 @@ $ conda install -c oxwear actipy

## Usage

Process an Axivity AX3 (.cwa) file:
```python
import actipy

data, info = actipy.read_device("sample.cwa.gz",
data, info = actipy.read_device("sample.cwa.gz", # or "sample.gt3x.gz" or "sample.bin.gz"
lowpass_hz=20,
calibrate_gravity=True,
detect_nonwear=True,
Expand Down Expand Up @@ -85,7 +84,8 @@ info [dict]
```
Refer to the [Data Dictionary](data-dictionary.md) for a comprehensive list of outputs.

If you have a CSV file that you want to process, you can also use the data processing routines from `actipy.processing`:
### Processing a custom CSV file
You can also use the routines in `actipy.processing` to process custom CSV files, or for more fine-grained control:

```python
import actipy.processing as P
Expand All @@ -98,6 +98,17 @@ data, info_resample = P.resample(data, sample_rate)

See the [documentation](https://actipy.readthedocs.io/en/latest/) for more.

### Processing from the command line
We also provide a command-line tool to process a device file (currently, only AX3 and AX6 are supported) and output to CSV:
```bash
$ read_cwa sample.cwa.gz -o outputs --lowpass-hz 20 --resample-hz 50 --calibrate-gravity --detect-nonwear
```

Outputs:
- "outputs/sample/sample.csv.gz"
- "outputs/sample/sample-Info.json"


## Contributing
If you would like to contribute to this repository, please check out [CONTRIBUTING.md](https://github.com/OxWearables/actipy/blob/main/CONTRIBUTING.md).
We welcome contributions in the form of bug reports, feature requests, and pull requests.
Expand Down
6 changes: 6 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ def get_string(string, rel_path="src/actipy/__init__.py"):
"scipy>=1.7",
"pandas>=1.3",
"statsmodels>=0.13",
# "tqdm==4.64.*",
],
extras_require={
"dev": [
Expand All @@ -72,6 +73,11 @@ def get_string(string, rel_path="src/actipy/__init__.py"):
"sphinx_rtd_theme>=1.0",
"readthedocs-sphinx-search>=0.1",
"docutils<0.18",
],
},
entry_points={
"console_scripts": [
"read_cwa=actipy.read_cwa:main",
]
}
)
167 changes: 167 additions & 0 deletions src/actipy/read_cwa.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,167 @@
import time
from pathlib import Path
import argparse
import pandas as pd
import numpy as np
import os
import json
# from tqdm import tqdm

from actipy import read_device

"""
How to run the script:

```bash
python src/actipy/read_cwa.py data/test.bin

python src/actipy/read_cwa.py data/test.bin -o data/prepared/ -r 30 -g -f 20 -w -c x y z -q
```
"""


def main():
    """CLI entry point: read a device file, process it, and write a CSV
    plus a JSON info file under ``<outdir>/<basename>/``."""
    parser = argparse.ArgumentParser(
        description="A tool to read and extract data from an Axivity (.cwa) device, and save it to a .csv file",
        add_help=True,
    )
    parser.add_argument("filepath", help="Enter file to be processed")
    parser.add_argument("--outdir", "-o", help="Enter folder location to save output files", default="outputs/")
    parser.add_argument("--resample-hz", "-r", help="Resample rate for output data.", type=str, default=None)
    parser.add_argument("--lowpass-hz", "-f", help="Frequency of low pass filter.", type=str, default=None)
    parser.add_argument("--detect-nonwear", "-w", help="Detect non-wear.", action="store_true")
    parser.add_argument("--calibrate-gravity", "-g", help="Calibrate gravity.", action="store_true")
    parser.add_argument("--output-cols", "-c", help="Restrict output columns to those listed (excluding time index column). Output all columns if falsy.", type=str, nargs="+", default=None)
    parser.add_argument("--quiet", "-q", action="store_true", help="Suppress output.")

    args = parser.parse_args()

    verbose = not args.quiet

    # The CLI passes everything as strings; convert to the types read_device expects.
    data, info = read_device(
        args.filepath,
        lowpass_hz=validate_lowpass_hz(args.lowpass_hz),
        calibrate_gravity=args.calibrate_gravity,
        detect_nonwear=args.detect_nonwear,
        resample_hz=validate_resample_hz(args.resample_hz),
        verbose=verbose,
    )

    # Keep only the requested columns (all of them when no restriction was given).
    selected_cols = validate_output_cols(args.output_cols, data) or list(data.columns)
    data = data[selected_cols]

    # Outputs go in a per-file subfolder of the chosen output directory.
    _, basename, _ = resolve_path(args.filepath)
    outdir = Path(args.outdir) / basename
    outdir.mkdir(parents=True, exist_ok=True)

    csv_file = outdir / f"{basename}.csv.gz"
    if verbose:
        print("Saving dataframe to disk...", end="\r")
    t_start = time.perf_counter()
    data.to_csv(csv_file, index=True)
    elapsed_time = time.perf_counter() - t_start
    if verbose:
        print(f"Saving dataframe to disk... Done! ({elapsed_time:0.2f}s)")
        print(f"Dataframe saved to: {os.path.abspath(csv_file)}")

    info_file = outdir / f"{basename}-Info.json"
    # NpEncoder converts numpy scalars/arrays so json.dump does not choke on them.
    with open(info_file, "w") as f:
        json.dump(info, f, ensure_ascii=False, indent=4, cls=NpEncoder)
    if verbose:
        print(f"Info file saved to: {os.path.abspath(info_file)}")


def validate_resample_hz(resample_hz):
    """Parse the --resample-hz command-line string.

    Returns None for "none"/empty/missing, the booleans for "true"/"false"
    (forwarded to read_device unchanged), or a numeric rate in Hz.

    Raises:
        ValueError: if the string is none of the above and not a number.
    """
    if resample_hz is None or resample_hz.lower() in ["none", ""]:
        return None
    if resample_hz.lower() in ["true"]:
        return True
    if resample_hz.lower() in ["false"]:
        return False
    try:
        # Prefer int; fall back to float so fractional rates like "12.5" work.
        return int(resample_hz)
    except ValueError:
        try:
            return float(resample_hz)
        except ValueError:
            raise ValueError("Sample rate must be a number, 'None', 'True', or 'False'.") from None


def validate_lowpass_hz(lowpass_hz):
    """Parse the --lowpass-hz command-line string.

    Returns None for "none"/empty/missing, False for "false" (forwarded to
    read_device unchanged), or a numeric cutoff frequency in Hz.

    Raises:
        ValueError: if the string is none of the above and not a number.
    """
    if lowpass_hz is None or lowpass_hz.lower() in ["none", ""]:
        return None
    if lowpass_hz.lower() in ["false"]:
        return False
    try:
        # Prefer int; fall back to float so fractional cutoffs like "12.5" work.
        return int(lowpass_hz)
    except ValueError:
        try:
            return float(lowpass_hz)
        except ValueError:
            raise ValueError("Lowpass hz must be a number, 'None', or 'False'.") from None


def validate_output_cols(output_cols, data: pd.DataFrame):
    """Validate the --output-cols command-line value against *data*.

    Returns None when no restriction applies (missing, empty, or a single
    falsy token like "none"/"false"), otherwise the column list unchanged.

    Raises:
        ValueError: if any requested column is absent from *data*.
    """
    # No restriction requested.
    if not output_cols:
        return None
    if len(output_cols) == 1 and output_cols[0].lower() in ["none", "", "false"]:
        return None

    # Report the first column name that does not exist in the data.
    for elem in output_cols:
        if elem not in data.columns:
            raise ValueError(
                f"Column {elem} is not a column in the extracted data: {list(data.columns)}."
            )

    return output_cols


def df_to_csv(
    df: pd.DataFrame, filename: str, progress_desc: str = "", verbose: bool = False
):
    """Write *df* to *filename* as CSV, optionally showing a progress bar.

    When *verbose* is set and tqdm is available, the frame is written in
    chunks so progress can be reported; otherwise it is written in one go.
    The resulting file content is identical either way.
    """
    if verbose:
        try:
            # tqdm is an optional dependency (its entry in setup.py is
            # commented out), so import lazily and degrade gracefully.
            # The original code referenced `tqdm` without importing it,
            # raising NameError whenever verbose=True.
            from tqdm import tqdm
        except ImportError:
            df.to_csv(filename, index=True)
            return

        # Cap the chunk count by the row count to avoid empty chunks
        # (and keep at least one chunk so the header is always written).
        chunks = np.array_split(df.index, min(1000, max(len(df), 1)))

        pbar = tqdm(
            total=len(chunks),
            bar_format="{desc}|{bar}| {percentage:3.0f}% [{elapsed}<{remaining}]",
            desc=progress_desc,
        )

        for chunk, subset in enumerate(chunks):
            # First chunk creates the file with a header; the rest append.
            if chunk == 0:
                df.loc[subset].to_csv(filename, mode="w")
            else:
                df.loc[subset].to_csv(filename, header=None, mode="a")

            pbar.update(1)

        pbar.close()
    else:
        df.to_csv(filename, index=True)


def resolve_path(path):
    """ Return parent folder, file name and file extension.

    Handles compound extensions, e.g. "data/sample.cwa.gz" ->
    (Path("data"), "sample", ".cwa"), and extension-less paths, for which
    the extension is "" (the original indexed p.suffixes[0] unguarded,
    raising IndexError in that case).
    """
    p = Path(path)
    suffixes = p.suffixes  # e.g. [".cwa", ".gz"]
    extension = suffixes[0] if suffixes else ""
    # Strip the suffixes from the end only: str.rsplit(extension) would also
    # split on occurrences of the extension inside the base name itself.
    all_suffixes = "".join(suffixes)
    filename = p.name[:-len(all_suffixes)] if all_suffixes else p.name
    dirname = p.parent
    return dirname, filename, extension


class NpEncoder(json.JSONEncoder):
    """JSON encoder that converts numpy scalars/arrays to native Python
    types and serializes pandas null values as NaN."""

    def default(self, obj):
        # Map numpy types to their plain-Python equivalents.
        for np_type, convert in (
            (np.integer, int),
            (np.floating, float),
            (np.ndarray, lambda arr: arr.tolist()),
        ):
            if isinstance(obj, np_type):
                return convert(obj)
        if pd.isnull(obj):  # handles pandas NAType
            return np.nan
        # Anything else: defer to the base class (raises TypeError).
        return json.JSONEncoder.default(self, obj)


# Allow running this module directly as a script, in addition to the
# `read_cwa` console-script entry point declared in setup.py.
if __name__ == "__main__":
    main()
Loading