Skip to content

Commit

Permalink
add count mode and overlap to sampan
Browse files Browse the repository at this point in the history
  • Loading branch information
Lucpen committed Sep 17, 2024
1 parent 19a2435 commit f3d2d0c
Showing 1 changed file with 32 additions and 4 deletions.
36 changes: 32 additions & 4 deletions bin/drop_sample_annot.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import argparse
import csv
from pandas import read_csv, DataFrame, concat
from pandas import read_csv, DataFrame, concat, isna
import os

SCRIPT_VERSION = "v1.1"
Expand Down Expand Up @@ -56,7 +56,7 @@ def is_paired_end(single_end: str) -> bool:


def is_stranded(strandedness: str) -> str:
"""Logical funciton to determine if a sample is paired end"""
"""Logical funciton to determine sample strandness"""
if strandedness.lower() == "reverse":
return "reverse"
elif strandedness.lower() == "forward":
Expand All @@ -65,6 +65,30 @@ def is_stranded(strandedness: str) -> str:
return "no"


def count_mode(sample_count_mode: str) -> str:
    """Return the count mode to use, defaulting to "IntersectionStrict".

    The value is treated as missing when it is a pandas-null value
    (``None``/``NaN``/``NA``), an empty string, or the literal string "NA".

    Args:
        sample_count_mode: Count mode taken from the sample annotation.

    Returns:
        ``sample_count_mode`` unchanged when it is set, otherwise
        "IntersectionStrict".
    """
    # isna() must be evaluated first: it short-circuits null values before
    # they are compared against the string placeholders.
    if isna(sample_count_mode) or sample_count_mode in ("", "NA"):
        return "IntersectionStrict"
    return sample_count_mode


def count_overlaps(sample_count_overlap: str) -> "bool | str":
    """Return the count-overlaps setting to use, defaulting to ``True``.

    The value is treated as missing when it is a pandas-null value
    (``None``/``NaN``/``NA``), an empty string, or the literal string "NA".

    Args:
        sample_count_overlap: Count-overlaps value taken from the sample
            annotation.

    Returns:
        ``sample_count_overlap`` unchanged when it is set, otherwise the
        boolean ``True``.
    """
    # isna() must be evaluated first: it short-circuits null values before
    # they are compared against the string placeholders.
    if isna(sample_count_overlap) or sample_count_overlap in ("", "NA"):
        return True
    return sample_count_overlap


def write_final_annot_to_tsv(ref_count_file: str, ref_annot: str, out_file: str):
"""
Concatenates the Sample Annotation produced by SampleAnnotation with the one
Expand All @@ -81,6 +105,8 @@ def write_final_annot_to_tsv(ref_count_file: str, ref_annot: str, out_file: str)
)
print(f"Only {df_samples.shape[0]} samples were provided by the user")
df_samples.fillna("NA", inplace=True)
df_samples["COUNT_MODE"] = "IntersectionStrict"
df_samples["COUNT_OVERLAPS"] = True
df_samples.to_csv(out_file, index=False, sep="\t")
else:
df_reference: DataFrame = read_csv(ref_annot, sep="\t")
Expand All @@ -89,8 +115,10 @@ def write_final_annot_to_tsv(ref_count_file: str, ref_annot: str, out_file: str)
df_reference["SPLICE_COUNTS_DIR"].str.rstrip("/").apply(os.path.basename)
)
df_reference["DROP_GROUP"] = df_reference["DROP_GROUP"].str.replace(" ", "")
df_samples["COUNT_OVERLAPS"] = df_reference["COUNT_OVERLAPS"].iloc[0]
df_samples["COUNT_MODE"] = df_reference["COUNT_MODE"].iloc[0]
df_samples["COUNT_OVERLAPS"] = count_overlaps(
df_reference["COUNT_OVERLAPS"].iloc[0]
)
df_samples["COUNT_MODE"] = count_mode(df_reference["COUNT_MODE"].iloc[0])
df_samples["HPO_TERMS"] = df_reference["HPO_TERMS"].iloc[0]
for id in df_samples["RNA_ID"]:
df_reference = df_reference[
Expand Down

0 comments on commit f3d2d0c

Please sign in to comment.