updated

xiaoli-dong · Jan 19, 2024 · 2f22695 · 2f22695
1 parent 7f8c57b
commit 2f22695
Show file tree

Hide file tree

Showing 70 changed files with 3,020 additions and 840 deletions.
diff --git a/bin/combine_jsons.py b/bin/combine_jsons.py
@@ -0,0 +1,43 @@
+#!/usr/bin/env python
+
+import argparse
+import json
+
+def main():
+    """Combine several JSON files into a single JSON array.
+
+    Reads a comma-separated list of paths from ``-i/--input``, parses each
+    file, and writes the parsed documents, in input order, as one JSON list
+    (indented by four spaces) to ``-o/--output``.
+
+    Raises:
+        OSError: if an input file cannot be read or the output cannot be
+            written.
+        json.JSONDecodeError: if an input file is not valid JSON.
+    """
+    description = "Combine multiple json files into a single json file"
+    parser = argparse.ArgumentParser(description=description)
+
+    parser.add_argument(
+        "-i",
+        "--input",
+        required=True,
+        help="Comma separated json file list, for example: 'f1.json,f2.json,f3.json'",
+    )
+    # Not marked as required, so that the declared default can actually apply;
+    # callers that pass -o/--output explicitly are unaffected.
+    parser.add_argument("-o", "--output", default="combined.json", help="Output file name")
+
+    args = parser.parse_args()
+
+    # Tolerate blanks around the commas and stray leading/trailing commas,
+    # which would otherwise become padded or empty file names.
+    json_files = [name.strip() for name in args.input.split(",") if name.strip()]
+    if not json_files:
+        parser.error("no input files given")
+
+    # Load each JSON file into a Python object, keeping the input order.
+    python_objects = []
+    for json_file in json_files:
+        # Progress trace on stdout: one line per file being read.
+        print(json_file)
+        with open(json_file, "r", encoding="utf-8") as fin:
+            python_objects.append(json.load(fin))
+
+    # Dump all the Python objects into a single JSON file. The with-block
+    # closes the file, so no explicit close() is needed afterwards.
+    with open(args.output, "w", encoding="utf-8") as fout:
+        json.dump(python_objects, fout, indent=4)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/bin/combine_xml.py b/bin/combine_xml.py
@@ -0,0 +1,29 @@
+#!/usr/bin/env python
+
+import argparse
+import xml.etree.ElementTree as ET
+import sys
+def main():
+    """Concatenate the root elements of several XML files into one file.
+
+    ``-i/--input`` is a single string holding whitespace-separated file
+    names. Each file is parsed, its root element is serialized, and the
+    serialized elements are written to ``-o/--output`` one after another,
+    in input order, with no enclosing element added around them.
+
+    Raises:
+        OSError: if an input file cannot be read or the output cannot be
+            written.
+        xml.etree.ElementTree.ParseError: if an input file is not
+            well-formed XML.
+    """
+    description = "Combine multiple xml files into a single xml file"
+    parser = argparse.ArgumentParser(description=description)
+
+    parser.add_argument(
+        "-i",
+        "--input",
+        required=True,
+        help="Space separated xml file name list, for example: 'f1.xml f2.xml f3.xml'",
+    )
+    # Not marked as required, so that the declared default can actually apply;
+    # callers that pass -o/--output explicitly are unaffected.
+    parser.add_argument("-o", "--output", default="combined.xml", help="Output file name")
+
+    args = parser.parse_args()
+
+    xml_files = args.input.split()
+
+    # Open in write mode so the output is replaced on every run; append mode
+    # would stack the new content on top of any existing output file.
+    with open(args.output, "w") as fout:
+        for xml_file in xml_files:
+            root = ET.parse(xml_file).getroot()
+            # tostring() returns ASCII bytes by default (non-ASCII characters
+            # become character references), so decoding is always safe.
+            fout.write(ET.tostring(root).decode("utf-8"))
+
+
+if __name__ == "__main__":
+    main()
diff --git a/bin/formatCSV.py b/bin/formatCSV.py
diff --git a/bin/reformat_assembly-stats_tsv.py b/bin/reformat_assembly-stats_tsv.py
@@ -0,0 +1,56 @@
+#!/usr/bin/env python
+
+import argparse
+import csv
+
+def main():
+    """Put the sample name into the first column of a delimited table.
+
+    The first header field of the input table is renamed to ``sampleid``
+    and the first field of every data row is replaced by the value given
+    with ``-s/--sname``. All other columns are copied through unchanged.
+    The same delimiter (tab by default) is used for reading and writing.
+
+    Raises:
+        SystemExit: if the input file has no header line.
+        OSError: if the input cannot be read or the output cannot be written.
+    """
+    description = (
+        "rename the first column of an assembly-stats table to 'sampleid' "
+        "and set its value to the given sample name"
+    )
+    parser = argparse.ArgumentParser(description=description)
+
+    parser.add_argument(
+        "-i",
+        "--input",
+        required=True,
+        help="Input table with a header line",
+    )
+    parser.add_argument(
+        "-o",
+        "--output",
+        required=True,
+        help="Output file name",
+    )
+    parser.add_argument(
+        "-s",
+        "--sname",
+        required=True,
+        help="Sample name written into the first column of every row",
+    )
+    parser.add_argument(
+        "-d",
+        "--delimiter",
+        default="\t",
+        help="input and output delimiter",
+    )
+    args = parser.parse_args()
+
+    # newline="" is what the csv module expects for both reading and writing;
+    # without it the writer's "\r\n" terminator can be doubled on Windows.
+    with open(args.input, "r", encoding="utf8", newline="") as f_input:
+        csvreader = csv.DictReader(f_input, delimiter=args.delimiter)
+
+        # Check the header before the output file is created, so that empty
+        # input gives a clear message instead of a TypeError.
+        if not csvreader.fieldnames:
+            raise SystemExit(f"error: no header line found in {args.input}")
+
+        # Rename the first column and hand the new header back to the reader
+        # explicitly, so that every row dict is keyed by "sampleid".
+        header = ["sampleid"] + list(csvreader.fieldnames[1:])
+        csvreader.fieldnames = header
+
+        with open(args.output, "w", encoding="utf8", newline="") as f_output:
+            writer = csv.DictWriter(f_output, fieldnames=header, delimiter=args.delimiter)
+            writer.writeheader()
+            for row in csvreader:
+                row[header[0]] = args.sname
+                writer.writerow(row)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/bin/reformat_emmtyper_csv.py b/bin/reformat_emmtyper_csv.py
@@ -0,0 +1,47 @@
+#!/usr/bin/env python
+
+import argparse
+import csv
+
+def main():
+    """Convert emmtyper output to a comma-separated table with a header.
+
+    Reads the tab-delimited file given with ``-i/--input``, writes a fixed
+    header line followed by every input row to ``-o/--output`` using commas
+    as delimiter, and removes a trailing ``.tmp`` from the first field
+    (the sample id) of each row. Blank input lines are skipped.
+
+    Raises:
+        OSError: if the input cannot be read or the output cannot be written.
+    """
+    description = "add header to emmtyper output and get rid of .tmp from the sampleid"
+    parser = argparse.ArgumentParser(description=description)
+
+    parser.add_argument(
+        "-i",
+        "--input",
+        required=True,
+        help="emmtyper output (tab-delimited)",
+    )
+    parser.add_argument(
+        "-o",
+        "--output",
+        required=True,
+        help="Output file name",
+    )
+    args = parser.parse_args()
+
+    header = [
+        "sampleid",
+        "num_of_blast_hits",
+        "num_of_clusters",
+        "emm-type",
+        "emm-type-positions",
+        "emm-like",
+        "emm-like-positions",
+        "EMM-cluster",
+    ]
+    suffix = ".tmp"
+
+    with open(args.input, "r", encoding="utf8", newline="") as f_input:
+        with open(args.output, "w", encoding="utf8", newline="") as f_output:
+            reader = csv.reader(f_input, delimiter="\t")
+            writer = csv.writer(f_output, delimiter=",")
+            writer.writerow(header)
+            for row in reader:
+                if not row:
+                    # csv.reader yields an empty list for a blank line.
+                    continue
+                # str.rstrip(".tmp") would strip any trailing run of the
+                # characters '.', 't', 'm', 'p' (e.g. "strep.tmp" -> "stre"),
+                # so remove the exact suffix instead.
+                if row[0].endswith(suffix):
+                    row[0] = row[0][: -len(suffix)]
+                writer.writerow(row)
+
+
+if __name__ == "__main__":
+    main()