Skip to content

Commit

Permalink
updated
Browse files Browse the repository at this point in the history
  • Loading branch information
xiaoli-dong committed Jan 19, 2024
1 parent 7f8c57b commit 2f22695
Show file tree
Hide file tree
Showing 70 changed files with 3,020 additions and 840 deletions.
43 changes: 43 additions & 0 deletions bin/combine_jsons.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
#!/usr/bin/env python

import argparse
import json

def main(argv=None):
    """Combine several JSON files into a single JSON array file.

    Every input file is parsed and its top-level value becomes one element
    of the output list, in the order the files are named on the command line.

    Args:
        argv: Optional list of command-line arguments. When ``None``
            argparse reads ``sys.argv[1:]``, which is the behaviour of the
            script when run from the shell.

    Raises:
        SystemExit: If the arguments are invalid or ``--input`` names no file.
        OSError: If an input file cannot be read or the output not written.
        json.JSONDecodeError: If an input file is not valid JSON.
    """
    description = "Combine multiple json files into a single json file"
    parser = argparse.ArgumentParser(description=description)

    parser.add_argument(
        "-i",
        "--input",
        required=True,
        help="Comma seperated json file list, for example: 'f1.json,f2.json,f3.json'\n",
    )
    # Not marked as required: a required option can never fall back to its
    # default, which made the documented default unreachable.
    parser.add_argument(
        "-o",
        "--output",
        default="combined.json",
        help="Output file name\n",
    )

    args = parser.parse_args(argv)

    # Tolerate "a.json, b.json" and trailing commas instead of trying to
    # open an empty or whitespace-padded file name.
    json_files = [name.strip() for name in args.input.split(",") if name.strip()]
    if not json_files:
        parser.error("--input does not name any json file")

    # Load each JSON file into a Python object, keeping command-line order.
    python_objects = []
    for json_file in json_files:
        print(json_file)
        # JSON text is UTF-8; do not depend on the locale's default encoding.
        with open(json_file, "r", encoding="utf-8") as fin:
            python_objects.append(json.load(fin))

    # Dump all the Python objects into a single JSON file. The ``with``
    # statement closes the file, so no explicit close() is needed.
    with open(args.output, "w") as fout:
        json.dump(python_objects, fout, indent=4)


if __name__ == "__main__":
    main()
29 changes: 29 additions & 0 deletions bin/combine_xml.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#!/usr/bin/env python

import argparse
import xml.etree.ElementTree as ET
import sys
def main(argv=None):
    """Concatenate the root elements of several XML files into one file.

    Each input file is parsed, its root element is serialized with
    ``ElementTree.tostring`` and the serialized elements are written back to
    back (no separator, no wrapping element) in command-line order.

    Args:
        argv: Optional list of command-line arguments. When ``None``
            argparse reads ``sys.argv[1:]``, which is the behaviour of the
            script when run from the shell.

    Raises:
        SystemExit: If the command-line arguments are invalid.
        OSError: If an input file cannot be read or the output not written.
        xml.etree.ElementTree.ParseError: If an input file is not
            well-formed XML.
    """
    description = "Combine multiple xml files into a single xml file"
    parser = argparse.ArgumentParser(description=description)

    parser.add_argument(
        "-i",
        "--input",
        required=True,
        help="space seperated xml file name list\n",
    )
    # Not marked as required: a required option can never fall back to its
    # default, which made the documented default unreachable.
    parser.add_argument(
        "-o",
        "--output",
        default="combined.xml",
        help="Output file name\n",
    )

    args = parser.parse_args(argv)

    xml_files = args.input.split()

    # Parse everything first so a malformed input does not leave a
    # half-written output file behind.
    serialized = [
        ET.tostring(ET.parse(xml_file).getroot()).decode("utf-8")
        for xml_file in xml_files
    ]

    # Open with "w" rather than "a+": appending made a rerun stack the new
    # result on top of whatever an earlier run had already written.
    with open(args.output, "w") as fout:
        fout.write("".join(serialized))


if __name__ == "__main__":
    main()
30 changes: 0 additions & 30 deletions bin/formatCSV.py

This file was deleted.

56 changes: 56 additions & 0 deletions bin/reformat_assembly-stats_tsv.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
#!/usr/bin/env python

import argparse
import csv

def main(argv=None):
    """Rewrite a delimited table so its first column holds the sample name.

    The first header field is renamed to ``sampleid`` and the first field of
    every data row is replaced by the value of ``--sname``. All other columns
    are copied unchanged, using the same delimiter for input and output.

    Args:
        argv: Optional list of command-line arguments. When ``None``
            argparse reads ``sys.argv[1:]``, which is the behaviour of the
            script when run from the shell.

    Raises:
        SystemExit: If the command-line arguments are invalid.
        ValueError: If the input has no header row, or a data row has more
            fields than the header.
        OSError: If the input cannot be read or the output not written.
    """
    # The description and help texts were copied from other scripts and
    # described emmtyper/json inputs; they now describe this tool.
    description = (
        "rename the first column of a delimited table to sampleid "
        "and fill it with the sample name"
    )
    parser = argparse.ArgumentParser(description=description)

    parser.add_argument(
        "-i",
        "--input",
        required=True,
        help="delimited input table with a header row\n",
    )
    parser.add_argument(
        "-o",
        "--output",
        required=True,
        help="Output file name\n",
    )
    parser.add_argument(
        "-s",
        "--sname",
        required=True,
        help="sample name written into the sampleid column\n",
    )
    parser.add_argument(
        "-d",
        "--delimiter",
        default="\t",
        help="input and output delimiter\n",
    )
    args = parser.parse_args(argv)

    # A shell usually passes '\t' as the two characters backslash + t, which
    # the csv module rejects; accept it as a tab.
    delimiter = "\t" if args.delimiter == "\\t" else args.delimiter

    # newline="" is what the csv module expects for both reading and writing.
    with open(args.input, "r", encoding="utf8", newline="") as f_input:
        csvreader = csv.DictReader(f_input, delimiter=delimiter)
        if not csvreader.fieldnames:
            raise ValueError(f"{args.input}: no header row found")

        header = list(csvreader.fieldnames)
        header[0] = "sampleid"
        # Key the rows by the renamed header explicitly instead of relying
        # on mutating the reader's internal field-name list in place.
        csvreader.fieldnames = header

        with open(args.output, "w", encoding="utf8", newline="") as f_output:
            writer = csv.DictWriter(f_output, fieldnames=header, delimiter=delimiter)
            writer.writeheader()
            for row in csvreader:
                row["sampleid"] = args.sname
                writer.writerow(row)


if __name__ == "__main__":
    main()
47 changes: 47 additions & 0 deletions bin/reformat_emmtyper_csv.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
#!/usr/bin/env python

import argparse
import csv

def main(argv=None):
    """Convert headerless tab-delimited emmtyper output to CSV with a header.

    A fixed header row is written first. Every input row is then copied with
    a trailing ``.tmp`` removed from its first field (the sample id).

    Args:
        argv: Optional list of command-line arguments. When ``None``
            argparse reads ``sys.argv[1:]``, which is the behaviour of the
            script when run from the shell.

    Raises:
        SystemExit: If the command-line arguments are invalid.
        OSError: If the input cannot be read or the output not written.
    """
    description = "add header to emmtyper output and get rid of .tmp from the sampleid"
    parser = argparse.ArgumentParser(description=description)

    parser.add_argument(
        "-i",
        "--input",
        required=True,
        help="emmtyper csv output'\n",
    )
    parser.add_argument(
        "-o",
        "--output",
        required=True,
        help="Output file name\n",
    )
    args = parser.parse_args(argv)

    header = [
        "sampleid",
        "num_of_blast_hits",
        "num_of_clusters",
        "emm-type",
        "emm-type-positions",
        "emm-like",
        "emm-like-positions",
        "EMM-cluster",
    ]
    suffix = ".tmp"

    # newline="" is what the csv module expects for both reading and writing.
    with open(args.input, "r", encoding="utf8", newline="") as f_input:
        with open(args.output, "w", newline="") as f_output:
            reader = csv.reader(f_input, delimiter="\t")
            writer = csv.writer(f_output, delimiter=",")
            writer.writerow(header)
            for row in reader:
                if not row:
                    # Blank line: there is no sample id to clean up.
                    continue
                # str.rstrip(".tmp") strips any trailing '.', 't', 'm' or 'p'
                # characters, so "sample_pt.tmp" became "sample_". Remove
                # the literal suffix only.
                if row[0].endswith(suffix):
                    row[0] = row[0][: -len(suffix)]
                writer.writerow(row)


if __name__ == "__main__":
    main()
Loading

0 comments on commit 2f22695

Please sign in to comment.