From 06158dd92f59bd0f00d5739a555f752e27c51947 Mon Sep 17 00:00:00 2001 From: tina-z-jia <145156075+tina-z-jia@users.noreply.github.com> Date: Fri, 6 Oct 2023 15:55:06 -0700 Subject: [PATCH 1/4] Update state.py --- sanclone/state.py | 80 +++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 77 insertions(+), 3 deletions(-) diff --git a/sanclone/state.py b/sanclone/state.py index dd7b818..a3610b8 100644 --- a/sanclone/state.py +++ b/sanclone/state.py @@ -1,5 +1,79 @@ -from pydantic import BaseModel +# -*- coding: utf-8 -*- +"""get_seq_annotation.ipynb +Automatically generated by Colaboratory. -class State(BaseModel): - pass +Original file is located at + https://colab.research.google.com/drive/1dJEOj6Jw3qOHsxcsP-W3Uj7mbKhvWnHi +""" + +from Bio import Entrez +from Bio import SeqIO + + +class SequenceAnnotation: + def __init__(self, vector): + # Ensure the vector is a SeqRecord object from Biopython + if not isinstance(vector, SeqIO.SeqRecord): + raise ValueError("Input 'vector' must be a SeqRecord object from Biopython's SeqIO.") + self.vector = vector + self.linear_insert = None + # clone_seq is a DNA seq in string format + # clone is the SeqRecord with annotations + self.clone_seq = None + self.clone = None + + def store_linear_insert(self, linear_insert): + # Ensure the linear_insert is a SeqRecord object from Biopython + if not isinstance(linear_insert, SeqIO.SeqRecord): + raise ValueError("Input 'linear_insert' must be a SeqRecord object from Biopython's SeqIO.") + self.linear_insert = linear_insert + + # def store_clone_annotation(self, clone_seq): + # # Ensure the linear_insert is a SeqRecord object from Biopython + # self.clone_seq = clone_seq + + + # self.clone = + + def retrieve_vector(self): + return self.vector + + def retrieve_linear_insert(self): + return self.linear_insert + + def retrieve_clone(self): + return self.clone + # seq to annotation + + +def download_genbank_file(accession, filename): + Entrez.email = "tina.zetong.jia@example.com" # Always provide your email address when using NCBI's services + with Entrez.efetch(db="nucleotide", id=accession, rettype="gb", retmode="text") as handle: + with open(filename, 'w') as outfile: + outfile.write(handle.read()) + + +# example + +accession_id_vector = "NC_005213" +output_filename_vector = "NC_005213.gbk" +accession_id_linear_insert = "NC_000932" +output_filename_linear_insert = "NC_000932.gbk" +download_genbank_file(accession_id_vector, output_filename_vector) +download_genbank_file(accession_id_linear_insert, output_filename_linear_insert) + +for gb_record in SeqIO.parse(open(output_filename_linear_insert,"r"), "genbank") : + # now do something with the record + print ("Name %s, %i features" % (gb_record.name, len(gb_record.features))) + +vector_seq = list(SeqIO.parse(open(output_filename_vector,"r"), "genbank")) +insert_seq = list(SeqIO.parse(open(output_filename_linear_insert,"r"), "genbank")) + +seq_anno = SequenceAnnotation(vector_seq[0]) +seq_anno.store_linear_insert(insert_seq[0]) + +retrieved_vector = seq_anno.retrieve_vector() +retrieved_insert = seq_anno.retrieve_linear_insert() +print(retrieved_vector) +print(retrieved_insert) From b4ffa299c0633aece4dd30f873252849b4f42431 Mon Sep 17 00:00:00 2001 From: tina-z-jia <145156075+tina-z-jia@users.noreply.github.com> Date: Fri, 6 Oct 2023 15:58:10 -0700 Subject: [PATCH 2/4] Update class name to State --- sanclone/state.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sanclone/state.py b/sanclone/state.py index a3610b8..488a8ba 100644 --- a/sanclone/state.py +++ b/sanclone/state.py @@ -11,7 +11,7 @@ from Bio import SeqIO -class SequenceAnnotation: +class State: def __init__(self, vector): # Ensure the vector is a SeqRecord object from Biopython if not isinstance(vector, SeqIO.SeqRecord): @@ -70,7 +70,7 @@ def download_genbank_file(accession, filename): vector_seq = list(SeqIO.parse(open(output_filename_vector,"r"), "genbank")) insert_seq = list(SeqIO.parse(open(output_filename_linear_insert,"r"), "genbank")) -seq_anno = SequenceAnnotation(vector_seq[0]) +seq_anno = State(vector_seq[0]) seq_anno.store_linear_insert(insert_seq[0]) retrieved_vector = seq_anno.retrieve_vector() From a7331b193b484f7c2bfe80de27947a8ad72dc43b Mon Sep 17 00:00:00 2001 From: tina-z-jia <145156075+tina-z-jia@users.noreply.github.com> Date: Fri, 6 Oct 2023 15:59:53 -0700 Subject: [PATCH 3/4] moving example to another file --- sanclone/state.py | 24 ------------------------ 1 file changed, 24 deletions(-) diff --git a/sanclone/state.py b/sanclone/state.py index 488a8ba..7669c9f 100644 --- a/sanclone/state.py +++ b/sanclone/state.py @@ -53,27 +53,3 @@ def download_genbank_file(accession, filename): with open(filename, 'w') as outfile: outfile.write(handle.read()) - -# example - -accession_id_vector = "NC_005213" -output_filename_vector = "NC_005213.gbk" -accession_id_linear_insert = "NC_000932" -output_filename_linear_insert = "NC_000932.gbk" -download_genbank_file(accession_id_vector, output_filename_vector) -download_genbank_file(accession_id_linear_insert, output_filename_linear_insert) - -for gb_record in SeqIO.parse(open(output_filename_linear_insert,"r"), "genbank") : - # now do something with the record - print ("Name %s, %i features" % (gb_record.name, len(gb_record.features))) - -vector_seq = list(SeqIO.parse(open(output_filename_vector,"r"), "genbank")) -insert_seq = list(SeqIO.parse(open(output_filename_linear_insert,"r"), "genbank")) - -seq_anno = State(vector_seq[0]) -seq_anno.store_linear_insert(insert_seq[0]) - -retrieved_vector = seq_anno.retrieve_vector() -retrieved_insert = seq_anno.retrieve_linear_insert() -print(retrieved_vector) -print(retrieved_insert) From b87c21fd7ae632230b1377a2b98b92aa85be8ff5 Mon Sep 17 00:00:00 2001 From: Tina Jia Date: Fri, 6 Oct 2023 16:20:33 -0700 Subject: [PATCH 4/4] default constructor --- sanclone/state.py | 28 ++++++++++++---------------- tests/test_sanity.py | 25 +++++++++++++++++++++++++ 2 files changed, 37 insertions(+), 16 deletions(-) diff --git a/sanclone/state.py b/sanclone/state.py index 7669c9f..657cedb 100644 --- a/sanclone/state.py +++ b/sanclone/state.py @@ -12,16 +12,15 @@ class State: - def __init__(self, vector): - # Ensure the vector is a SeqRecord object from Biopython - if not isinstance(vector, SeqIO.SeqRecord): - raise ValueError("Input 'vector' must be a SeqRecord object from Biopython's SeqIO.") - self.vector = vector + def __init__(self): + self.vector = None self.linear_insert = None - # clone_seq is a DNA seq in string format - # clone is the SeqRecord with annotations self.clone_seq = None - self.clone = None + + def store_vector(self, vector): + if not isinstance(vector, SeqIO.SeqRecord): + raise ValueError("Input 'vector' must be a SeqRecord object from Biopython's SeqIO.") + self.vector = vector def store_linear_insert(self, linear_insert): # Ensure the linear_insert is a SeqRecord object from Biopython @@ -29,12 +28,9 @@ def store_linear_insert(self, linear_insert): raise ValueError("Input 'linear_insert' must be a SeqRecord object from Biopython's SeqIO.") self.linear_insert = linear_insert - # def store_clone_annotation(self, clone_seq): - # # Ensure the linear_insert is a SeqRecord object from Biopython - # self.clone_seq = clone_seq - - - # self.clone = + def store_clone_seq(self, clone_seq): + # Ensure the linear_insert is a SeqRecord object from Biopython + self.clone_seq = clone_seq def retrieve_vector(self): return self.vector @@ -42,8 +38,8 @@ def retrieve_vector(self): def retrieve_linear_insert(self): return self.linear_insert - def retrieve_clone(self): - return self.clone + def retrieve_clone_seq(self): + return self.clone_seq # seq to annotation diff --git a/tests/test_sanity.py b/tests/test_sanity.py index 120d2d1..3cf8939 100644 --- a/tests/test_sanity.py +++ b/tests/test_sanity.py @@ -11,3 +11,28 @@ def test_echo_tool(): tool = EchoTool(shared_state=State()) assert tool.run("Hello") == "Hello" + +# def test_state_tool(): +# from sanclone import State +# from sanclone.State import download_genbank_file +# accession_id_vector = "NC_005213" +# output_filename_vector = "NC_005213.gbk" +# accession_id_linear_insert = "NC_000932" +# output_filename_linear_insert = "NC_000932.gbk" +# download_genbank_file(accession_id_vector, output_filename_vector) +# download_genbank_file(accession_id_linear_insert, output_filename_linear_insert) + +# for gb_record in SeqIO.parse(open(output_filename_linear_insert,"r"), "genbank") : +# # now do something with the record +# print ("Name %s, %i features" % (gb_record.name, len(gb_record.features))) + +# vector_seq = list(SeqIO.parse(open(output_filename_vector,"r"), "genbank")) +# insert_seq = list(SeqIO.parse(open(output_filename_linear_insert,"r"), "genbank")) + +# seq_anno = State(vector_seq[0]) +# seq_anno.store_linear_insert(insert_seq[0]) + +# retrieved_vector = seq_anno.retrieve_vector() +# retrieved_insert = seq_anno.retrieve_linear_insert() +# print(retrieved_vector) +# print(retrieved_insert) \ No newline at end of file