-
Notifications
You must be signed in to change notification settings - Fork 11
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
ProcessOutput #233
base: develop
Are you sure you want to change the base?
ProcessOutput #233
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -692,59 +692,148 @@ def write_stats2(self): | |
for stat in self.dictionary_stats2.keys(): | ||
self.write_stat2(stat) | ||
|
||
|
||
class ProcessOutput(object): | ||
"""A class for reading stat files""" | ||
def __init__(self, filename):
    """Read the header (first line) of a stat file and build the key maps.

    After construction the instance holds:
      - self.dict      the parsed first line; for a stat v2 file the
                       STAT2HEADER* entries are removed from it, leaving
                       {CategoryID: Category} pairs such as
                       {109: 'Total_Score'}
      - self.inv_dict  the reverse mapping of self.dict
      - self.klist     the field names, sorted
      - self.exp_dict  the STAT2HEADER* entries taken out of the header
      - self.isstat1 / self.isstat2  which file version was detected

    @param filename path of the stat file to read
    @exception ValueError if filename is None
    """
    self.filename = filename
    self.isstat1 = False
    self.isstat2 = False
    # Store all experimental header details here.
    self.exp_dict = {}

    if self.filename is None:
        raise ValueError("No file name provided. Use -h for help")

    # Only the first line is needed here; the context manager makes sure
    # the file is closed even if reading fails.
    with open(self.filename, "r") as f:
        line = f.readline()

    # Store the keys in a dictionary. Example pair: {109: 'Total_Score'}
    self.dict = self.parse_line(line, header=True)
    self.klist = list(self.dict.keys())

    if "STAT2HEADER" in self.klist:
        self.isstat2 = True
        # self.klist is a separate list, so deleting from self.dict while
        # looping over it is safe.
        for k in self.klist:
            if "STAT2HEADER" in str(k):
                self.exp_dict[k] = self.dict[k]
                del self.dict[k]

        # (id, name) pairs sorted by field name
        by_name = sorted(self.dict.items(), key=operator.itemgetter(1))
        self.klist = [pair[1] for pair in by_name]
        self.inv_dict = {}
        for key_id, name in by_name:
            self.inv_dict[name] = key_id
    else:
        IMP.handle_use_deprecated("statfile v1 is deprecated. "
                                  "Please convert to statfile v2.\n")
        self.isstat1 = True
        self.klist.sort()
        # For v1, no need to map from ids to field names, so just make
        # a dumb one-to-one mapping so as not to confuse v2 code
        self.dict = {}
        self.inv_dict = {}
        for k in self.klist:
            self.dict[k] = k
            self.inv_dict[k] = k
|
||
|
||
def parse_line(self, line, header=False):
    """Parse one stat file line into a dictionary of key:value pairs.

    The line is expected to look like a printed Python dictionary:

        {key: 'value', key: 'value', key: "{v1, v2, v3}", key: 'value'}

    Keys that can be read as integers are returned as int; every other
    key is returned as a string with its single quotes removed. Values
    are returned as strings with single quotes removed. A value wrapped
    in double quotes (a multi-component value) is returned verbatim,
    without the surrounding double quotes.

    @param line   the raw text of one line of the stat file
    @param header if True, a 'STAT2HEADER_ENVIRON' entry (assumed to be
                  the last keyword of the line) is cut off first and the
                  raw text following its key is stored unparsed under
                  'STAT2HEADER_ENVIRON'
    @return dictionary of the parsed key:value pairs
    """
    d = {}

    def split_field(field):
        # Split one "key: value" field at the colon that ends the key.
        # Returns (key_text, value_text), or None if there is no colon
        # (e.g. the empty fields left next to a double-quoted value).
        field = field.strip()
        search_from = 0
        if field.startswith("'"):
            # A quoted key may itself contain ':'; look for the
            # separator only after the closing quote.
            closing = field.find("'", 1)
            if closing != -1:
                search_from = closing
        colon = field.find(":", search_from)
        if colon == -1:
            return None
        return field[:colon], field[colon + 1:]

    def to_key(text):
        # If the key is an integer, keep it an integer.
        text = text.strip()
        try:
            return int(text)
        except ValueError:
            return text.replace("'", "").strip()

    def store(field):
        # Add one plain "key: 'value'" field to the output dictionary.
        parts = split_field(field)
        if parts is not None:
            # the value is encased in single quotes, so remove these
            d[to_key(parts[0])] = parts[1].replace("'", "").strip()

    truncated = False
    if header:
        # Assume that STAT2HEADER_ENVIRON is the last keyword
        head, marker, environ = line.partition(", 'STAT2HEADER_ENVIRON':")
        if marker:
            d['STAT2HEADER_ENVIRON'] = environ
            line = head
            truncated = True

    # Remove the outer braces. When the ENVIRON entry was cut off, the
    # closing brace went with it, so only the opening one is left.
    body = line.strip()
    if body.startswith("{"):
        body = body[1:]
    if not truncated and body.endswith("}"):
        body = body[:-1]

    # Split via double quotes to isolate multi-component values: even
    # chunks hold "key:value, key:value, ..." text, odd chunks hold the
    # content of one double-quoted value.
    chunks = body.split("\"")
    for i in range(0, len(chunks), 2):
        fields = chunks[i].split(",")
        if i + 1 < len(chunks):
            for field in fields[:-1]:
                store(field)
            # The last field holds only the key of the multi-component
            # value that follows in chunks[i + 1].
            parts = split_field(fields[-1])
            if parts is not None:
                d[to_key(parts[0])] = chunks[i + 1]
        else:
            for field in fields:
                store(field)

    return d
|
||
|
||
def get_keys(self):
    """Return the list of the string keys (field names) of this stat file.

    The list is built once, when the header is read in the constructor,
    and stored in self.klist, so it is not sorted again on every call.
    """
    return self.klist
|
||
def show_keys(self, ncolumns=2, truncate=65):
    """Print the keys returned by get_keys() in a multi-column layout.

    @param ncolumns passed on to IMP.pmi.tools.print_multicolumn
    @param truncate passed on to IMP.pmi.tools.print_multicolumn
    """
    names = self.get_keys()
    IMP.pmi.tools.print_multicolumn(names, ncolumns, truncate)
|
||
def get_experimental_values(self):
    """Return the dictionary of experimental header details.

    This is self.exp_dict, which the constructor fills with the
    STAT2HEADER* entries found in the first line of the stat file.
    """
    experimental = self.exp_dict
    return experimental
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should add a testcase to make sure this is returning non-garbage. |
||
|
||
def get_fields(self, fields, filtertuple=None, filterout=None, get_every=1): | ||
''' | ||
Get the desired field names, and return a dictionary. | ||
|
||
@param fields desired field names | ||
@param filterout specify if you want to "grep" out something from | ||
the file, so that it is faster | ||
|
@@ -773,7 +862,7 @@ def get_fields(self, fields, filtertuple=None, filterout=None, get_every=1): | |
#if line_number % 1000 == 0: | ||
# print "ProcessOutput.get_fields: read line %s from file %s" % (str(line_number), self.filename) | ||
try: | ||
d = ast.literal_eval(line) | ||
d = eval(line) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Use `ast.literal_eval`, not `eval`. |
||
except: | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Never use a bare `except:`. |
||
print("# Warning: skipped line number " + str(line_number) + " not a valid line") | ||
continue | ||
|
@@ -804,20 +893,99 @@ def get_fields(self, fields, filtertuple=None, filterout=None, get_every=1): | |
relationship = filtertuple[1] | ||
value = filtertuple[2] | ||
if relationship == "<": | ||
if float(d[self.invstat2_dict[keytobefiltered]]) >= value: | ||
if float(d[self.inv_dict[keytobefiltered]]) >= value: | ||
continue | ||
if relationship == ">": | ||
if float(d[self.invstat2_dict[keytobefiltered]]) <= value: | ||
if float(d[self.inv_dict[keytobefiltered]]) <= value: | ||
continue | ||
if relationship == "==": | ||
if float(d[self.invstat2_dict[keytobefiltered]]) != value: | ||
if float(d[self.inv_dict[keytobefiltered]]) != value: | ||
continue | ||
|
||
[outdict[field].append(d[self.invstat2_dict[field]]) | ||
[outdict[field].append(d[self.inv_dict[field]]) | ||
for field in fields] | ||
f.close() | ||
return outdict | ||
|
||
def return_models_satisfying_criteria(self, criteria):
    """Return the lines of the stat file that satisfy all criteria.

    Every data line is parsed with parse_line() and checked against each
    criterion with does_line_pass_criteria(); a line is kept only if all
    criteria pass. With an empty criteria list every line is kept.

    @param criteria list of criterion tuples; each one is handed
           unchanged to does_line_pass_criteria(), which defines the
           tuple layout
    @return list of dictionaries, one per line that passed
    """
    output_list = []

    # The context manager closes the file even if a criterion raises.
    with open(self.filename, "r") as f:
        # skip the first (header) line for a statfile v2
        if self.isstat2:
            f.readline()

        for line in f:
            # Blank lines (e.g. a trailing newline) carry no fields.
            if not line.strip():
                continue
            fields = self.parse_line(line)

            # all() stops at the first failing criterion.
            if all(self.does_line_pass_criteria(fields, c)
                   for c in criteria):
                output_list.append(fields)

    return output_list
|
||
def _float_string(self, c):
    """Return c as a float if it can be cast as such, else as a string.

    @param c the value to convert
    @return float(c) when the conversion succeeds, otherwise str(c)
    """
    try:
        return float(c)
    except (TypeError, ValueError, OverflowError):
        # Not a number (or not convertible): keep it as text.
        return str(c)
|
||
|
||
def does_line_pass_criteria(self, fields, c):
    """Decide whether a parsed stat file line passes one criterion.

    @param fields one stat file line as a dictionary
    @param c criterion tuple ("TheKeyToBeFiltered", relationship, value)
           where relationship is "<", "==" or ">" and the key is the
           name of the field (rather than its integer id). The older
           order (key, value, relationship) is still accepted.
    @return True if the field's value in this line stands in the given
            relationship to value (e.g. "<" passes when the line's value
            is smaller than value), False otherwise
    @exception KeyError if the key is not a field of this stat file
    @exception ValueError if no valid relationship string is given
    @exception TypeError if a numeric field is compared to a string or
               the other way round
    """
    operators = ("==", "<", ">")

    key = c[0]
    if key not in self.inv_dict:
        raise KeyError('ERROR: IMP.pmi.output.ProcessOutput.does_line_pass_criteria() - Key %s is not in this stat file' % (key))

    # Find out which of the two remaining elements is the relationship.
    if c[1] in operators:
        comparison, raw_value = c[1], c[2]
    elif c[2] in operators:
        comparison, raw_value = c[2], c[1]
    else:
        raise ValueError('ERROR: IMP.pmi.output.ProcessOutput.does_line_pass_criteria() - Comparison string must be \'>\', \'<\' or \'==\', instead of %s' % (c[1],))

    # Cast both sides to float where possible, otherwise keep strings
    value = self._float_string(raw_value)
    intkey = self.inv_dict[key]
    model_value = self._float_string(fields[intkey])

    if type(value) is not type(model_value):
        raise TypeError('ERROR: IMP.pmi.output.ProcessOutput.does_line_pass_criteria() - Comparison field %s is of type %s while you tried to compare it to a %s' % (key, type(model_value), type(value)))

    if comparison == "==":
        return model_value == value
    if comparison == ">":
        return model_value > value
    return model_value < value
|
||
|
||
|
||
|
||
class CrossLinkIdentifierDatabase(object): | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Remove, not comment out, old stuff. If we want to go back, that's what git is for.