Skip to content

Commit

Permalink
Address Imp example fixing #29
Browse files Browse the repository at this point in the history
  • Loading branch information
shreyas-arvindekar committed Jun 5, 2024
1 parent e38fe90 commit bba92b4
Show file tree
Hide file tree
Showing 5 changed files with 39 additions and 38 deletions.
11 changes: 4 additions & 7 deletions examples/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,17 +7,14 @@ The `examples/input` comprises of the fasta sequences (`examples/input/fasta`),

In addition, it comprises the `topology{x}.txt` files that define the representation to be used for running the `nude_modeling.py` modeling script. The `{x}` in the topology file's name corresponds to the number of amino acid residues to be coarse-grained to a single flexible bead for regions that lack a previously characterized structure. In this example, we are comparing the 1, 5, 10, 20, 30 and 50 residues per bead coarse-grained representations of the regions with unknown structure of the NuDe sub-complex. Note that the regions with known structure will be modeled at 1 and 10 residues per bead. Importantly, *any* representation can be compared as long as there is a valid modeling file associated with it (the representation can be specified via a topology file or manually).

It also contains the `nestor_params_optrep.yaml` file which defines the NestOR parameters. Each parameter is described in the file itself.
It also contains the `examples/input/nestor_params_optrep.yaml` file which defines the NestOR parameters. Each parameter is described in the file itself.

The `nude_modeling.py` script is also adapted from the [Integrative model of the NuRD subcomplexes](https://github.com/isblab/nurd) repository for use with NestOR.

The `nestor_output.yaml` contains an example output for the given setup. In addition to this file, NestOR also saves a model from each iteration. These models are not included here due to space constraints. It also generates plots visualizing the log(evidence) (mean and standard error on the mean) (`examples/trial_optrep_params_evidence_errorbarplot.png`), MCMC per step sampling time (`examples/trial_optrep_params_persteptime.png`), NestOR total process time (`examples/trial_optrep_params_proctime.png`) and per step MCMC sampling time and log(evidence) together for all candidate representations (`examples/sterr_evi_and_proctime.png`).

## Running nested sampling on this example
For this example, the user may choose to run a single Nested Sampling run as follows.
For this example, the user may run NestOR as follows:
```
python -m IMP.wrapper_v{x} -p {path_to_param_file}
```
{path_to_IMP_installation} python nude_modeling.py
```
This command will run one nested sampling run.

For use with the `wrapper_v{x}.py`, the user will need to modify `nude_modeling.py` slightly to use command-line arguments: the hard-coded arguments in this script must be converted to `sys.argv` arguments in the correct order for the wrapper to work. Please see the comments in `nude_modeling.py` for more details. Once the change is made, the user may run NestOR with the wrapper script by running the `example.sh` script.
24 changes: 15 additions & 9 deletions examples/nude_modeling.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,17 +31,23 @@

IMP.setup_from_argv(sys.argv, "Application of NestOR to the NuDe subcomplex")

dat_dir = IMP.nestor.get_example_path("input")
dat_dir = IMP.nestor.get_example_path("input")

if not os.path.exists(os.path.join(dat_dir, 'gmm/emd_22904.mrc')):
if not os.path.exists(os.path.join(dat_dir, "gmm/emd_22904.mrc")):
print("To run this example, first download the EM map from EMD22094,")
print("extract it, rename it as `emd_22904.mrc` and place it in")
print("the `input/gmm/` directory.")
sys.exit(0)

run_output_dir = "run_" + "0" # sys.argv[1]
topology_file = os.path.join(dat_dir, "topology50.txt") # sys.argv[2]
h_param_file = os.path.join(dat_dir, "nestor_params_optrep.yaml") # sys.argv[3]
# Three positional arguments are read below (run id, topology file name and
# NestOR parameter file name), so sys.argv must hold at least four entries
# counting the script name itself. Checking for fewer than two entries let a
# partial command line through, which then failed with an IndexError.
if len(sys.argv) < 4:
    print(
        "Command-line arguments needed to run this script. See the readme for more details"
    )
    sys.exit(0)

# sys.argv[1]: identifier appended to "run_" to name the output directory.
run_output_dir = "run_" + sys.argv[1]
# sys.argv[2] / sys.argv[3]: file names resolved relative to the example's
# input directory (dat_dir).
topology_file = os.path.join(dat_dir, sys.argv[2])
h_param_file = os.path.join(dat_dir, sys.argv[3])

max_shuffle_core = 5
max_shuffle_set2 = 50
Expand All @@ -68,9 +74,10 @@ def modeling(output_dir, topology_file, h_param_file):
mdl = IMP.Model()
t = IMP.pmi.topology.TopologyReader(
topology_file,
pdb_dir=os.path.join(dat_dir, 'pdb'),
fasta_dir=os.path.join(dat_dir, 'fasta'),
gmm_dir=os.path.join(dat_dir, 'gmm'))
pdb_dir=os.path.join(dat_dir, "pdb"),
fasta_dir=os.path.join(dat_dir, "fasta"),
gmm_dir=os.path.join(dat_dir, "gmm"),
)
bs = IMP.pmi.macros.BuildSystem(mdl)
bs.add_state(t)

Expand Down Expand Up @@ -354,7 +361,6 @@ def modeling(output_dir, topology_file, h_param_file):
rex = IMP.pmi.macros.ReplicaExchange(
mdl,
root_hier=root_hier,

monte_carlo_temperature=1.0,
replica_exchange_minimum_temperature=1.0,
replica_exchange_maximum_temperature=rex_max_temp,
Expand Down
6 changes: 4 additions & 2 deletions pyext/src/compare_runs_v2_w_pyplot.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
#!/usr/bin/env python
"""@namespace IMP.nestor.compare_runs_v2_w_pyplot
Plotting script to compare NestOR runs"""

Expand Down Expand Up @@ -113,8 +114,9 @@ def plot_sterr(results: dict):
for run_set in results[parent]: # runset is res_01
log_evi = []
for run in results[parent][run_set]:
log_evi.append(float(
results[parent][run_set][run]["log_estimated_evidence"]))
log_evi.append(
float(results[parent][run_set][run]["log_estimated_evidence"])
)
stderr_log_evi = np.std(log_evi) / (math.sqrt(len(log_evi)))
x_vals.append(run_set)
y_vals.append(stderr_log_evi)
Expand Down
19 changes: 7 additions & 12 deletions pyext/src/wrapper_v6.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
#!/usr/bin/env python
"""@namespace IMP.nestor.wrapper_v6
Top-level NestOR script"""

Expand Down Expand Up @@ -64,8 +65,7 @@ def get_curr_processes_and_terminated_runs(processes: dict):
if proc.returncode == 11 or proc.returncode == 12:
faulty_runs.append((run_deets, proc))
if proc.returncode == 11:
shutil.rmtree(os.path.join(run_deets[0],
f"run_{run_deets[1]}"))
shutil.rmtree(os.path.join(run_deets[0], f"run_{run_deets[1]}"))
elif proc.returncode == 0:
successful_runs.append((run_deets, proc))

Expand Down Expand Up @@ -129,8 +129,7 @@ def plotter(results: dict, h_params):
plt.ylabel("Nested sampling process time")
plt.savefig(
os.path.join(
h_params["parent_dir"],
f"trial_{h_params['trial_name']}_proctime.png"
h_params["parent_dir"], f"trial_{h_params['trial_name']}_proctime.png"
)
)

Expand All @@ -143,8 +142,7 @@ def plotter(results: dict, h_params):
plt.ylabel("Mean time per MCMC step")
plt.savefig(
os.path.join(
h_params["parent_dir"],
f"trial_{h_params['trial_name']}_persteptime.png"
h_params["parent_dir"], f"trial_{h_params['trial_name']}_persteptime.png"
)
)

Expand Down Expand Up @@ -191,8 +189,7 @@ def run_nested_sampling(h_param_file, topology=True):
os.chdir(f"run_{run_id}")

if topology:
topf = \
f"topology{res.split('/')[-1].split('_')[-1]}.txt"
topf = f"topology{res.split('/')[-1].split('_')[-1]}.txt"
else:
topf = res.split("/")[-1].split("_")[-1]

Expand Down Expand Up @@ -241,8 +238,7 @@ def run_nested_sampling(h_param_file, topology=True):
if len(curr_faulty_runs) != 0:
for fr, p in curr_faulty_runs:
if p.returncode == 11:
print(f"Will relaunch ({fr[0].split('/')[-1]}, "
f"run_{fr[1]})")
print(f"Will relaunch ({fr[0].split('/')[-1]}, " f"run_{fr[1]})")
torun.append(fr)
elif p.returncode == 12:
print(
Expand Down Expand Up @@ -375,8 +371,7 @@ def main():
if not args.skip_calc:
run_nested_sampling(h_param_file, use_topology)

with open(os.path.join(h_params["parent_dir"],
"nestor_output.yaml"), "r") as outf:
with open(os.path.join(h_params["parent_dir"], "nestor_output.yaml"), "r") as outf:
results = yaml.safe_load(outf)

if len(list(results.keys())) > 0:
Expand Down
17 changes: 9 additions & 8 deletions pyext/src/xl_datasplitter.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
#!/usr/bin/env python
"""@namespace IMP.nestor.xl_datasplitter
Script to split a CSV file for use in nested sampling"""

Expand All @@ -9,9 +10,9 @@

xls = []
header = None
with open(xl_file, 'r') as xlf:
with open(xl_file, "r") as xlf:
for ln in xlf.readlines():
if (not ln.startswith('Protein1')) and (not ln.startswith('Linker')):
if (not ln.startswith("Protein1")) and (not ln.startswith("Linker")):
xls.append(ln)
else:
header = ln
Expand All @@ -24,19 +25,19 @@
else:
sampling.append(link)

fname = xl_file.split('/')[-1]
dir_path = xl_file.split('/')
fname = xl_file.split("/")[-1]
dir_path = xl_file.split("/")
if len(dir_path) > 1:
dir_path = '/'.join(dir_path[0:-1])
dir_path = "/".join(dir_path[0:-1])
else:
dir_path = './'
with open(f'{dir_path}/sampling_{fname}', 'w') as sf:
dir_path = "./"
with open(f"{dir_path}/sampling_{fname}", "w") as sf:
if header is not None:
sf.write(header)
for lnk in sampling:
sf.write(lnk)

with open(f'{dir_path}/evicalc_{fname}', 'w') as evif:
with open(f"{dir_path}/evicalc_{fname}", "w") as evif:
if header is not None:
evif.write(header)
for lnk in evi_calc:
Expand Down

0 comments on commit bba92b4

Please sign in to comment.