Address Imp example fixing #29

isblab · Jun 5, 2024 · bba92b4 · bba92b4
1 parent e38fe90
commit bba92b4
Show file tree

Hide file tree

Showing 5 changed files with 39 additions and 38 deletions.
diff --git a/examples/README.md b/examples/README.md
@@ -7,17 +7,14 @@ The `examples/input` comprises of the fasta sequences (`examples/input/fasta`),
 
 In addition, it also comprises the `topology{x}.txt` files that define the representation to be used for running the `nude_modeling.py` modeling script. The `{x}` in the topology file's name corresponds to the number of amino acid residues to be coarse-grained to a single flexible bead for regions that lack a previously characterized structure. In this example, we are comparing the 1, 5, 10, 20, 30 and 50 residues per bead coarse-grained representations of the regions with unknown structure of the NuDe sub-complex. It is to be noted that the regions with known structure will be modeled as 1 and 10 residues per bead representation. Importantly, note that *any* representation can be compared as long as there is a valid modeling file associated with it (the representation can be specified via a topology file or manually). 
 
-It also contains the `nestor_params_optrep.yaml` file which defines the NestOR parameters. Each parameter is described in the file itself. 
+It also contains the `examples/input/nestor_params_optrep.yaml` file which defines the NestOR parameters. Each parameter is described in the file itself. 
 
 The `nude_modeling.py` script is also adapted from the [Integrative model of the NuRD subcomplexes](https://github.com/isblab/nurd) repository for use with NestOR.
 
 The `nestor_output.yaml` contains an example output for the given setup. In addition to this file, NestOR also saves a model from each iteration. These models are not included here due to space constraints. It also generates plots visualizing the log(evidence) (mean and standard error on the mean) (`examples/trial_optrep_params_evidence_errorbarplot.png`), MCMC per step sampling time (`examples/trial_optrep_params_persteptime.png`), NestOR total process time (`examples/trial_optrep_params_proctime.png`) and per step MCMC sampling time and log(evidence) together for all candidate representations (`examples/sterr_evi_and_proctime.png`). 
 
 ## Running nested sampling on this example
-For this example, the user may choose to run a single Nested Sampling run as follows. 
+For this example, the user may run NestOR as follows:
+```
+python -m IMP.nestor.wrapper_v{x} -p {path_to_param_file}
 ```
-{path_to_IMP_installation} python nude_modeling.py
-``` 
-This command will run one nested sampling run. 
-
-For use with the `wrapper_v{x}.py` the user will need to use modify the `nude_modeling.py` a bit to use command-line arguments. The user will need to convert the hard coded arguments in this script to `sys.argv` arguments in the correct order for the wrapper to work. Please see the comments in the `nude_modeling.py` for more details. Once the change is made, the user may run NestOR with the wrapper script by running the `example.sh` script
diff --git a/examples/nude_modeling.py b/examples/nude_modeling.py
@@ -31,17 +31,23 @@
 
 IMP.setup_from_argv(sys.argv, "Application of NestOR to the NuDe subcomplex")
 
-dat_dir = IMP.nestor.get_example_path("input")  
+dat_dir = IMP.nestor.get_example_path("input")
 
-if not os.path.exists(os.path.join(dat_dir, 'gmm/emd_22904.mrc')):
+if not os.path.exists(os.path.join(dat_dir, "gmm/emd_22904.mrc")):
     print("To run this example, first download the EM map from EMD22094,")
     print("extract it, rename it as `emd_22904.mrc` and place it in")
     print("the `input/gmm/` directory.")
     sys.exit(0)
 
-run_output_dir = "run_" + "0"  # sys.argv[1]
-topology_file = os.path.join(dat_dir, "topology50.txt")  # sys.argv[2]
-h_param_file = os.path.join(dat_dir, "nestor_params_optrep.yaml")  # sys.argv[3]
+if len(sys.argv) < 2:
+    print(
+        "Command-line arguments needed to run this script. See the readme for more details"
+    )
+    sys.exit(0)
+
+run_output_dir = "run_" + sys.argv[1]
+topology_file = os.path.join(dat_dir, sys.argv[2])
+h_param_file = os.path.join(dat_dir, sys.argv[3])
 
 max_shuffle_core = 5
 max_shuffle_set2 = 50
@@ -68,9 +74,10 @@ def modeling(output_dir, topology_file, h_param_file):
     mdl = IMP.Model()
     t = IMP.pmi.topology.TopologyReader(
         topology_file,
-        pdb_dir=os.path.join(dat_dir, 'pdb'),
-        fasta_dir=os.path.join(dat_dir, 'fasta'),
-        gmm_dir=os.path.join(dat_dir, 'gmm'))
+        pdb_dir=os.path.join(dat_dir, "pdb"),
+        fasta_dir=os.path.join(dat_dir, "fasta"),
+        gmm_dir=os.path.join(dat_dir, "gmm"),
+    )
     bs = IMP.pmi.macros.BuildSystem(mdl)
     bs.add_state(t)
 
@@ -354,7 +361,6 @@ def modeling(output_dir, topology_file, h_param_file):
     rex = IMP.pmi.macros.ReplicaExchange(
         mdl,
         root_hier=root_hier,
-
         monte_carlo_temperature=1.0,
         replica_exchange_minimum_temperature=1.0,
         replica_exchange_maximum_temperature=rex_max_temp,

diff --git a/pyext/src/compare_runs_v2_w_pyplot.py b/pyext/src/compare_runs_v2_w_pyplot.py
@@ -1,3 +1,4 @@
+#!/usr/bin/env python
 """@namespace IMP.nestor.compare_runs_v2_w_pyplot
    Plotting script to compare NestOR runs"""
 
@@ -113,8 +114,9 @@ def plot_sterr(results: dict):
         for run_set in results[parent]:  # runset is res_01
             log_evi = []
             for run in results[parent][run_set]:
-                log_evi.append(float(
-                    results[parent][run_set][run]["log_estimated_evidence"]))
+                log_evi.append(
+                    float(results[parent][run_set][run]["log_estimated_evidence"])
+                )
             stderr_log_evi = np.std(log_evi) / (math.sqrt(len(log_evi)))
             x_vals.append(run_set)
             y_vals.append(stderr_log_evi)

diff --git a/pyext/src/wrapper_v6.py b/pyext/src/wrapper_v6.py
@@ -1,3 +1,4 @@
+#!/usr/bin/env python
 """@namespace IMP.nestor.wrapper_v6
    Top-level NestOR script"""
 
@@ -64,8 +65,7 @@ def get_curr_processes_and_terminated_runs(processes: dict):
         if proc.returncode == 11 or proc.returncode == 12:
             faulty_runs.append((run_deets, proc))
             if proc.returncode == 11:
-                shutil.rmtree(os.path.join(run_deets[0],
-                                           f"run_{run_deets[1]}"))
+                shutil.rmtree(os.path.join(run_deets[0], f"run_{run_deets[1]}"))
         elif proc.returncode == 0:
             successful_runs.append((run_deets, proc))
 
@@ -129,8 +129,7 @@ def plotter(results: dict, h_params):
     plt.ylabel("Nested sampling process time")
     plt.savefig(
         os.path.join(
-            h_params["parent_dir"],
-            f"trial_{h_params['trial_name']}_proctime.png"
+            h_params["parent_dir"], f"trial_{h_params['trial_name']}_proctime.png"
         )
     )
 
@@ -143,8 +142,7 @@ def plotter(results: dict, h_params):
     plt.ylabel("Mean time per MCMC step")
     plt.savefig(
         os.path.join(
-            h_params["parent_dir"],
-            f"trial_{h_params['trial_name']}_persteptime.png"
+            h_params["parent_dir"], f"trial_{h_params['trial_name']}_persteptime.png"
         )
     )
 
@@ -191,8 +189,7 @@ def run_nested_sampling(h_param_file, topology=True):
                     os.chdir(f"run_{run_id}")
 
                     if topology:
-                        topf = \
-                            f"topology{res.split('/')[-1].split('_')[-1]}.txt"
+                        topf = f"topology{res.split('/')[-1].split('_')[-1]}.txt"
                     else:
                         topf = res.split("/")[-1].split("_")[-1]
 
@@ -241,8 +238,7 @@ def run_nested_sampling(h_param_file, topology=True):
         if len(curr_faulty_runs) != 0:
             for fr, p in curr_faulty_runs:
                 if p.returncode == 11:
-                    print(f"Will relaunch ({fr[0].split('/')[-1]}, "
-                          f"run_{fr[1]})")
+                    print(f"Will relaunch ({fr[0].split('/')[-1]}, " f"run_{fr[1]})")
                     torun.append(fr)
                 elif p.returncode == 12:
                     print(
@@ -375,8 +371,7 @@ def main():
     if not args.skip_calc:
         run_nested_sampling(h_param_file, use_topology)
 
-    with open(os.path.join(h_params["parent_dir"],
-                           "nestor_output.yaml"), "r") as outf:
+    with open(os.path.join(h_params["parent_dir"], "nestor_output.yaml"), "r") as outf:
         results = yaml.safe_load(outf)
 
     if len(list(results.keys())) > 0:

diff --git a/pyext/src/xl_datasplitter.py b/pyext/src/xl_datasplitter.py
@@ -1,3 +1,4 @@
+#!/usr/bin/env python
 """@namespace IMP.nestor.xl_datasplitter
    Script to split a CSV file for use in nested sampling"""
 
@@ -9,9 +10,9 @@
 
 xls = []
 header = None
-with open(xl_file, 'r') as xlf:
+with open(xl_file, "r") as xlf:
     for ln in xlf.readlines():
-        if (not ln.startswith('Protein1')) and (not ln.startswith('Linker')):
+        if (not ln.startswith("Protein1")) and (not ln.startswith("Linker")):
             xls.append(ln)
         else:
             header = ln
@@ -24,19 +25,19 @@
     else:
         sampling.append(link)
 
-fname = xl_file.split('/')[-1]
-dir_path = xl_file.split('/')
+fname = xl_file.split("/")[-1]
+dir_path = xl_file.split("/")
 if len(dir_path) > 1:
-    dir_path = '/'.join(dir_path[0:-1])
+    dir_path = "/".join(dir_path[0:-1])
 else:
-    dir_path = './'
-with open(f'{dir_path}/sampling_{fname}', 'w') as sf:
+    dir_path = "./"
+with open(f"{dir_path}/sampling_{fname}", "w") as sf:
     if header is not None:
         sf.write(header)
     for lnk in sampling:
         sf.write(lnk)
 
-with open(f'{dir_path}/evicalc_{fname}', 'w') as evif:
+with open(f"{dir_path}/evicalc_{fname}", "w") as evif:
     if header is not None:
         evif.write(header)
     for lnk in evi_calc: