Skip to content

Commit

Permalink
Merge pull request #937 from OpenFreeEnergy/hotfix-struct-analysis
Browse files Browse the repository at this point in the history
Hotfix: structural analysis
  • Loading branch information
IAlibay authored Sep 18, 2024
2 parents 117ce98 + f4a7930 commit f4475bf
Show file tree
Hide file tree
Showing 7 changed files with 143 additions and 34 deletions.
1 change: 1 addition & 0 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ jobs:
- name: "Setup Micromamba"
uses: mamba-org/setup-micromamba@v1
with:
micromamba-binary-path: ~/.local/bin/micromamba
environment-file: environment.yml
environment-name: openfe_env
cache-environment: true
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/conda_cron.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ jobs:
- name: Setup Micromamba
uses: mamba-org/setup-micromamba@v1
with:
micromamba-binary-path: ~/.local/bin/micromamba
environment-name: openfe
create-args: >-
python=${{ matrix.python-version }}
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/installer.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ jobs:
- name: Install constructor environment with Micromamba
uses: mamba-org/setup-micromamba@v1
with:
micromamba-binary-path: ~/.local/bin/micromamba
environment-name: installer
create-args: >-
python=3.10
Expand Down
32 changes: 32 additions & 0 deletions news/pr_937.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
**Added:**

* <news item>

**Changed:**

* Structural analysis data is no longer written to `structural_analysis.json`
but rather a 32bit numpy compressed file named `structural_analysis.npz`
(PR #937).
* Structural analysis array data is no longer directly returned in the
RelativeHybridTopologyProtocol result dictionary. Instead it should
be accessed from the serialized NPZ file `structural_analysis.npz`.
The `structural_analysis` key now contains a path to the NPZ file,
if the structural analysis did not fail (the `structural_analysis_error`
key will instead be present on failure) (PR #937).

**Deprecated:**

* <news item>

**Removed:**

* <news item>

**Fixed:**

* The RelativeHybridTopologyProtocol result unit is now much smaller,
due to the removal of structural analysis data (PR #937).

**Security:**

* <news item>
57 changes: 42 additions & 15 deletions openfe/protocols/openmm_rfe/equil_rfe_methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,6 @@
from openff.toolkit.topology import Molecule as OFFMolecule
from openmmtools import multistate
from typing import Optional
from openmm import unit as omm_unit
from openmm.app import PDBFile
import pathlib
from typing import Any, Iterable, Union
import openmmtools
Expand All @@ -48,7 +46,7 @@
import gufe
from gufe import (
settings, ChemicalSystem, LigandAtomMapping, Component, ComponentMapping,
SmallMoleculeComponent, ProteinComponent, SolventComponent,
SmallMoleculeComponent, SolventComponent,
)

from .equil_rfe_settings import (
Expand Down Expand Up @@ -1096,22 +1094,29 @@ def run(self, *, dry=False, verbose=True,
return {'debug': {'sampler': sampler}}

@staticmethod
def analyse(where) -> dict:
def structural_analysis(scratch, shared) -> dict:
# don't put energy analysis in here, it uses the open file reporter
# whereas structural stuff requires that the file handle is closed
analysis_out = where / 'structural_analysis.json'

ret = subprocess.run(['openfe_analysis', 'RFE_analysis',
str(where), str(analysis_out)],
stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
# TODO: we should just make openfe_analysis write an npz instead!
analysis_out = scratch / 'structural_analysis.json'

ret = subprocess.run(
[
'openfe_analysis', # CLI entry point
'RFE_analysis', # CLI option
str(shared), # Where the simulation.nc fille
str(analysis_out) # Where the analysis json file is written
],
stdout=subprocess.PIPE,
stderr=subprocess.PIPE
)
if ret.returncode:
return {'structural_analysis_error': ret.stderr}

with open(analysis_out, 'rb') as f:
data = json.load(f)

savedir = pathlib.Path(where)
savedir = pathlib.Path(shared)
if d := data['protein_2D_RMSD']:
fig = plotting.plot_2D_rmsd(d)
fig.savefig(savedir / "protein_2D_RMSD.png")
Expand All @@ -1124,21 +1129,43 @@ def analyse(where) -> dict:
f3.savefig(savedir / "ligand_RMSD.png")
plt.close(f3)

return {'structural_analysis': data}
# Save to numpy compressed format (~ 6x more space efficient than JSON)
np.savez_compressed(
shared / "structural_analysis.npz",
protein_RMSD=np.asarray(
data["protein_RMSD"], dtype=np.float32
),
ligand_RMSD=np.asarray(
data["ligand_RMSD"], dtype=np.float32
),
ligand_COM_drift=np.asarray(
data["ligand_wander"], dtype=np.float32
),
protein_2D_RMSD=np.asarray(
data["protein_2D_RMSD"], dtype=np.float32
),
time_ps=np.asarray(
data["time(ps)"], dtype=np.int32
),
)

return {'structural_analysis': shared / "structural_analysis.npz"}

def _execute(
self, ctx: gufe.Context, **kwargs,
) -> dict[str, Any]:
log_system_probe(logging.INFO, paths=[ctx.scratch])

outputs = self.run(scratch_basepath=ctx.scratch,
shared_basepath=ctx.shared)

analysis_outputs = self.analyse(ctx.shared)
structural_analysis_outputs = self.structural_analysis(
ctx.scratch, ctx.shared
)

return {
'repeat_id': self._inputs['repeat_id'],
'generation': self._inputs['generation'],
**outputs,
**analysis_outputs,
**structural_analysis_outputs,
}
35 changes: 24 additions & 11 deletions openfe/tests/protocols/test_openmm_equil_rfe_protocols.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,14 @@
import numpy as np
from numpy.testing import assert_allclose
import gufe
from gufe.tests.test_tokenization import GufeTokenizableTestsMixin
import json
import xml.etree.ElementTree as ET
from importlib import resources
from unittest import mock
import sys
from pathlib import Path

import gufe
import mdtraj as mdt
import numpy as np
import pytest
from openff.toolkit import Molecule
from openff.units import unit
Expand Down Expand Up @@ -249,7 +247,8 @@ def test_dry_run_gaff_vacuum(benzene_vacuum_system, toluene_vacuum_system,
mapping=benzene_to_toluene_mapping,
)
unit = list(dag.protocol_units)[0]
sampler = unit.run(dry=True)["debug"]["sampler"]
with tmpdir.as_cwd():
_ = unit.run(dry=True)["debug"]["sampler"]


@pytest.mark.slow
Expand Down Expand Up @@ -441,7 +440,7 @@ def test_confgen_mocked_fail(benzene_system, toluene_system,
@pytest.fixture(scope='session')
def tip4p_hybrid_factory(
benzene_system, toluene_system,
benzene_to_toluene_mapping, tmp_path_factory
benzene_to_toluene_mapping, tmp_path_factory,
):
"""
Hybrid system with virtual sites in the environment (waters)
Expand Down Expand Up @@ -471,9 +470,9 @@ def tip4p_hybrid_factory(
scratch_temp = tmp_path_factory.mktemp("tip4p_scratch")

dag_unit_result = dag_unit.run(
dry=True,
scratch_basepath=scratch_temp,
shared_basepath=shared_temp,
dry=True,
scratch_basepath=scratch_temp,
shared_basepath=shared_temp,
)

return dag_unit_result['debug']['sampler']._factory
Expand Down Expand Up @@ -1545,7 +1544,9 @@ def test_reload_protocol_result(self, rfe_transformation_json):
d = json.loads(rfe_transformation_json,
cls=gufe.tokenization.JSON_HANDLER.decoder)

pr = openmm_rfe.RelativeHybridTopologyProtocolResult.from_dict(d['protocol_result'])
pr = openmm_rfe.RelativeHybridTopologyProtocolResult.from_dict(
d['protocol_result']
)

assert pr

Expand Down Expand Up @@ -1642,6 +1643,7 @@ def test_filenotfound_replica_states(self, protocolresult):
with pytest.raises(ValueError, match=errmsg):
protocolresult.get_replica_states()


@pytest.mark.parametrize('mapping_name,result', [
["benzene_to_toluene_mapping", 0],
["benzene_to_benzoic_mapping", 1],
Expand All @@ -1659,12 +1661,12 @@ def test_get_charge_difference(mapping_name, result, request):
val = _get_alchemical_charge_difference(
mapping, 'pme', True, openfe.SolventComponent()
)
assert result == pytest.approx(result)
assert result == pytest.approx(val)
else:
val = _get_alchemical_charge_difference(
mapping, 'pme', True, openfe.SolventComponent()
)
assert result == pytest.approx(result)
assert result == pytest.approx(val)


def test_get_charge_difference_no_pme(benzene_to_benzoic_mapping):
Expand Down Expand Up @@ -2102,3 +2104,14 @@ def test_dry_run_complex_alchemwater_totcharge(
assert len(htf._atom_classes['core_atoms']) == core_atoms
assert len(htf._atom_classes['unique_new_atoms']) == new_uniq
assert len(htf._atom_classes['unique_old_atoms']) == old_uniq


def test_structural_analysis_error(tmpdir):

with tmpdir.as_cwd():
ret = openmm_rfe.RelativeHybridTopologyProtocolUnit.structural_analysis(
Path('.'), Path('.')
)

assert 'structural_analysis_error' in ret
assert 'structural_analysis' not in ret
50 changes: 42 additions & 8 deletions openfe/tests/protocols/test_openmm_rfe_slow.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# This code is part of OpenFE and is licensed under the MIT license.
# For details, see https://github.com/OpenFreeEnergy/openfe

import numpy as np
from numpy.testing import assert_equal
from gufe.protocols import execute_DAG
import pytest
from openff.units import unit
Expand Down Expand Up @@ -42,8 +43,8 @@ def test_openmm_run_engine(benzene_vacuum_system, platform,
if platform not in available_platforms:
pytest.skip(f"OpenMM Platform: {platform} not available")
# this test actually runs MD
# if this passes, you're 99% likely to have a good time
# these settings are a small self to self sim, that has enough eq that it doesn't occasionally crash
# these settings are a small self to self sim, that has enough eq that
# it doesn't occasionally crash
s = openfe.protocols.openmm_rfe.RelativeHybridTopologyProtocol.default_settings()
s.simulation_settings.equilibration_length = 0.1 * unit.picosecond
s.simulation_settings.production_length = 0.1 * unit.picosecond
Expand All @@ -64,10 +65,16 @@ def test_openmm_run_engine(benzene_vacuum_system, platform,
'ligand': b_alt
})

m = openfe.LigandAtomMapping(componentA=b, componentB=b_alt,
componentA_to_componentB={i: i for i in range(12)})
dag = p.create(stateA=benzene_vacuum_system, stateB=benzene_vacuum_alt_system,
mapping=[m])
m = openfe.LigandAtomMapping(
componentA=b,
componentB=b_alt,
componentA_to_componentB={i: i for i in range(12)}
)
dag = p.create(
stateA=benzene_vacuum_system,
stateB=benzene_vacuum_alt_system,
mapping=[m]
)

cwd = pathlib.Path(str(tmpdir))
r = execute_DAG(dag, shared_basedir=cwd, scratch_basedir=cwd,
Expand All @@ -78,13 +85,40 @@ def test_openmm_run_engine(benzene_vacuum_system, platform,
unit_shared = tmpdir / f"shared_{pur.source_key}_attempt_0"
assert unit_shared.exists()
assert pathlib.Path(unit_shared).is_dir()

# Check the checkpoint file exists
checkpoint = pur.outputs['last_checkpoint']
assert checkpoint == "checkpoint.chk"
assert (unit_shared / checkpoint).exists()

# Check the nc simulation file exists
# TODO: assert the number of frames
nc = pur.outputs['nc']
assert nc == unit_shared / "simulation.nc"
assert nc.exists()
assert (unit_shared / "structural_analysis.json").exists()

# Check structural analysis contents
structural_analysis_file = unit_shared / "structural_analysis.npz"
assert (structural_analysis_file).exists()
assert pur.outputs['structural_analysis'] == structural_analysis_file

structural_data = np.load(pur.outputs['structural_analysis'])
structural_keys = [
'protein_RMSD', 'ligand_RMSD', 'ligand_COM_drift',
'protein_2D_RMSD', 'time_ps'
]
for key in structural_keys:
assert key in structural_data.keys()

# 6 frames being written to file
# Note: the values of this next test are wrong, but in an expected way
# See: https://github.com/OpenFreeEnergy/openfe_analysis/issues/33
assert_equal(structural_data['time_ps'], [0, 50, 100, 150, 200, 250])
assert structural_data['ligand_RMSD'].shape == (11, 6)
assert structural_data['ligand_COM_drift'].shape == (11, 6)
# No protein so should be empty
assert structural_data['protein_RMSD'].size == 0
assert structural_data['protein_2D_RMSD'].size == 0

# Test results methods that need files present
results = p.gather([r])
Expand Down

0 comments on commit f4475bf

Please sign in to comment.