Merge branch 'main' into cookbook-flowchart

OpenFreeEnergy · Aug 28, 2023 · 90026cc · 90026cc
2 parents 4e0de9d + d21c6d2
commit 90026cc
Show file tree

Hide file tree

Showing 6 changed files with 101 additions and 86 deletions.
diff --git a/docs/cookbook/creating_atom_mappings.rst b/docs/cookbook/creating_atom_mappings.rst
@@ -110,8 +110,8 @@ Scoring Mappings
 With many possible mappings,
 and many ligand pairs we could form mappings between,
 we use **scorers** to rate if a mapping is a good idea.
-These take a ``LigandAtomMapping`` object and return a value from 0.0 (indicating a great mapping)
-to 1.0 (indicating a terrible mapping).
+These take a ``LigandAtomMapping`` object and return a value from 0.0 (indicating a terrible mapping)
+to 1.0 (indicating a great mapping), and so can be used as objective functions for optimizing ligand networks.
 
 Again, the scoring functions from Lomap are included in the ``openfe`` package.
 The :func:`default_lomap_score` function combines many different criteria together

diff --git a/openfe/setup/atom_mapping/lomap_scorers.py b/openfe/setup/atom_mapping/lomap_scorers.py
@@ -22,26 +22,24 @@
 }
 
 
-def ecr_score(mapping: LigandAtomMapping):
+def ecr_score(mapping: LigandAtomMapping) -> float:
     molA = mapping.componentA.to_rdkit()
     molB = mapping.componentB.to_rdkit()
 
-    return 1 - _dbmol.ecr(molA, molB)
+    return _dbmol.ecr(molA, molB)
 
 
-def mcsr_score(mapping: LigandAtomMapping, beta: float = 0.1):
+def mcsr_score(mapping: LigandAtomMapping, beta: float = 0.1) -> float:
     """Maximum command substructure rule
 
-    This rule was originally defined as::
+    This rule is defined as::
 
         mcsr = exp( - beta * (n1 + n2 - 2 * n_common))
 
     Where n1 and n2 are the number of atoms in each molecule, and n_common
     the number of atoms in the MCS.
 
     Giving a value in the range [0, 1.0], with 1.0 being complete agreement
-
-    This is turned into a score by simply returning (1-mcsr)
     """
     molA = mapping.componentA.to_rdkit()
     molB = mapping.componentB.to_rdkit()
@@ -58,10 +56,10 @@ def mcsr_score(mapping: LigandAtomMapping, beta: float = 0.1):
 
     mcsr = math.exp(-beta * (n1 + n2 - 2 * n_common))
 
-    return 1 - mcsr
+    return mcsr
 
 
-def mncar_score(mapping: LigandAtomMapping, ths: int = 4):
+def mncar_score(mapping: LigandAtomMapping, ths: int = 4) -> float:
     """Minimum number of common atoms rule
 
     Parameters
@@ -83,22 +81,22 @@ def mncar_score(mapping: LigandAtomMapping, ths: int = 4):
 
     ok = (n_common > ths) or (n1 < ths + 3) or (n2 < ths + 3)
 
-    return 0.0 if ok else 1.0
+    return 1.0 if ok else 0.0
 
 
 def tmcsr_score(self, mapping: LigandAtomMapping):
     raise NotImplementedError
 
 
 def atomic_number_score(mapping: LigandAtomMapping, beta=0.1,
-                        difficulty=None):
+                        difficulty=None) -> float:
     """A score on the elemental changes happening in the mapping
 
     For each transmuted atom, a mismatch score is summed, according to the
     difficulty scores (see difficult parameter).  The final score is then
     given as:
 
-    score = 1 - exp(-beta * mismatch)
+    score = exp(-beta * mismatch)
 
     Parameters
     ----------
@@ -151,17 +149,15 @@ def atomic_number_score(mapping: LigandAtomMapping, beta=0.1,
 
         nmismatch += 1 - diff
 
-    atomic_number_rule = math.exp(-beta * nmismatch)
-
-    return 1 - atomic_number_rule
+    return math.exp(-beta * nmismatch)
 
 
-def hybridization_score(mapping: LigandAtomMapping, beta=0.15):
+def hybridization_score(mapping: LigandAtomMapping, beta=0.15) -> float:
     """
 
     Score calculated as:
 
-    1 - math.exp(-beta * nmismatch)
+    math.exp(-beta * nmismatch)
 
     Parameters
     ----------
@@ -198,15 +194,21 @@ def hybridization_score(mapping: LigandAtomMapping, beta=0.15):
         if mismatch:
             nmismatch += 1
 
-    hybridization_rule = math.exp(- beta * nmismatch)
+    return math.exp(- beta * nmismatch)
 
-    return 1 - hybridization_rule
 
-
-def sulfonamides_score(mapping: LigandAtomMapping, beta=0.4):
+def sulfonamides_score(mapping: LigandAtomMapping, beta=0.4) -> float:
     """Checks if a sulfonamide appears and disallow this.
 
-    Returns (1 - math.exp(- beta)) if this happens, else 0
+    Returns ``math.exp(- beta)`` if a sulfonamide group is mutated in or out, 
+    otherwise 1.0. Testing has shown that growing a sulfonamide from scratch 
+    performs very badly.
+
+    Parameters
+    ==========
+    beta
+        A positive float describing how much to penalise a growing sulfonamide.
+        Smaller values indicate exponentially larger penalties. 
     """
     molA = mapping.componentA.to_rdkit()
     molB = mapping.componentB.to_rdkit()
@@ -235,17 +237,17 @@ def has_sulfonamide(mol):
         remB.RemoveAtom(j)
 
     if has_sulfonamide(remA.GetMol()) or has_sulfonamide(remB.GetMol()):
-        return 1 - math.exp(-beta)
+        return math.exp(-beta)
     else:
-        return 0
+        return 1.0
 
 
-def heterocycles_score(mapping: LigandAtomMapping, beta=0.4):
+def heterocycles_score(mapping: LigandAtomMapping, beta=0.4) -> float:
     """Checks if a heterocycle is formed from a -H
 
     Pyrrole, furan and thiophene *are* pemitted however
 
-    Returns 1 if this happens, else 0
+    Returns ``math.exp(-beta)`` if this happens, else 1.0
     """
     molA = mapping.componentA.to_rdkit()
     molB = mapping.componentB.to_rdkit()
@@ -284,20 +286,20 @@ def creates_heterocyle(mol):
 
     if (creates_heterocyle(remA.GetMol()) or
             creates_heterocyle(remB.GetMol())):
-        return 1 - math.exp(- beta)
+        return math.exp(- beta)
     else:
-        return 0
+        return 1.0
 
 
 def transmuting_methyl_into_ring_score(mapping: LigandAtomMapping,
-                                       beta=0.1, penalty=6.0):
+                                       beta=0.1, penalty=6.0) -> float:
     """Penalises having a non-mapped ring atoms become a non-ring
 
     This score would for example penalise R-CH3 to R-Ph where R is the same
     mapped atom and both CH3 and Ph are unmapped. Does not penalise R-H to R-Ph.
     If any atoms trigger the rule returns a score of::
 
-      1 - exp(-1 * beta * penalty)
+      exp(-1 * beta * penalty)
 
     Parameters
     ----------
@@ -346,12 +348,12 @@ def transmuting_methyl_into_ring_score(mapping: LigandAtomMapping,
                     ringbreak = True
 
     if not ringbreak:
-        return 0
+        return 1.0
     else:
-        return 1 - math.exp(- beta * penalty)
+        return math.exp(- beta * penalty)
 
 
-def transmuting_ring_sizes_score(mapping: LigandAtomMapping):
+def transmuting_ring_sizes_score(mapping: LigandAtomMapping) -> float:
     """Checks if mapping alters a ring size"""
     molA = mapping.componentA.to_rdkit()
     molB = mapping.componentB.to_rdkit()
@@ -401,27 +403,37 @@ def gen_ringdict(mol):
                         ringdictB[otherB.GetIdx()]):
                     is_bad = True
 
-    return 1 - 0.1 if is_bad else 0
+    return 0.1 if is_bad else 1.0
 
 
-def default_lomap_score(mapping: LigandAtomMapping):
+def default_lomap_score(mapping: LigandAtomMapping) -> float:
     """The default score function from Lomap2
 
-    Note
-    ----
-    Like other scores, relative to the original Lomap this is (1 - score)
-    I.e. high values are "bad", low values are "good"
+
+    This score is a combination of many rules combined and considers factors such as the
+    number of heavy atoms in common, if ring sizes are changed or rings are broken,
+    or if other alchemically unwise transformations are attempted.
+       
+    Parameters
+    ----------
+    mapping : LigandAtomMapping
+      The atom mapping to score.
+
+    Returns
+    -------
+    score : float
+       A rating of how good this mapping is, from 0.0 (terrible) to 1.0 (great).
     """
     score = math.prod((
-        1 - ecr_score(mapping),
-        1 - mncar_score(mapping),
-        1 - mcsr_score(mapping),
-        1 - atomic_number_score(mapping),
-        1 - hybridization_score(mapping),
-        1 - sulfonamides_score(mapping),
-        1 - heterocycles_score(mapping),
-        1 - transmuting_methyl_into_ring_score(mapping),
-        1 - transmuting_ring_sizes_score(mapping)
+        ecr_score(mapping),
+        mncar_score(mapping),
+        mcsr_score(mapping),
+        atomic_number_score(mapping),
+        hybridization_score(mapping),
+        sulfonamides_score(mapping),
+        heterocycles_score(mapping),
+        transmuting_methyl_into_ring_score(mapping),
+        transmuting_ring_sizes_score(mapping)
     ))
 
-    return 1 - score
+    return score
diff --git a/openfe/setup/ligand_network_planning.py b/openfe/setup/ligand_network_planning.py
@@ -38,7 +38,7 @@ def _hasten_lomap(mapper, ligands):
 def generate_radial_network(ligands: Iterable[SmallMoleculeComponent],
                             central_ligand: SmallMoleculeComponent,
                             mappers: Union[AtomMapper, Iterable[AtomMapper]],
-                            scorer=None):
+                            scorer=None) -> LigandNetwork:
     """Generate a radial network with all ligands connected to a central node
 
     Also known as hub and spoke or star-map, this plans a LigandNetwork where
@@ -54,7 +54,7 @@ def generate_radial_network(ligands: Iterable[SmallMoleculeComponent],
       mapper(s) to use, at least 1 required
     scorer : scoring function, optional
       a callable which returns a float for any LigandAtomMapping.  Used to
-      assign scores to potential mappings, higher scores indicate worse
+      assign scores to potential mappings; higher scores indicate better
       mappings.
 
     Raises
@@ -80,7 +80,7 @@ def generate_radial_network(ligands: Iterable[SmallMoleculeComponent],
     edges = []
 
     for ligand in ligands:
-        best_score = math.inf
+        best_score = 0.0
         best_mapping = None
 
         for mapping in itertools.chain.from_iterable(
@@ -94,7 +94,7 @@ def generate_radial_network(ligands: Iterable[SmallMoleculeComponent],
             score = scorer(mapping)
             mapping = mapping.with_annotations({"score": score})
 
-            if score < best_score:
+            if score > best_score:
                 best_mapping = mapping
                 best_score = score
 
@@ -110,8 +110,7 @@ def generate_maximal_network(
     mappers: Union[AtomMapper, Iterable[AtomMapper]],
     scorer: Optional[Callable[[LigandAtomMapping], float]] = None,
     progress: Union[bool, Callable[[Iterable], Iterable]] = True,
-    # allow_disconnected=True
-):
+) -> LigandNetwork:
     """Create a network with all possible proposed mappings.
 
     This will attempt to create (and optionally score) all possible mappings
@@ -129,8 +128,7 @@ def generate_maximal_network(
       the ligands to include in the LigandNetwork
     mappers : AtomMapper or Iterable[AtomMapper]
       the AtomMapper(s) to use to propose mappings.  At least 1 required,
-      but many can be given, in which case all will be tried to find the
-      lowest score edges
+      but many can be given.
     scorer : Scoring function
       any callable which takes a LigandAtomMapping and returns a float
     progress : Union[bool, Callable[Iterable], Iterable]
@@ -173,8 +171,8 @@ def generate_minimal_spanning_network(
     mappers: Union[AtomMapper, Iterable[AtomMapper]],
     scorer: Callable[[LigandAtomMapping], float],
     progress: Union[bool, Callable[[Iterable], Iterable]] = True,
-):
-    """Plan a LigandNetwork which connects all ligands with minimal cost
+) -> LigandNetwork:
+    """Connects all ligands with as few edges as possible with maximum score
 
     Parameters
     ----------
@@ -183,7 +181,7 @@ def generate_minimal_spanning_network(
     mappers : AtomMapper or Iterable[AtomMapper]
       the AtomMapper(s) to use to propose mappings.  At least 1 required,
       but many can be given, in which case all will be tried to find the
-      lowest score edges
+      highest score edges
     scorer : Scoring function
       any callable which takes a LigandAtomMapping and returns a float
     progress : Union[bool, Callable[Iterable], Iterable]
@@ -199,17 +197,21 @@ def generate_minimal_spanning_network(
     # First create a network with all the proposed mappings (scored)
     network = generate_maximal_network(ligands, mappers, scorer, progress)
 
+    # Flip network scores so we can use minimal algorithm
+    g2 = nx.MultiGraph()
+    for e1, e2, d in network.graph.edges(data=True):
+        g2.add_edge(e1, e2, weight=-d['score'], object=d['object'])
+
     # Next analyze that network to create minimal spanning network. Because
     # we carry the original (directed) LigandAtomMapping, we don't lose
     # direction information when converting to an undirected graph.
-    min_edges = nx.minimum_spanning_edges(nx.MultiGraph(network.graph),
-                                          weight='score')
+    min_edges = nx.minimum_spanning_edges(g2)
     min_mappings = [edge_data['object'] for _, _, _, edge_data in min_edges]
     min_network = LigandNetwork(min_mappings)
     missing_nodes = set(network.nodes) - set(min_network.nodes)
     if missing_nodes:
         raise RuntimeError("Unable to create edges to some nodes: "
-                           + str(list(missing_nodes)))
+                           f"{list(missing_nodes)}")
 
     return min_network
 

diff --git a/openfe/tests/protocols/test_openmm_equil_rfe_protocols.py b/openfe/tests/protocols/test_openmm_equil_rfe_protocols.py
@@ -238,6 +238,7 @@ def test_dry_run_gaff_vacuum(benzene_vacuum_system, toluene_vacuum_system,
         sampler = unit.run(dry=True)['debug']['sampler']
 
 
+@pytest.mark.slow
 def test_dry_many_molecules_solvent(
     benzene_many_solv_system, toluene_many_solv_system,
     benzene_to_toluene_mapping, tmpdir
@@ -435,6 +436,7 @@ def test_dry_run_ligand_tip4p(benzene_system, toluene_system,
         assert sampler._factory.hybrid_system
 
 
+@pytest.mark.flaky(reruns=3)  # bad minimisation can happen
 def test_dry_run_user_charges(benzene_modifications, tmpdir):
     """
     Create a hybrid system with a set of fictitious user supplied charges