From 5a04ba02f1a6e33ec7d6d43cf10a947a030e680a Mon Sep 17 00:00:00 2001 From: jessbade Date: Fri, 1 Dec 2023 11:40:06 -0800 Subject: [PATCH 01/18] Add configuration funcs for rdkit md --- isicle/md.py | 82 ++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 79 insertions(+), 3 deletions(-) diff --git a/isicle/md.py b/isicle/md.py index ab865d7..b9b602e 100644 --- a/isicle/md.py +++ b/isicle/md.py @@ -4,6 +4,7 @@ from rdkit.Chem import AllChem from rdkit.Chem import rdForceFieldHelpers from rdkit.Chem import ChemicalForceFields +from rdkit.Chem import rdDistGeom import isicle from isicle.geometry import Geometry, XYZGeometry @@ -482,7 +483,7 @@ def get_structure(self): class RDKitWrapper(Geometry, WrapperInterface): """ - Wrapper for rdkit functionality. + Wrapper for RDKit functionality. Implements :class:`~isicle.interfaces.WrapperInterface` to ensure required methods are exposed. @@ -509,13 +510,88 @@ def __init__(self, **kwargs): self.__dict__.update(dict.fromkeys(self._defaults, self._default_value)) self.__dict__.update(**kwargs) - def set_geometry(self): + def set_geometry(self, geom): + """ + Set :obj:`~isicle.geometry.Geometry` instance for simulation. + + Parameters + ---------- + geom : :obj:`~isicle.geometry.Geometry` + Molecule representation. + + """ + + # Assign geometry + self.geom = geom + self.basename = self.geom.basename + + def configure(self, method="distance", ff="mmff94", **kwargs): + """ + Set conformer generation parameters. + Parameters + ---------- + method: str + `distance` for distance geometry method (default) + ff: str + Forcefield used in conformer generation. + seed: + randomSeed argument of conformer generation. Enables reproducibility. + + """ + lookup = { + "distance": self._configure_distance_geometry, + "etkdg": self._configure_etkdg, + } + try: + lookup[method.lower()](**kwargs) + except KeyError: + raise CustomException("Not a supported RDKit method") + + # TODO run forcefield availability check code found in geometry + self.ff = ff return - def configure(self): + def _configure_distance_geometry( + self, numConfs=10, randomSeed=-1, pruneRmsThresh=-1.0, forceTol=0.001 + ): + """ + Set parameters for distance geometry based conformer generation. + Parameters + ---------- + numConfs: int + Number of conformers to generate. + randomSeed: int + set seed for randomness, argument of `rdkit.Chem.rdDistGeom.EmbedMultipleConfs` + pruneRmsThresh: float + Greedy pruning mainting conformers are apart based upon RMSD of heavy atoms. + Default: no pruning, -1.0 + forceTol: float + Tolerance to be used in force-field minimizations + """ + # TODO numThreads argument, look in to multi-thread support in compilation + self.method = "distance" + self.numConfs = numConfs + self.randomSeed = randomSeed + self.pruneRmsThresh = pruneRmsThresh + self.forceTol = forceTol + + def _configure_etkdg(self, version=2): + """ + Set parameters for ETKDG conformer generation, based on work by Riniker and Landrum. + """ + lookup = {1: "etkdg1", 2: "etkdg2"} + try: + self.method = lookup[version] + except KeyError: + raise CustomException( + "Only versions 1 and 2 are available for ETKDG method." + ) + return def submit(self): + mol = self.geom.mol + rdDistGeom.EmbedMultipleConfs(mol, self.confno, randomSeed=self.seed) return def run(self): From a69208e7c89359d5649914882992eab82e3ea6ec Mon Sep 17 00:00:00 2001 From: jessbade Date: Sun, 3 Dec 2023 22:25:12 -0800 Subject: [PATCH 02/18] Add etkdg configuration to md --- isicle/md.py | 51 +++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 43 insertions(+), 8 deletions(-) diff --git a/isicle/md.py b/isicle/md.py index b9b602e..f671bb5 100644 --- a/isicle/md.py +++ b/isicle/md.py @@ -548,7 +548,7 @@ def configure(self, method="distance", ff="mmff94", **kwargs): raise CustomException("Not a supported RDKit method") # TODO run forcefield availability check code found in geometry - self.ff = ff + # self.ff = ff return def _configure_distance_geometry( @@ -575,24 +575,59 @@ def _configure_distance_geometry( self.pruneRmsThresh = pruneRmsThresh self.forceTol = forceTol - def _configure_etkdg(self, version=2): + def _configure_etkdg(self, version=2, variant=False): """ Set parameters for ETKDG conformer generation, based on work by Riniker and Landrum. + Parameters + ---------- + version: int + Specify version of ETKDG to use. + Version 1: RDKit default + Version 2: (default) 2016 release, updated torsion angle potentials + Version 3: Updated sampling small rings and macrocycles + variant: bool + Specify if variant of ETKDGv3 should be used (srETKDGv3). + True: updated sampling for small rings + False: (default) updated sampling for small rings and macrocyles """ - lookup = {1: "etkdg1", 2: "etkdg2"} + lookup = {1: "etkdg1", 2: "etkdg2", 3: "etkdg3"} try: self.method = lookup[version] + self.variant = variant except KeyError: - raise CustomException( - "Only versions 1 and 2 are available for ETKDG method." - ) + raise "Version options [1,2,3] for ETKDG method." return + def _submit_etkdg(self): + if self.method == "etkdg1": + params = rdDistGeom.ETKDG() + elif self.method == "etkdg2": + params = rdDistGeom.ETKDGv2() + elif self.method == "etkdg3" and self.variant is False: + params = rdDistGeom.ETKDGv3() + elif self.method == "etkdg3" and self.variant is True: + params = rdDistGeom.srETKDGv3() + else: + raise "Failure to run RDKit MD, method and/or variant not recognized" + return params + def submit(self): mol = self.geom.mol - rdDistGeom.EmbedMultipleConfs(mol, self.confno, randomSeed=self.seed) - return + if self.method == "distance": + rdDistGeom.EmbedMultipleConfs( + mol, + numConfs=self.numConfs, + randomSeed=self.randomSeed, + pruneRmsThresh=self.pruneRmsThresh, + forceTol=self.forceTol, + ) + elif "etkdg" in self.method: + params = self._submit_etkdg() + else: + raise CustomException( + "Failure to run RDKit MD, method and/or variant not recognized" + ) def run(self): return From d3bb1655bcd0268994b8a988632357d9a859dc41 Mon Sep 17 00:00:00 2001 From: jessbade Date: Tue, 6 Feb 2024 11:55:41 -0800 Subject: [PATCH 03/18] Update RDKit finish, run funcs --- isicle/md.py | 55 +++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 48 insertions(+), 7 deletions(-) diff --git a/isicle/md.py b/isicle/md.py index f671bb5..b6680c0 100644 --- a/isicle/md.py +++ b/isicle/md.py @@ -534,9 +534,10 @@ def configure(self, method="distance", ff="mmff94", **kwargs): `distance` for distance geometry method (default) ff: str Forcefield used in conformer generation. - seed: - randomSeed argument of conformer generation. Enables reproducibility. - + **kwargs: + Keyword arguments to configure the simulation. + See :meth:`~isicle.md.RDKitWrapper._configure_distance_geometry`, + :meth:`~isicle.md.RDKitWrapper._configure_etkdg`. """ lookup = { "distance": self._configure_distance_geometry, @@ -622,18 +623,58 @@ def submit(self): pruneRmsThresh=self.pruneRmsThresh, forceTol=self.forceTol, ) + self.geom.mol = mol elif "etkdg" in self.method: params = self._submit_etkdg() + rdDistGeom.EmbedMultipleConfs(mol, numConfs=self.numconfs, params=params) + self.geom.mol = mol else: raise CustomException( "Failure to run RDKit MD, method and/or variant not recognized" ) - def run(self): - return - def finish(self): - return + """ + Parse RDKit conformers generated. + """ + + self.conformers = [i for i in self.geom.mol.GetConformers()] + + def run(self, geom, **kwargs): + """ + Generate conformers using RKDit and supplied parameters. + + Parameters + ---------- + geom : :obj:`~isicle.geometry.Geometry` + Molecule representation. + **kwargs + Keyword arguments to configure the simulation. + See :meth:`~isicle.md.RDKitWrapper.configure`. + + Returns + ------- + :obj:`~isicle.md.XTBWrapper` + Wrapper object containing relevant outputs from the simulation. + + """ + + # New instance + self = RDKitWrapper() + + # Set geometry + self.set_geometry(geom) + + # Configure + self.configure(**kwargs) + + # Run QM simulation + self.submit() + + # Finish/clean up + self.finish() + + return self class TINKERWrapper(Geometry, WrapperInterface): From 5ec88579335044b6965d64927b3f770f0621800a Mon Sep 17 00:00:00 2001 From: jessbade Date: Tue, 6 Feb 2024 12:06:39 -0800 Subject: [PATCH 04/18] Add docstring formatting, create rdkit parser --- isicle/parse.py | 194 +++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 175 insertions(+), 19 deletions(-) diff --git a/isicle/parse.py b/isicle/parse.py index 6a8b890..e26118c 100644 --- a/isicle/parse.py +++ b/isicle/parse.py @@ -10,7 +10,9 @@ class NWChemParser(FileParserInterface): - """Extract text from an NWChem simulation output file.""" + """ + Extract text from an NWChem simulation output file. + """ def __init__(self): self.contents = None @@ -18,13 +20,18 @@ def __init__(self): self.path = None def load(self, path: str): - """Load in the data file""" + """ + Load in the data file + """ with open(path, "r") as f: self.contents = f.readlines() self.path = path return self.contents def _parse_geometry(self): + """ + Add docstring + """ search = os.path.dirname(self.path) geoms = sorted(glob.glob(os.path.join(search, "*.xyz"))) @@ -34,6 +41,9 @@ def _parse_geometry(self): raise Exception def _parse_energy(self): + """ + Add docstring + """ # TO DO: Add Initial energy and final energy if different # Init @@ -48,6 +58,9 @@ def _parse_energy(self): return energy def _parse_shielding(self): + """ + Add docstring + """ # Init ready = False shield_idxs = [] @@ -78,6 +91,9 @@ def _parse_shielding(self): raise Exception def _parse_spin(self): + """ + Add docstring + """ # TO DO: Add g-factors # Declaring couplings @@ -117,6 +133,9 @@ def _parse_spin(self): } def _parse_frequency(self): + """ + Add docstring + """ # TO DO: Add freq intensities # TO DO: Add rotational/translational/vibrational Cv and entropy freq = None @@ -185,6 +204,9 @@ def _parse_frequency(self): raise Exception def _parse_charge(self): + """ + Add docstring + """ # TO DO: Parse molecular charge and atomic charges # TO DO: Add type of charge # TO DO: Multiple instances of charge analysis seen (two Mulliken and one Lowdin, difference?) @@ -234,6 +256,9 @@ def _parse_charge(self): raise Exception def _parse_timing(self): + """ + Add docstring + """ # Init indices = [] preoptTime = 0 @@ -284,6 +309,9 @@ def _parse_timing(self): } def _parse_molden(self): + """ + Add docstring + """ search = splitext(self.path)[0] m = glob.glob(search + "*.molden") @@ -293,7 +321,9 @@ def _parse_molden(self): return m[0] def _parse_protocol(self): - """Parse out dft protocol""" + """ + Parse out dft protocol + """ functional = [] basis_set = [] solvation = [] @@ -338,6 +368,9 @@ def _parse_protocol(self): } def _parse_connectivity(self): + """ + Add docstring + """ coor_substr = "internuclear distances" # Extracting Atoms & Coordinates @@ -437,21 +470,30 @@ def save(self, path): class ImpactParser(FileParserInterface): - """Extract text from an Impact mobility calculation output file.""" + """ + Extract text from an Impact mobility calculation output file. + """ def __init__(self): + """ + Add docstring + """ self.contents = None self.result = None def load(self, path: str): - """Load in the data file""" + """ + Load in the data file + """ with open(path, "rb") as f: self.contents = f.readlines() return self.contents def parse(self): - """Extract relevant information from data""" + """ + Extract relevant information from data + """ # Check CCS results == 1 count = 0 @@ -488,27 +530,38 @@ def parse(self): return result # TODO: return CCS? def save(self, path: str, sep="\t"): - """Write parsed object to file""" + """ + Write parsed object to file + """ pd.DataFrame(self.result).to_csv(path, sep=sep, index=False) return class MobcalParser(FileParserInterface): - """Extract text from a MOBCAL mobility calculation output file.""" + """ + Extract text from a MOBCAL mobility calculation output file. + """ def __init__(self): + """ + Add docstring + """ self.contents = None self.result = {} def load(self, path: str): - """Load in the data file""" + """ + Load in the data file + """ with open(path, "r") as f: self.contents = f.readlines() return self.contents def parse(self): - """Extract relevant information from data""" + """ + Extract relevant information from data + """ done = False for line in self.contents: # if "average (second order) TM mobility" in line: @@ -525,41 +578,63 @@ def parse(self): return self.result def save(self, path: str, sep="\t"): - """Write parsed object to file""" + """ + Write parsed object to file + """ pd.DataFrame(self.result).to_csv(path, sep=sep, index=False) return class SanderParser(FileParserInterface): - """Extract text from an Sander simulated annealing simulation output file.""" + """ + Extract text from an Sander simulated annealing simulation output file. + """ def load(self, path: str): - """Load in the data file""" + """ + Load in the data file + """ raise NotImplementedError def parse(self): - """Extract relevant information from data""" + """ + Extract relevant information from data + """ raise NotImplementedError def save(self, path: str): - """Write parsed object to file""" + """ + Write parsed object to file + """ raise NotImplementedError class XTBParser(FileParserInterface): + """ + Add docstring + """ + def __init__(self): + """ + Add docstring + """ self.contents = None self.result = None self.path = None def load(self, path: str): - """Load in the data file""" + """ + Load in the data file + """ with open(path, "r") as f: self.contents = f.readlines() self.path = path # return self.contents def _crest_energy(self): + """ + Add docstring + """ relative_energy = [] total_energy = [] population = [] @@ -588,6 +663,10 @@ def _crest_energy(self): } def _crest_timing(self): + """ + Add docstring + """ + def grab_time(line): line = line.replace(" ", "") line = line.split(":") @@ -626,6 +705,9 @@ def grab_time(line): } def _isomer_energy(self): + """ + Add docstring + """ complete = False relative_energies = [] total_energies = [] @@ -647,6 +729,10 @@ def _isomer_energy(self): return {"relative energy": relative_energies, "total energy": total_energies} def _isomer_timing(self): + """ + Add docstring + """ + def grab_time(line): line = line.replace(" ", "") line = line.split(":") @@ -670,6 +756,9 @@ def grab_time(line): } def _opt_energy(self): + """ + Add docstring + """ for line in self.contents: if "TOTAL ENERGY" in line: energy = line.split()[3] + " Hartrees" @@ -677,6 +766,10 @@ def _opt_energy(self): return {"Total energy": energy} def _opt_timing(self): + """ + Add docstring + """ + def grab_time(line): line = line.replace(" ", "") line = line.split(":") @@ -707,6 +800,9 @@ def grab_time(line): } def _parse_energy(self): + """ + Add docstring + """ if self.parse_crest == True: return self._crest_energy() if self.parse_opt == True: @@ -715,6 +811,9 @@ def _parse_energy(self): return self._isomer_energy() def _parse_timing(self): + """ + Add docstring + """ if self.parse_crest == True: return self._crest_timing() if self.parse_opt == True: @@ -723,6 +822,9 @@ def _parse_timing(self): return self._isomer_timing() def _parse_protocol(self): + """ + Add docstring + """ protocol = None for line in self.contents: @@ -755,7 +857,9 @@ def _parse_xyz(self): return isicle.conformers.ConformationalEnsemble(x) def parse(self): - """Extract relevant information from data""" + """ + Extract relevant information from data + """ # Check that the file is valid first if len(self.contents) == 0: @@ -835,24 +939,67 @@ def parse(self): return result def save(self, path): + """ + Add docstring + """ with open(path, "wb") as f: pickle.dump(self, f) return +class RDKitParser(FileParserInterface): + """ """ + + def __init__(self): + """ + Add docstring + """ + return + + def load(self, path: str): + """ + Add docstring + """ + return + + def parse(self): + """ + Add docstring + """ + return + + def save(self, path: str): + """ + Add docstring + """ + return + + class TINKERParser(FileParserInterface): + """ + Add docstring + """ + def __init__(self): + """ + Add docstring + """ self.contents = None self.result = None self.path = None def load(self, path: str): - """Load in the data file""" + """ + Load in the data file + """ with open(path, "r") as f: self.contents = f.readlines() self.path = path def _parse_energy(self): + """ + Add docstring + """ inp = self.contents if len(inp) < 13: quit() @@ -869,6 +1016,10 @@ def _parse_energy(self): return energies def _parse_conformers(self): + """ + Add docstring + """ + def parse_atom_symbol(AtomNum): Lookup = [ "H", @@ -1444,7 +1595,9 @@ def parse_atom_symbol(AtomNum): return isicle.conformers.ConformationalEnsemble(x) def parse(self): - """Extract relevant information from data""" + """ + Extract relevant information from data + """ # Check that the file is valid first if len(self.contents) == 0: @@ -1471,4 +1624,7 @@ def parse(self): return result def save(self): + """ + Add docstring + """ return From 358c93df99cf0bc88ba23cecc2e4ad8b89edbcfa Mon Sep 17 00:00:00 2001 From: jessbade Date: Tue, 6 Feb 2024 12:18:58 -0800 Subject: [PATCH 05/18] Add tinker lookup helper func --- isicle/utils.py | 49 +++++++++++++++++++++++++++---------------------- 1 file changed, 27 insertions(+), 22 deletions(-) diff --git a/isicle/utils.py b/isicle/utils.py index 486b980..d4a99b7 100644 --- a/isicle/utils.py +++ b/isicle/utils.py @@ -8,7 +8,7 @@ def safelist(x): - ''' + """ Ensures passed object is of correct format. Parameters @@ -20,7 +20,7 @@ def safelist(x): list, :obj:`~pd.core.series.Series`, or :obj:`~np.ndarray` Input safely cast to list-like. - ''' + """ if not isinstance(x, (list, pd.core.series.Series, np.ndarray)): return [x].copy() @@ -28,7 +28,7 @@ def safelist(x): class TypedList(collections.abc.MutableSequence): - ''' + """ Mutable sequence that requires all members be of select type(s). Attributes @@ -38,10 +38,10 @@ class TypedList(collections.abc.MutableSequence): list : list Internal list representation. - ''' + """ def __init__(self, oktypes, *args): - ''' + """ Initialize :obj:`~isicle.utils.TypedList` instance. Parameters @@ -51,7 +51,7 @@ def __init__(self, oktypes, *args): *args Objects to comprise the type-restricted list. - ''' + """ self.oktypes = oktypes self.list = list() @@ -61,7 +61,7 @@ def __init__(self, oktypes, *args): self.extend(list(args)) def check(self, v): - ''' + """ Check if supplied value is of allowed type(s). Raises @@ -69,7 +69,7 @@ def check(self, v): TypeError If value is not of allowed type(s). - ''' + """ if not isinstance(v, self.oktypes): raise TypeError(v) @@ -99,12 +99,17 @@ def __repr__(self): def atomic_masses(): - path = resources.files('isicle') / 'resources/atomic_masses.tsv' + path = resources.files("isicle") / "resources/atomic_masses.tsv" return pd.read_csv(path, delim_whitespace=True) +def tinker_lookup(): + path = resources.files("isicle") / "resources/tinker_lookup.tsv" + return pd.read_csv(path, sep="\t") + + def gettempdir(): - ''' + """ Return the name of the directory used for temporary files. Returns @@ -112,19 +117,19 @@ def gettempdir(): str Path to temporary directory. - ''' - - root = os.path.join(tempfile.gettempdir(), 'isicle') - + """ + + root = os.path.join(tempfile.gettempdir(), "isicle") + if not os.path.exists(root): os.makedirs(root) - + return root def mkdtemp(prefix=None, suffix=None): - ''' - An ISiCLE-specific wrapper of :func:`~tempfile.mkdtemp` to create a + """ + An ISiCLE-specific wrapper of :func:`~tempfile.mkdtemp` to create a temporary directory for temporary ISiCLE files. The temporary directory is not automatically removed. @@ -139,17 +144,17 @@ def mkdtemp(prefix=None, suffix=None): ------- str Path to temporary directory. - - ''' - + + """ + return tempfile.mkdtemp(dir=gettempdir(), prefix=prefix, suffix=suffix) def rmdtemp(): - ''' + """ Removes all temporary directories and files created by ISiCLE. - ''' + """ shutil.rmtree(gettempdir(), ignore_errors=True) From bf8a384038fd7ad64c4afaa8a3c58547f1985f5e Mon Sep 17 00:00:00 2001 From: jessbade Date: Tue, 6 Feb 2024 12:19:39 -0800 Subject: [PATCH 06/18] Replace long tinker list with lookup func to resource file --- isicle/parse.py | 430 +----------------------------------------------- 1 file changed, 2 insertions(+), 428 deletions(-) diff --git a/isicle/parse.py b/isicle/parse.py index e26118c..fb4b416 100644 --- a/isicle/parse.py +++ b/isicle/parse.py @@ -1121,434 +1121,8 @@ def parse_atom_symbol(AtomNum): conffile.close() conformers = [] atoms = [] - atomtypes = [ - "CR", - "C=C", - "CSP", - "C=O", - "C=N", - "CGD", - "C=O", - "C=O", - "CON", - "COO", - "COO", - "COO", - "C=O", - "C=S", - "C=S", - "CSO", - "CS=", - "CSS", - "C=P", - "CSP", - "=C=", - "HC", - "HSI", - "OR", - "OC=", - "OC=", - "OC=", - "OC=", - "ONO", - "ON=", - "OSO", - "OSO", - "OSO", - "OS=", - "-OS", - "OPO", - "OPO", - "OPO", - "-OP", - "-O-", - "O=C", - "O=C", - "O=C", - "O=C", - "O=N", - "O=S", - "O=S", - "NR", - "N=C", - "N=N", - "NC=", - "NC=", - "NN=", - "NN=", - "F", - "CL", - "BR", - "I", - "S", - "S=C", - "S=O", - ">S=", - "SO2", - "SO2", - "SO3", - "SO4", - "=SO", - "SNO", - "SI", - "CR4", - "HOR", - "HO", - "HOM", - "CR3", - "HNR", - "H3N", - "HPY", - "HNO", - "HNM", - "HN", - "HOC", - "HOP", - "PO4", - "PO3", - "PO2", - "PO", - "PTE", - "P", - "HN=", - "HN=", - "HNC", - "HNC", - "HNC", - "HNC", - "HNN", - "HNN", - "HNS", - "HNP", - "HNC", - "HSP", - "HOC", - "HOC", - "CE4", - "HOH", - "O2C", - "OXN", - "O2N", - "O2N", - "O3N", - "O-S", - "O2S", - "O3S", - "O4S", - "OSM", - "OP", - "O2P", - "O3P", - "O4P", - "O4C", - "HOS", - "NR+", - "OM", - "OM2", - "HNR", - "HIM", - "HPD", - "HNN", - "HNC", - "HGD", - "HN5", - "CB", - "NPY", - "NPY", - "NC=", - "NC=", - "NC=", - "NC%", - "CO2", - "CS2", - "NSP", - "NSO", - "NSO", - "NPO", - "NPO", - "NC%", - "STH", - "NO2", - "NO3", - "N=O", - "NAZ", - "NSO", - "O+", - "HO+", - "O=+", - "HO=", - "=N=", - "N+=", - "N+=", - "NCN", - "NGD", - "CGD", - "CNN", - "NPD", - "OFU", - "C%", - "NR%", - "NM", - "C5A", - "C5B", - "N5A", - "N5B", - "N2O", - "N3O", - "NPO", - "OH2", - "HS", - "HS=", - "HP", - "S-P", - "S2C", - "SM", - "SSM", - "SO2", - "SSO", - "=S=", - "-P=", - "N5M", - "CLO", - "C5", - "N5", - "CIM", - "NIM", - "N5A", - "N5B", - "N5+", - "N5A", - "N5B", - "N5O", - "FE+", - "FE+", - "F-", - "CL-", - "BR-", - "LI+", - "NA+", - "K+", - "ZIN", - "ZN+", - "CA+", - "CU+", - "CU+", - "MG+", - ] - anums = [ - 6, - 6, - 6, - 6, - 6, - 6, - 6, - 6, - 6, - 6, - 6, - 6, - 6, - 6, - 6, - 6, - 6, - 6, - 6, - 6, - 6, - 1, - 1, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 9, - 17, - 35, - 53, - 16, - 16, - 16, - 16, - 16, - 16, - 16, - 16, - 16, - 16, - 14, - 6, - 1, - 1, - 1, - 6, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 15, - 15, - 15, - 15, - 15, - 15, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 6, - 1, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 8, - 1, - 7, - 8, - 8, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 6, - 7, - 7, - 7, - 7, - 7, - 7, - 6, - 6, - 7, - 7, - 7, - 7, - 7, - 7, - 16, - 7, - 7, - 7, - 7, - 7, - 8, - 1, - 8, - 1, - 7, - 7, - 7, - 7, - 7, - 6, - 6, - 7, - 8, - 6, - 7, - 7, - 6, - 6, - 7, - 7, - 7, - 7, - 7, - 8, - 1, - 1, - 1, - 16, - 16, - 16, - 16, - 16, - 16, - 16, - 15, - 7, - 17, - 6, - 7, - 6, - 7, - 7, - 7, - 7, - 7, - 7, - 7, - 26, - 26, - 9, - 17, - 35, - 3, - 11, - 19, - 30, - 30, - 20, - 29, - 29, - 12, - ] + atomtypes = isicle.utils.tinker_lookup()["atomtypes"].to_list() + anums = isicle.utils.tinker_lookup()["anums"].to_list() atypes = [x[:3] for x in atomtypes] # Parse data from arc file, extract coordinates and atom type From deeb0acbf6e6458bd78961935966b19d211324ca Mon Sep 17 00:00:00 2001 From: jessbade Date: Tue, 6 Feb 2024 12:54:05 -0800 Subject: [PATCH 07/18] Add confID to rdkit conformers, conf ensemble conversion --- isicle/md.py | 44 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 43 insertions(+), 1 deletion(-) diff --git a/isicle/md.py b/isicle/md.py index b6680c0..5c37ca4 100644 --- a/isicle/md.py +++ b/isicle/md.py @@ -637,8 +637,16 @@ def finish(self): """ Parse RDKit conformers generated. """ + conformers = [ + isicle.load(i, basename=self.geom.mol.basename) + for i in self.geom.mol.GetConformers() + ] - self.conformers = [i for i in self.geom.mol.GetConformers()] + confIDs = [i.GetId() for i in self.geom.mol.GetConformers()] + + self.geom = conformers + for conf, label in zip(self.geom, confIDs): + conf.__dict__.update(conformerID=label) def run(self, geom, **kwargs): """ @@ -676,6 +684,40 @@ def run(self, geom, **kwargs): return self + def get_structures(self): + """ + Extract all structures from containing object as a conformational ensemble. + + Returns + ------- + :obj:`~isicle.conformers.ConformationalEnsemble` + Conformational ensemble. + + """ + if isinstance(self.geom, isicle.conformers.ConformationalEnsemble): + return self.geom + + raise TypeError( + "Object does not contain multiple structures. Use `get_structure` instead." + ) + + def get_structure(self): + """ + Extract structure from containing object. + + Returns + ------- + :obj:`~isicle.geometry.XYZGeometry` + Structure instance. + + """ + if isinstance(self.geom, isicle.conformers.ConformationalEnsemble): + raise TypeError( + "Object contains multiple structures. Use `get_structures` instead." + ) + + return self.geom + class TINKERWrapper(Geometry, WrapperInterface): From 8fc40e58e7ec2ebd44277e83047d1b0725d3f4da Mon Sep 17 00:00:00 2001 From: jessbade Date: Tue, 6 Feb 2024 12:55:18 -0800 Subject: [PATCH 08/18] Remove unnecessary rdkit parsing class --- isicle/parse.py | 29 +---------------------------- 1 file changed, 1 insertion(+), 28 deletions(-) diff --git a/isicle/parse.py b/isicle/parse.py index fb4b416..c7544b4 100644 --- a/isicle/parse.py +++ b/isicle/parse.py @@ -947,34 +947,6 @@ def save(self, path): return -class RDKitParser(FileParserInterface): - """ """ - - def __init__(self): - """ - Add docstring - """ - return - - def load(self, path: str): - """ - Add docstring - """ - return - - def parse(self): - """ - Add docstring - """ - return - - def save(self, path: str): - """ - Add docstring - """ - return - - class TINKERParser(FileParserInterface): """ Add docstring @@ -1021,6 +993,7 @@ def _parse_conformers(self): """ def parse_atom_symbol(AtomNum): + # TODO: modify lookup to use resources/atomic_masses.tsv Lookup = [ "H", "He", From b8ef13716a3a75a42e8c3ef46a6340c0d4294de7 Mon Sep 17 00:00:00 2001 From: jessbade Date: Tue, 6 Feb 2024 12:55:45 -0800 Subject: [PATCH 09/18] Add resource file for tinker atom type lookup --- isicle/resources/tinker_lookup.tsv | 213 +++++++++++++++++++++++++++++ 1 file changed, 213 insertions(+) create mode 100644 isicle/resources/tinker_lookup.tsv diff --git a/isicle/resources/tinker_lookup.tsv b/isicle/resources/tinker_lookup.tsv new file mode 100644 index 0000000..fb832a1 --- /dev/null +++ b/isicle/resources/tinker_lookup.tsv @@ -0,0 +1,213 @@ +atomtypes anums +CR 6 +C=C 6 +CSP 6 +C=O 6 +C=N 6 +CGD 6 +C=O 6 +C=O 6 +CON 6 +COO 6 +COO 6 +COO 6 +C=O 6 +C=S 6 +C=S 6 +CSO 6 +CS= 6 +CSS 6 +C=P 6 +CSP 6 +=C= 6 +HC 1 +HSI 1 +OR 8 +OC= 8 +OC= 8 +OC= 8 +OC= 8 +ONO 8 +ON= 8 +OSO 8 +OSO 8 +OSO 8 +OS= 8 +-OS 8 +OPO 8 +OPO 8 +OPO 8 +-OP 8 +-O- 8 +O=C 8 +O=C 8 +O=C 8 +O=C 8 +O=N 8 +O=S 8 +O=S 8 +NR 7 +N=C 7 +N=N 7 +NC= 7 +NC= 7 +NN= 7 +NN= 7 +F 9 +CL 17 +BR 35 +I 53 +S 16 +S=C 16 +S=O 16 +>S= 16 +SO2 16 +SO2 16 +SO3 16 +SO4 16 +=SO 16 +SNO 16 +SI 14 +CR4 6 +HOR 1 +HO 1 +HOM 1 +CR3 6 +HNR 1 +H3N 1 +HPY 1 +HNO 1 +HNM 1 +HN 1 +HOC 1 +HOP 1 +PO4 15 +PO3 15 +PO2 15 +PO 15 +PTE 15 +P 15 +HN= 1 +HN= 1 +HNC 1 +HNC 1 +HNC 1 +HNC 1 +HNN 1 +HNN 1 +HNS 1 +HNP 1 +HNC 1 +HSP 1 +HOC 1 +HOC 1 +CE4 6 +HOH 1 +O2C 8 +OXN 8 +O2N 8 +O2N 8 +O3N 8 +O-S 8 +O2S 8 +O3S 8 +O4S 8 +OSM 8 +OP 8 +O2P 8 +O3P 8 +O4P 8 +O4C 8 +HOS 1 +NR+ 7 +OM 8 +OM2 8 +HNR 1 +HIM 1 +HPD 1 +HNN 1 +HNC 1 +HGD 1 +HN5 1 +CB 6 +NPY 7 +NPY 7 +NC= 7 +NC= 7 +NC= 7 +NC% 7 +CO2 6 +CS2 6 +NSP 7 +NSO 7 +NSO 7 +NPO 7 +NPO 7 +NC% 7 +STH 16 +NO2 7 +NO3 7 +N=O 7 +NAZ 7 +NSO 7 +O+ 8 +HO+ 1 +O=+ 8 +HO= 1 +=N= 7 +N+= 7 +N+= 7 +NCN 7 +NGD 7 +CGD 6 +CNN 6 +NPD 7 +OFU 8 +C% 6 +NR% 7 +NM 7 +C5A 6 +C5B 6 +N5A 7 +N5B 7 +N2O 7 +N3O 7 +NPO 7 +OH2 8 +HS 1 +HS= 1 +HP 1 +S-P 16 +S2C 16 +SM 16 +SSM 16 +SO2 16 +SSO 16 +=S= 16 +-P= 15 +N5M 7 +CLO 17 +C5 6 +N5 7 +CIM 6 +NIM 7 +N5A 7 +N5B 7 +N5+ 7 +N5A 7 +N5B 7 +N5O 7 +FE+ 26 +FE+ 26 +F- 9 +CL- 17 +BR- 35 +LI+ 3 +NA+ 11 +K+ 19 +ZIN 30 +ZN+ 30 +CA+ 20 +CU+ 29 +CU+ 29 +MG+ 12 From d2f1c48c530f703564ab92b5b659b3890b313046 Mon Sep 17 00:00:00 2001 From: jessbade Date: Wed, 7 Feb 2024 09:42:22 -0800 Subject: [PATCH 10/18] Modify load_mol_obj to isinstance check --- isicle/io.py | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/isicle/io.py b/isicle/io.py index a60a5c6..c07c5b7 100644 --- a/isicle/io.py +++ b/isicle/io.py @@ -153,7 +153,6 @@ def load_nwchem(path, basename=None): """ - dft = isicle.qm.NWChemWrapper() parser = isicle.parse.NWChemParser() @@ -161,8 +160,7 @@ def load_nwchem(path, basename=None): result = parser.parse() dft.__dict__.update(result) - dft.geom.add___dict__({k: v for k, - v in result.items() if k != 'geom'}) + dft.geom.add___dict__({k: v for k, v in result.items() if k != "geom"}) dft.output = parser.load(path) if basename is None: @@ -170,7 +168,7 @@ def load_nwchem(path, basename=None): basename = Chem.MolToInchiKey(dft.geom.mol) except: basename = os.path.splitext((os.path.basename(path)))[0] - + dft.basename = basename return dft @@ -307,7 +305,9 @@ def load_smiles(path, force=False, basename=None): """ extension = os.path.splitext(path)[-1].lower() if "smi" in extension: - return _load_line_notation(path, func=Chem.MolFromSmiles, force=force, basename=basename) + return _load_line_notation( + path, func=Chem.MolFromSmiles, force=force, basename=basename + ) else: return _load_line_notation( path, func=Chem.MolFromSmiles, force=force, string=True, basename=basename @@ -336,7 +336,9 @@ def load_inchi(path, force=False, basename=None): path, func=Chem.MolFromInchi, force=force, string=True, basename=basename ) else: - return _load_line_notation(path, func=Chem.MolFromInchi, force=force, basename=basename) + return _load_line_notation( + path, func=Chem.MolFromInchi, force=force, basename=basename + ) def load_pickle(path): @@ -383,11 +385,10 @@ def load_joblib(path): def _check_mol_obj(mol_obj): """ """ - try: - Chem.MolToMolBlock(mol_obj) - except ValueError: - print("Invalid RDKit mol object") - raise + if isinstance(mol_obj, Chem.Mol): + return + else: + raise IOError("Not a valid RDKit Mol object passed.") def load_mol_obj(mol_obj, basename=None): @@ -478,8 +479,6 @@ def load(path, force=False, basename=None): return load_mol_obj(path, basename=basename) except: raise IOError("Not a valid RDKit mol object passed.") - - def save_xyz(path, geom): From fb7c74df354ced0672f8275d3d1ee5aca6a676d2 Mon Sep 17 00:00:00 2001 From: jessbade Date: Wed, 7 Feb 2024 09:48:14 -0800 Subject: [PATCH 11/18] Fix rdkit conformer parse (finish step) --- isicle/md.py | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/isicle/md.py b/isicle/md.py index 5c37ca4..9ecd91d 100644 --- a/isicle/md.py +++ b/isicle/md.py @@ -614,20 +614,19 @@ def _submit_etkdg(self): return params def submit(self): - mol = self.geom.mol if self.method == "distance": rdDistGeom.EmbedMultipleConfs( - mol, + self.geom.mol, numConfs=self.numConfs, randomSeed=self.randomSeed, pruneRmsThresh=self.pruneRmsThresh, forceTol=self.forceTol, ) - self.geom.mol = mol elif "etkdg" in self.method: params = self._submit_etkdg() - rdDistGeom.EmbedMultipleConfs(mol, numConfs=self.numconfs, params=params) - self.geom.mol = mol + rdDistGeom.EmbedMultipleConfs( + self.geom.mol, numConfs=self.numconfs, params=params + ) else: raise CustomException( "Failure to run RDKit MD, method and/or variant not recognized" @@ -637,15 +636,14 @@ def finish(self): """ Parse RDKit conformers generated. """ + confCount = self.geom.mol.GetNumConformers() conformers = [ - isicle.load(i, basename=self.geom.mol.basename) - for i in self.geom.mol.GetConformers() + isicle.load(Chem.Mol(self.geom.mol, confId=i), basename=self.basename) + for i in range(confCount) ] - confIDs = [i.GetId() for i in self.geom.mol.GetConformers()] - self.geom = conformers - for conf, label in zip(self.geom, confIDs): + for conf, label in zip(self.geom, range(confCount)): conf.__dict__.update(conformerID=label) def run(self, geom, **kwargs): From 5a14b6004974df02f1359e2d0eba4fabf341d558 Mon Sep 17 00:00:00 2001 From: jessbade Date: Wed, 7 Feb 2024 10:17:31 -0800 Subject: [PATCH 12/18] Clean up rdkit conformer configuration --- isicle/md.py | 116 +++++++++++++++++++++++++-------------------------- 1 file changed, 58 insertions(+), 58 deletions(-) diff --git a/isicle/md.py b/isicle/md.py index 9ecd91d..7816dc8 100644 --- a/isicle/md.py +++ b/isicle/md.py @@ -503,7 +503,7 @@ class RDKitWrapper(Geometry, WrapperInterface): """ - _defaults = ["geom"] + _defaults = ["geom", "method", "numConfs"] _default_value = None def __init__(self, **kwargs): @@ -525,93 +525,94 @@ def set_geometry(self, geom): self.geom = geom self.basename = self.geom.basename - def configure(self, method="distance", ff="mmff94", **kwargs): + def configure(self, method="distance", numConfs=10, **kwargs): """ Set conformer generation parameters. Parameters ---------- method: str `distance` for distance geometry method (default) - ff: str - Forcefield used in conformer generation. + `etkdg` for ETKDG method + `etkdgv2` for ETKDG method + `etkdgv3` for ETKDG method + `sretkdgv3` for ETKDG method + numConfs: int + the number of conformers to generate **kwargs: - Keyword arguments to configure the simulation. - See :meth:`~isicle.md.RDKitWrapper._configure_distance_geometry`, - :meth:`~isicle.md.RDKitWrapper._configure_etkdg`. + Keyword arguments to configure the simulation + See :meth:`~isicle.md.RDKitWrapper._configure_distance_geometry` """ lookup = { "distance": self._configure_distance_geometry, - "etkdg": self._configure_etkdg, + "etdg": self._configure_etdg, + "etkdg": self._configure_etkdg1, + "etkdgv1": self._configure_etkdg1, + "etkdgv2": self._configure_etkdg2, + "etkdgv3": self._configure_etkdg3, + "sretkdgv3": self._configure_etkdg3_variant, } try: lookup[method.lower()](**kwargs) except KeyError: - raise CustomException("Not a supported RDKit method") - - # TODO run forcefield availability check code found in geometry - # self.ff = ff - return + raise CustomException( + "RDKit supports distance, ETDG, ETKDG, ETKDGv2, ETKDGv3, srETKDGv3" + ) + self.method = method.lower() + self.numConfs = numConfs def _configure_distance_geometry( - self, numConfs=10, randomSeed=-1, pruneRmsThresh=-1.0, forceTol=0.001 + self, pruneRmsThresh=-1.0, forceTol=0.001, randomSeed=-1 ): """ Set parameters for distance geometry based conformer generation. Parameters ---------- - numConfs: int - Number of conformers to generate. - randomSeed: int - set seed for randomness, argument of `rdkit.Chem.rdDistGeom.EmbedMultipleConfs` pruneRmsThresh: float Greedy pruning mainting conformers are apart based upon RMSD of heavy atoms. Default: no pruning, -1.0 forceTol: float Tolerance to be used in force-field minimizations + randomSeed: int + provide a seed for the random number generator + `-1` causes RNG to not be seeded """ - # TODO numThreads argument, look in to multi-thread support in compilation - self.method = "distance" - self.numConfs = numConfs - self.randomSeed = randomSeed self.pruneRmsThresh = pruneRmsThresh self.forceTol = forceTol + self.randomSeed = randomSeed + + def _configure_etdg(self): + """ + Set parameters for ETDG conformer generation. + """ + self.params = rdDistGeom.ETDG() - def _configure_etkdg(self, version=2, variant=False): + def _configure_etkdg1(self): """ Set parameters for ETKDG conformer generation, based on work by Riniker and Landrum. - Parameters - ---------- - version: int - Specify version of ETKDG to use. - Version 1: RDKit default - Version 2: (default) 2016 release, updated torsion angle potentials - Version 3: Updated sampling small rings and macrocycles - variant: bool - Specify if variant of ETKDGv3 should be used (srETKDGv3). - True: updated sampling for small rings - False: (default) updated sampling for small rings and macrocyles + Version 1: RDKit default """ - lookup = {1: "etkdg1", 2: "etkdg2", 3: "etkdg3"} - try: - self.method = lookup[version] - self.variant = variant - except KeyError: - raise "Version options [1,2,3] for ETKDG method." - - return - - def _submit_etkdg(self): - if self.method == "etkdg1": - params = rdDistGeom.ETKDG() - elif self.method == "etkdg2": - params = rdDistGeom.ETKDGv2() - elif self.method == "etkdg3" and self.variant is False: - params = rdDistGeom.ETKDGv3() - elif self.method == "etkdg3" and self.variant is True: - params = rdDistGeom.srETKDGv3() - else: - raise "Failure to run RDKit MD, method and/or variant not recognized" - return params + self.params = rdDistGeom.ETKDG() + + def _configure_etkdg2(self): + """ + Set parameters for ETKDG conformer generation, based on work by Riniker and Landrum. + Version 2: (default) 2016 release, updated torsion angle potentials + """ + self.params = rdDistGeom.ETKDGv2() + + def _configure_etkdg3(self): + """ + Set parameters for ETKDG conformer generation, based on work by Riniker and Landrum. + Version 3: Updated sampling small rings AND macrocycles + """ + self.params = rdDistGeom.ETKDGv3() + + def _configure_etkdg3_variant(self): + """ + Set parameters for ETKDG conformer generation, based on work by Riniker and Landrum. + Version 3 variant: Updated sampling for small rings, NOT macrocycles + """ + self.params = rdDistGeom.srETKDGv3() def submit(self): if self.method == "distance": @@ -622,10 +623,9 @@ def submit(self): pruneRmsThresh=self.pruneRmsThresh, forceTol=self.forceTol, ) - elif "etkdg" in self.method: - params = self._submit_etkdg() + elif "etkdg" in self.method or "etdg" in self.method: rdDistGeom.EmbedMultipleConfs( - self.geom.mol, numConfs=self.numconfs, params=params + self.geom.mol, numConfs=self.numConfs, params=self.params ) else: raise CustomException( From 84958cf0dc42fbe45bdd7e3ccba674789b2c8d0a Mon Sep 17 00:00:00 2001 From: jessbade Date: Wed, 7 Feb 2024 11:43:49 -0800 Subject: [PATCH 13/18] Clean up RDKitWrapper doc strings --- isicle/md.py | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/isicle/md.py b/isicle/md.py index 7816dc8..d468811 100644 --- a/isicle/md.py +++ b/isicle/md.py @@ -489,18 +489,12 @@ class RDKitWrapper(Geometry, WrapperInterface): Attributes ---------- - temp_dir : str - Path to temporary directory used for simulation. - task_map : dict - Alias mapper for supported molecular dynamic presets. Includes - "optimize", "conformer", "nmr", "protonate", "deprotonate", and "tautomer". geom : :obj:`isicle.geometry.Geometry` Internal molecule representation. - fmt : str - File extension indicator. - job_list : str - List of commands for simulation. - + method: str + Method of RDKit conformer generation specified. + numConfs: int + The number of conformers to generate. """ _defaults = ["geom", "method", "numConfs"] @@ -615,6 +609,9 @@ def _configure_etkdg3_variant(self): self.params = rdDistGeom.srETKDGv3() def submit(self): + """ + Execute conformer generation with user-specifed method, parameters. + """ if self.method == "distance": rdDistGeom.EmbedMultipleConfs( self.geom.mol, @@ -660,7 +657,7 @@ def run(self, geom, **kwargs): Returns ------- - :obj:`~isicle.md.XTBWrapper` + :obj:`~isicle.md.RDKitWrapper` Wrapper object containing relevant outputs from the simulation. """ From f9bbfc93f12456978d9ea81ad28431014452453e Mon Sep 17 00:00:00 2001 From: jessbade Date: Wed, 7 Feb 2024 11:47:22 -0800 Subject: [PATCH 14/18] Change RDKitWrapper docstring --- isicle/md.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/isicle/md.py b/isicle/md.py index d468811..1e4836f 100644 --- a/isicle/md.py +++ b/isicle/md.py @@ -549,7 +549,7 @@ def configure(self, method="distance", numConfs=10, **kwargs): lookup[method.lower()](**kwargs) except KeyError: raise CustomException( - "RDKit supports distance, ETDG, ETKDG, ETKDGv2, ETKDGv3, srETKDGv3" + "RDKit supports distance, ETDG, ETKDG, ETKDGv2, ETKDGv3, srETKDGv3 methods." ) self.method = method.lower() self.numConfs = numConfs From d3dea6fc1f42a348c3ee2ade5d3b380b4dfde912 Mon Sep 17 00:00:00 2001 From: jessbade Date: Wed, 7 Feb 2024 11:55:50 -0800 Subject: [PATCH 15/18] Add rdkit test to program selector and linting --- tests/test_md.py | 224 ++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 185 insertions(+), 39 deletions(-) diff --git a/tests/test_md.py b/tests/test_md.py index 8e947dd..c305dd5 100644 --- a/tests/test_md.py +++ b/tests/test_md.py @@ -1,6 +1,6 @@ import pytest import isicle -from isicle.md import _program_selector, XTBWrapper, md +from isicle.md import _program_selector, XTBWrapper, md, RDKitWrapper from isicle.geometry import Geometry import os import shutil @@ -15,19 +15,30 @@ def localfile(path): def xtb(): return XTBWrapper() -#test capitalization ignore -@pytest.mark.parametrize('program,expected', - [('xtb', XTBWrapper), - ('XTB', XTBWrapper), - ('xTb', XTBWrapper)]) + +@pytest.fixture() +def rdkit(): + return RDKitWrapper() + + +# test capitalization ignore +@pytest.mark.parametrize( + "program,expected", + [ + ("xtb", XTBWrapper), + ("XTB", XTBWrapper), + ("xTb", XTBWrapper), + ("rdkit", RDKitWrapper), + ("RDKit", RDKitWrapper), + ("RDKIT", RDKitWrapper), + ], +) def test__program_selector(program, expected): # Check correct class is yielded assert isinstance(_program_selector(program), expected) -@pytest.mark.parametrize('program', - [('xt'), - ('amber')]) +@pytest.mark.parametrize("program", [("xt"), ("amber"), ("rd")]) def test__program_selector_fail(program): # Check unsupported selections with pytest.raises(ValueError): @@ -36,14 +47,20 @@ def test__program_selector_fail(program): def test_md(xtb): # Load geometry externally - geom = isicle.geometry.load(localfile('resources/geom_test_3D.mol')) + geom = isicle.geometry.load(localfile("resources/geom_test_3D.mol")) # Run molecular dynamics - md(geom, program='xtb', fmt='xyz', task = 'optimize', forcefield='gff',optlevel='Normal') + md( + geom, + program="xtb", + fmt="xyz", + task="optimize", + forcefield="gff", + optlevel="Normal", + ) - -class TestXTBWrapper: +class TestXTBWrapper: def test_init(self, xtb): # Check class initialization assert isinstance(xtb, XTBWrapper) @@ -54,8 +71,9 @@ def test_init(self, xtb): # Clean up xtb.temp_dir.cleanup() - @pytest.mark.parametrize('path,expected', - [('resources/geom_test_3D.mol', 'geom_test_3D')]) + @pytest.mark.parametrize( + "path,expected", [("resources/geom_test_3D.mol", "geom_test_3D")] + ) def test_set_geometry(self, xtb, path, expected): # Load geometry externally geom = isicle.geometry.load(localfile(path)) @@ -72,13 +90,10 @@ def test_set_geometry(self, xtb, path, expected): # Clean up xtb.temp_dir.cleanup() - @pytest.mark.parametrize('fmt', - [('xyz'), - ('pdb'), - ('mol')]) + @pytest.mark.parametrize("fmt", [("xyz"), ("pdb"), ("mol")]) def test_save_geometry(self, xtb, fmt): # Load geometry externally - geom = isicle.geometry.load(localfile('resources/geom_test_3D.mol')) + geom = isicle.geometry.load(localfile("resources/geom_test_3D.mol")) # Set geometry xtb.set_geometry(geom) @@ -87,34 +102,165 @@ def test_save_geometry(self, xtb, fmt): xtb.save_geometry(fmt=fmt) # Check if file exists - assert os.path.exists(os.path.join(xtb.temp_dir.name, - '{}.{}'.format(xtb.geom.basename, - fmt.lower()))) + assert os.path.exists( + os.path.join( + xtb.temp_dir.name, "{}.{}".format(xtb.geom.basename, fmt.lower()) + ) + ) + + # Clean up + xtb.temp_dir.cleanup() + + @pytest.mark.parametrize( + "tasks, forcefield, optlevel", + [ + ("optimize", ["gff", "gfn2"], ["Normal", "Tight"]), + (["crest", "protonate"], "gfn2", "Normal"), + ], + ) + def test_configure_failure(self, xtb, tasks, forcefield, optlevel): + with pytest.raises(TypeError): + xtb.configure(task=tasks, forcefield=forcefield, optlevel=optlevel) + + @pytest.mark.parametrize( + "tasks,forcefield,optlevel", + [ + ("optimize", "gff", "Normal"), + ("crest", "gff", "Normal"), + ("protonate", "gff", "Normal"), + ], + ) + def test_configure(self, xtb, tasks, forcefield, optlevel): + # Load geometry externally + geom = isicle.geometry.load(localfile("resources/geom_test_3D.mol")) + + # Set geometry + xtb.set_geometry(geom) + + # Save geometry + xtb.save_geometry(fmt="xyz") + + # Set up commandline + xtb.configure(task=tasks, forcefield=forcefield, optlevel=optlevel) + + # Clean up + xtb.temp_dir.cleanup() + + def test_run(self, xtb): + # Load geometry externally + geom = isicle.geometry.load(localfile("resources/geom_test_3D.mol")) + + # Set geometry + xtb.set_geometry(geom) + + # Save geometry + xtb.save_geometry(fmt="xyz") + + # Set up commandline + xtb.configure() + + # Run + xtb.run() + + # Clean up + xtb.temp_dir.cleanup() + + def test_finish(self, xtb): + # Load geometry externally + geom = isicle.geometry.load(localfile("resources/geom_test_3D.mol")) + + # Set geometry + xtb.set_geometry(geom) + + # Save geometry + xtb.save_geometry(fmt="xyz") + + # Set up commandline + xtb.configure() + + # Run + xtb.run() + + # Finish + xtb.finish() # Clean up xtb.temp_dir.cleanup() - @pytest.mark.parametrize('tasks, forcefield, optlevel', - [('optimize', ['gff', 'gfn2'], ['Normal', 'Tight']), - (['crest', 'protonate'], 'gfn2', 'Normal')]) - def test_configure_failure(self,xtb,tasks,forcefield,optlevel): +class TestRDKitWrapper: + def test_init(self, rdw): + # Check class initialization + assert isinstance(rdw, RDKitWrapper) + + @pytest.mark.parametrize( + "path,expected", [("resources/geom_test_3D.mol", "geom_test_3D")] + ) + def test_set_geometry(self, xtb, path, expected): + # Load geometry externally + geom = isicle.geometry.load(localfile(path)) + + # Set geometry + xtb.set_geometry(geom) + + # Check class instance + assert isinstance(xtb.geom, Geometry) + + # Check basename attribute + assert geom.basename == expected + + # Clean up + xtb.temp_dir.cleanup() + + @pytest.mark.parametrize("fmt", [("xyz"), ("pdb"), ("mol")]) + def test_save_geometry(self, xtb, fmt): + # Load geometry externally + geom = isicle.geometry.load(localfile("resources/geom_test_3D.mol")) + + # Set geometry + xtb.set_geometry(geom) + + # Save geometry + xtb.save_geometry(fmt=fmt) + + # Check if file exists + assert os.path.exists( + os.path.join( + xtb.temp_dir.name, "{}.{}".format(xtb.geom.basename, fmt.lower()) + ) + ) + + # Clean up + xtb.temp_dir.cleanup() + + @pytest.mark.parametrize( + "tasks, forcefield, optlevel", + [ + ("optimize", ["gff", "gfn2"], ["Normal", "Tight"]), + (["crest", "protonate"], "gfn2", "Normal"), + ], + ) + def test_configure_failure(self, xtb, tasks, forcefield, optlevel): with pytest.raises(TypeError): xtb.configure(task=tasks, forcefield=forcefield, optlevel=optlevel) - @pytest.mark.parametrize('tasks,forcefield,optlevel', - [('optimize', 'gff', 'Normal'), - ('crest', 'gff', 'Normal'), - ('protonate', 'gff', 'Normal')]) + @pytest.mark.parametrize( + "tasks,forcefield,optlevel", + [ + ("optimize", "gff", "Normal"), + ("crest", "gff", "Normal"), + ("protonate", "gff", "Normal"), + ], + ) def test_configure(self, xtb, tasks, forcefield, optlevel): # Load geometry externally - geom = isicle.geometry.load(localfile('resources/geom_test_3D.mol')) + geom = isicle.geometry.load(localfile("resources/geom_test_3D.mol")) # Set geometry xtb.set_geometry(geom) # Save geometry - xtb.save_geometry(fmt='xyz') + xtb.save_geometry(fmt="xyz") # Set up commandline xtb.configure(task=tasks, forcefield=forcefield, optlevel=optlevel) @@ -122,15 +268,15 @@ def test_configure(self, xtb, tasks, forcefield, optlevel): # Clean up xtb.temp_dir.cleanup() - def test_run(self, xtb): + def test_run(self, xtb): # Load geometry externally - geom = isicle.geometry.load(localfile('resources/geom_test_3D.mol')) + geom = isicle.geometry.load(localfile("resources/geom_test_3D.mol")) # Set geometry xtb.set_geometry(geom) # Save geometry - xtb.save_geometry(fmt='xyz') + xtb.save_geometry(fmt="xyz") # Set up commandline xtb.configure() @@ -143,13 +289,13 @@ def test_run(self, xtb): def test_finish(self, xtb): # Load geometry externally - geom = isicle.geometry.load(localfile('resources/geom_test_3D.mol')) + geom = isicle.geometry.load(localfile("resources/geom_test_3D.mol")) # Set geometry xtb.set_geometry(geom) # Save geometry - xtb.save_geometry(fmt='xyz') + xtb.save_geometry(fmt="xyz") # Set up commandline xtb.configure() @@ -161,4 +307,4 @@ def test_finish(self, xtb): xtb.finish() # Clean up - xtb.temp_dir.cleanup() \ No newline at end of file + xtb.temp_dir.cleanup() From 61f76fc36a56182fb7e6441b783ddc1f07b1794e Mon Sep 17 00:00:00 2001 From: jessbade Date: Wed, 7 Feb 2024 13:40:24 -0800 Subject: [PATCH 16/18] Fix RDKit tests failing --- tests/test_md.py | 145 +++++++++++++++++++++-------------------------- 1 file changed, 66 insertions(+), 79 deletions(-) diff --git a/tests/test_md.py b/tests/test_md.py index c305dd5..2105ad1 100644 --- a/tests/test_md.py +++ b/tests/test_md.py @@ -17,7 +17,7 @@ def xtb(): @pytest.fixture() -def rdkit(): +def rdw(): return RDKitWrapper() @@ -45,9 +45,9 @@ def test__program_selector_fail(program): _program_selector(program) -def test_md(xtb): +def test_md_xtb(xtb): # Load geometry externally - geom = isicle.geometry.load(localfile("resources/geom_test_3D.mol")) + geom = isicle.load(localfile("resources/geom_test_3D.mol")) # Run molecular dynamics md( @@ -60,6 +60,18 @@ def test_md(xtb): ) +def test_md_rdkit(rdw): + # Load geometry externally + geom = isicle.load(localfile("resources/geom_test_3D.mol")) + + # Run molecular dynamics + md( + geom, + program="rdkit", + method="etkdgv3", + ) + + class TestXTBWrapper: def test_init(self, xtb): # Check class initialization @@ -76,7 +88,7 @@ def test_init(self, xtb): ) def test_set_geometry(self, xtb, path, expected): # Load geometry externally - geom = isicle.geometry.load(localfile(path)) + geom = isicle.load(localfile(path)) # Set geometry xtb.set_geometry(geom) @@ -93,7 +105,7 @@ def test_set_geometry(self, xtb, path, expected): @pytest.mark.parametrize("fmt", [("xyz"), ("pdb"), ("mol")]) def test_save_geometry(self, xtb, fmt): # Load geometry externally - geom = isicle.geometry.load(localfile("resources/geom_test_3D.mol")) + geom = isicle.load(localfile("resources/geom_test_3D.mol")) # Set geometry xtb.set_geometry(geom) @@ -132,7 +144,7 @@ def test_configure_failure(self, xtb, tasks, forcefield, optlevel): ) def test_configure(self, xtb, tasks, forcefield, optlevel): # Load geometry externally - geom = isicle.geometry.load(localfile("resources/geom_test_3D.mol")) + geom = isicle.load(localfile("resources/geom_test_3D.mol")) # Set geometry xtb.set_geometry(geom) @@ -148,7 +160,7 @@ def test_configure(self, xtb, tasks, forcefield, optlevel): def test_run(self, xtb): # Load geometry externally - geom = isicle.geometry.load(localfile("resources/geom_test_3D.mol")) + geom = isicle.load(localfile("resources/geom_test_3D.mol")) # Set geometry xtb.set_geometry(geom) @@ -167,7 +179,7 @@ def test_run(self, xtb): def test_finish(self, xtb): # Load geometry externally - geom = isicle.geometry.load(localfile("resources/geom_test_3D.mol")) + geom = isicle.load(localfile("resources/geom_test_3D.mol")) # Set geometry xtb.set_geometry(geom) @@ -196,115 +208,90 @@ def test_init(self, rdw): @pytest.mark.parametrize( "path,expected", [("resources/geom_test_3D.mol", "geom_test_3D")] ) - def test_set_geometry(self, xtb, path, expected): + def test_set_geometry(self, rdw, path, expected): # Load geometry externally - geom = isicle.geometry.load(localfile(path)) + geom = isicle.load(localfile(path)) # Set geometry - xtb.set_geometry(geom) + rdw.set_geometry(geom) # Check class instance - assert isinstance(xtb.geom, Geometry) + assert isinstance(rdw.geom, Geometry) # Check basename attribute assert geom.basename == expected - # Clean up - xtb.temp_dir.cleanup() + @pytest.mark.parametrize( + "method, numConfs", + [("distance", "test"), ("etkdg", "test")], + ) + def test_configure_failure(self, rdw, method, numConfs): + with pytest.raises(ValueError): + rdw.configure(method=method, numConfs=numConfs) - @pytest.mark.parametrize("fmt", [("xyz"), ("pdb"), ("mol")]) - def test_save_geometry(self, xtb, fmt): + @pytest.mark.parametrize( + "method, numConfs, pruneRmsThresh, forceTol, randomSeed", + [ + ("distance", 10, -1.0, 0.001, -1), + ], + ) + def test_configure_distance( + self, rdw, method, numConfs, pruneRmsThresh, forceTol, randomSeed + ): # Load geometry externally - geom = isicle.geometry.load(localfile("resources/geom_test_3D.mol")) + geom = isicle.load(localfile("resources/geom_test_3D.mol")) # Set geometry - xtb.set_geometry(geom) + rdw.set_geometry(geom) - # Save geometry - xtb.save_geometry(fmt=fmt) - - # Check if file exists - assert os.path.exists( - os.path.join( - xtb.temp_dir.name, "{}.{}".format(xtb.geom.basename, fmt.lower()) - ) + # Set up commandline + rdw.configure( + method=method, + numConfs=numConfs, + pruneRmsThresh=pruneRmsThresh, + forceTol=forceTol, + randomSeed=randomSeed, ) - # Clean up - xtb.temp_dir.cleanup() - @pytest.mark.parametrize( - "tasks, forcefield, optlevel", - [ - ("optimize", ["gff", "gfn2"], ["Normal", "Tight"]), - (["crest", "protonate"], "gfn2", "Normal"), - ], + "method, numConfs", + [("etkdg", 10), ("etdg", 10)], ) - def test_configure_failure(self, xtb, tasks, forcefield, optlevel): - with pytest.raises(TypeError): - xtb.configure(task=tasks, forcefield=forcefield, optlevel=optlevel) - - @pytest.mark.parametrize( - "tasks,forcefield,optlevel", - [ - ("optimize", "gff", "Normal"), - ("crest", "gff", "Normal"), - ("protonate", "gff", "Normal"), - ], - ) - def test_configure(self, xtb, tasks, forcefield, optlevel): + def test_configure_et(self, rdw, method, numConfs): # Load geometry externally - geom = isicle.geometry.load(localfile("resources/geom_test_3D.mol")) + geom = isicle.load(localfile("resources/geom_test_3D.mol")) # Set geometry - xtb.set_geometry(geom) - - # Save geometry - xtb.save_geometry(fmt="xyz") + rdw.set_geometry(geom) # Set up commandline - xtb.configure(task=tasks, forcefield=forcefield, optlevel=optlevel) + rdw.configure(method=method, numConfs=numConfs) - # Clean up - xtb.temp_dir.cleanup() - - def test_run(self, xtb): + def test_submit(self, rdw): # Load geometry externally - geom = isicle.geometry.load(localfile("resources/geom_test_3D.mol")) + geom = isicle.load(localfile("resources/geom_test_3D.mol")) # Set geometry - xtb.set_geometry(geom) - - # Save geometry - xtb.save_geometry(fmt="xyz") + rdw.set_geometry(geom) # Set up commandline - xtb.configure() + rdw.configure() # Run - xtb.run() + rdw.submit() - # Clean up - xtb.temp_dir.cleanup() - - def test_finish(self, xtb): + def test_finish(self, rdw): # Load geometry externally - geom = isicle.geometry.load(localfile("resources/geom_test_3D.mol")) + geom = isicle.load(localfile("resources/geom_test_3D.mol")) # Set geometry - xtb.set_geometry(geom) - - # Save geometry - xtb.save_geometry(fmt="xyz") + rdw.set_geometry(geom) # Set up commandline - xtb.configure() + rdw.configure() # Run - xtb.run() + rdw.submit() # Finish - xtb.finish() - - # Clean up - xtb.temp_dir.cleanup() + rdw.finish() From ea7c32cd3f4948b8dcb4df98e466030278a6234f Mon Sep 17 00:00:00 2001 From: jessbade Date: Wed, 7 Feb 2024 13:42:11 -0800 Subject: [PATCH 17/18] Add type hints, fix exception raising in RDKitWrapper --- isicle/md.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/isicle/md.py b/isicle/md.py index 1e4836f..06cb3d6 100644 --- a/isicle/md.py +++ b/isicle/md.py @@ -519,7 +519,7 @@ def set_geometry(self, geom): self.geom = geom self.basename = self.geom.basename - def configure(self, method="distance", numConfs=10, **kwargs): + def configure(self, method: str = "distance", numConfs: int = 10, **kwargs): """ Set conformer generation parameters. Parameters @@ -545,17 +545,23 @@ def configure(self, method="distance", numConfs=10, **kwargs): "etkdgv3": self._configure_etkdg3, "sretkdgv3": self._configure_etkdg3_variant, } + method = str(method) try: lookup[method.lower()](**kwargs) except KeyError: - raise CustomException( - "RDKit supports distance, ETDG, ETKDG, ETKDGv2, ETKDGv3, srETKDGv3 methods." - ) + raise self.method = method.lower() + try: + numConfs = int(numConfs) + except ValueError: + raise self.numConfs = numConfs def _configure_distance_geometry( - self, pruneRmsThresh=-1.0, forceTol=0.001, randomSeed=-1 + self, + pruneRmsThresh: float = -1.0, + forceTol: float = 0.001, + randomSeed: int = -1, ): """ Set parameters for distance geometry based conformer generation. @@ -625,7 +631,7 @@ def submit(self): self.geom.mol, numConfs=self.numConfs, params=self.params ) else: - raise CustomException( + raise ValueError( "Failure to run RDKit MD, method and/or variant not recognized" ) From 1070db6ccd4301e233b1bb2764d553a828a95367 Mon Sep 17 00:00:00 2001 From: jessbade Date: Wed, 7 Feb 2024 13:51:04 -0800 Subject: [PATCH 18/18] Fix linting spacing --- isicle/parse.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/isicle/parse.py b/isicle/parse.py index c7544b4..8f2850f 100644 --- a/isicle/parse.py +++ b/isicle/parse.py @@ -393,7 +393,7 @@ def _parse_connectivity(self): def parse(self): """ - Extract relevant information from NWChem output + Extract relevant information from NWChem output. Parameters ----------