diff --git a/isicle/adducts.py b/isicle/adducts.py index dac76c5..8e692bf 100644 --- a/isicle/adducts.py +++ b/isicle/adducts.py @@ -301,7 +301,7 @@ def set_charge(self): ''' if self.geom.__dict__.get('charge') is None: - self.geom.__dict__.update(charge=self.geom.get_formal_charge()) + self.geom.__dict__.update(charge=self.geom.get_charge()) def _set_ions(self, ion_path=None, ion_list=None): ''' @@ -472,7 +472,7 @@ def _update_geometry_charge(self, geom): ''' ''' - geom.__dict__.update(charge=geom.get_formal_charge()) + geom.__dict__.update(charge=geom.get_charge()) def _add_ion(self, init_mol, base_atom_idx, ion_atomic_num, sanitize, forcefield, ff_iter): ''' @@ -983,9 +983,9 @@ def set_charge(self): if self.geom.load.get('filetype') == '.xyz': # self.geom.__dict__.update(charge=charge) raise ValueError( - 'Must first run geom.set_formal_charge for an xyz structure') + 'Must first run geom.set_charge for an xyz structure') else: - self.geom.__dict__.update(charge=self.geom.get_formal_charge()) + self.geom.__dict__.update(charge=self.geom.get_charge()) def _set_ions(self, ion_path=None, ion_list=None): cations, anions, complex = _parse_ions( diff --git a/isicle/conformers.py b/isicle/conformers.py index 504435a..ff0b59b 100644 --- a/isicle/conformers.py +++ b/isicle/conformers.py @@ -379,7 +379,9 @@ def __init__(self, *args): ---------- *args Objects to comprise the conformational ensemble. + ''' + super().__init__((Geometry, XYZGeometry), *args) def _check_attributes(self, attr): @@ -395,7 +397,9 @@ def _check_attributes(self, attr): ------ AttributeError If all members do not have `attr`. + ''' + value = [x.get___dict__() for x in self] for key in safelist(attr): if not all(key in x for x in value): @@ -421,7 +425,9 @@ def reduce(self, attr, func='boltzmann', **kwargs): ------- :obj:`~pandas.DataFrame` Result of reduction operation. + ''' + # Select reduction function f = _function_selector(func) @@ -493,7 +499,9 @@ def _apply_method(self, method, **kwargs): ------- :obj:`~isicle.conformers.ConformationalEnsemble` or list Result of operation, type depends on `method` return type. + ''' + # Check for attribute if not all(hasattr(x, method) for x in self): raise AttributeError('"{}" not found for all conformational ' @@ -525,7 +533,9 @@ def _apply_function(self, func, **kwargs): ------- :obj:`~isicle.conformers.ConformationalEnsemble` or list Result of operation, type depends on `func` return type. + ''' + # Apply method to collection result = [func(x, **kwargs) for x in self] @@ -560,7 +570,9 @@ def apply(self, func=None, method=None, **kwargs): ------ ValueError If neither `func` nor `method` is supplied. + ''' + # Apply function if func is not None: return self._apply_function(func, **kwargs) @@ -579,7 +591,9 @@ def get_structures(self): ------- :obj:`~isicle.conformers.ConformationalEnsemble` Conformational ensemble. + ''' + # Check for geom attribute self._check_attributes('geom') diff --git a/isicle/geometry.py b/isicle/geometry.py index 57df6f8..6f0f2b3 100644 --- a/isicle/geometry.py +++ b/isicle/geometry.py @@ -1,123 +1,66 @@ import os -import isicle import numpy as np -import pandas as pd -from isicle.interfaces import GeometryInterface, XYZGeometryInterface +from openbabel import pybel from rdkit import Chem from rdkit.Chem.MolStandardize import rdMolStandardize from rdkit.Chem.SaltRemover import SaltRemover -from openbabel import pybel + +import isicle +from isicle.interfaces import GeometryInterface, XYZGeometryInterface + class XYZGeometry(XYZGeometryInterface): """ Molecule information, specialized for XYZ files (bonding info not provided). - It is not recommended to manipulate or retrieve - attributes of this class without using class functions. - - Attributes - ---------- - xyz: list of str - Lines of XYZ block used to represent this compound's structure - global_properties : dict - Dictionary of properties calculated for this structure. Always at least - contains the "from" key, which reports the last operation performed on - this compound (e.g. load, dft_optimize). To generate compound - properties, use get_* and *_optimize functions. + It is not recommended to manipulate or retrieve attributes of this class + without using class functions. """ _defaults = ["xyz"] _default_value = None def __init__(self, **kwargs): - self.__dict__.update(dict.fromkeys(self._defaults, self._default_value)) - self.__dict__.update(kwargs) - - def _upgrade_to_Geometry(self, mol): """ - Upgrade this class to the Geometry class using a provided mol object. - Hard copies everything but the xyz structure. + Initialize :obj:`~isicle.geometry.XYZGeometry` instance. Parameters ---------- - mol : RDKit Mol object - Structure to use in new Geometry object - - Returns - ------- - Geometry - Instance containing the new mol representation. + kwargs + Keyword arguments to initialize instance attributes. """ - if mol is None: - mol = self.mol - # Create dict to load in - d = self.__dict__.copy() - d["mol"] = mol - d.pop("xyz") - - return Geometry(**d) + self.__dict__.update(dict.fromkeys( + self._defaults, self._default_value)) + self.__dict__.update(kwargs) - def _update_structure( - self, inplace, mol=None, xyz=None, xyz_filename=None, event=None - ): + def get_basename(self): """ - Return object with given structure. If inplace and a xyz structure is - provided, manipulates the current instance. Otherwise, a new - XYZGeometry or Geometry object is created with hard copied attributes. - Only one of mol, xyz, or xyz_filename is to be provided. If multiple - are provided, they are prioritized in the following order: (1) mol, - (2) xyz_filename, (3) xyz. If none of these are provided, an error is - raised. - - Parameters - ---------- - inplace : boolean - If true, update this instance with the new structure. Otherwise, - create a new XYZGeometry or Geometry instance and populate it with - the structure. - mol : RDKit Mol object (optional) - Structure with 3D abilities. - xyz_filename: str (optional) - Path to file to load in xyz from - xyz: list of str (optional) - Lines from xyz block to use as structure representation. + Returns a copy of this object's basename (original filename). Returns ------- - XYZGeometry or Geometry - Updated structure instance. Geometry is returned if mol was - provided, otherwise XYZGeometry is returned. + str + Basename of original filepath. """ - if mol is not None: - # Upgrade to the Geometry class - geom = self._upgrade_to_Geometry(mol) - - else: - if inplace: # Modify this object - geom = self - else: # Make a new object - geom = self.__copy__() - - # Ensure exactly one of xyz or a filename is provided - # Add as structure of the new XYZGeometry - if xyz_filename is not None: - geom.xyz = load_xyz(xyz_filename) - elif xyz is not None: - geom.xyz = xyz - - return geom - - def get_basename(self): - """Returns a copy of this object's basename (original filename).""" return self.basename def add___dict__(self, d, override=False): - """Accepts a dictionary of values and adds any non-conflicting - information to the attribute dictionary""" + """ + Accepts a dictionary of values and adds any non-conflicting + information to the attribute dictionary. + + Parameters + ---------- + d : dict + Attribute dictionary. + override : bool + Singal whether to override existing attributes. + + """ if not override: # Remove keys that are already present in attribute dictionary @@ -125,32 +68,50 @@ def add___dict__(self, d, override=False): # Add to attributes self.__dict__.update(d) - return - def dft(self, program="NWChem", template=None, **kwargs): + def dft(self, program='NWChem', **kwargs): """ - Optimize geometry from XYZ, using stated functional and basis set. - Additional inputs can be grid size, optimization criteria level, + Perform density functional theory calculations according to supplied task list + and configuration parameters for specified program (currently only NWChem). + + Parameters + ---------- + kwargs + Keyword arguments supplied to selected program. See `run` method of relevant + wrapper for configuration parameters, e.g. :meth:`~isicle.qm.NWChemWrapper.run`. + + Returns + ------- + :obj:`~isicle.qm.{program}Wrapper` + Wrapper object containing relevant outputs from the simulation. + """ - geom = isicle.qm.dft( - self.__copy__(), program=program, template=template, **kwargs - ) - return geom + return isicle.qm.dft(self, program=program, **kwargs) def md(self, program="xtb", **kwargs): """ - Optimize geometry or generate conformers or adducts from XYZ using stated forcefield. - Additional inputs can be energy window, optimization criteria level, charge, or ion. + Optimize geometry or generate conformers or adducts using stated forcefield. + + Parameters + ---------- + kwargs + Keyword arguments supplied to selected program. See `run` method of relevant + wrapper for configuration parameters, e.g. :meth:`~isicle.md.XTBWrapper.run`. + + Returns + ------- + :obj:`~isicle.md.{program}Wrapper` + Wrapper object containing relevant outputs from the simulation. + """ - geom = isicle.md.md(self.__copy__(), program=program, **kwargs) - return geom + return isicle.md.md(self.__copy__(), program=program, **kwargs) - def ionize(self, ion_path=None, ion_list=None, save=False, path=None, **kwargs): + def ionize(self, ion_path=None, ion_list=None, **kwargs): """ - Calls xtb CREST to ionize xyz geometry, using specified list of ions and method of ionization. - WARNING: must run isicle.geometry.XYZGeometry.set_formal_charge prior to this. + Calls xtb CREST to ionize XYZ geometry, using specified list of ions and method of ionization. + WARNING: must run isicle.geometry.XYZGeometry.set_charge prior to this. Parameters ---------- @@ -161,42 +122,57 @@ def ionize(self, ion_path=None, ion_list=None, save=False, path=None, **kwargs): List of strings of adducts to be considered. Must be specifed in syntax `Atom+` or `Atom-`, eg. `H+`, `Na+`, `H-Na+` Either ion_path or ion_list must be specified - save : bool - Saves wrapper object to .pkl in specified path directory - path : str (optional) - Directory to write output files - **kwargs : - see :meth: `~isicle.adducts.CRESTIonizationWrapper.submit` + kwargs + See :meth: `~isicle.adducts.CRESTIonizationWrapper.submit` for more options Returns ------- Dictionary of adducts, `{:[]}` + """ + if self.__dict__.get("charge") is None: raise ValueError( - "Must first run isicle.geometry.XYZGeometry.set_formal_charge for an xyz structure" + "Must first run isicle.geometry.XYZGeometry.set_charge for an XYZ structure" ) iw = isicle.adducts.ionize("crest").run( self.__copy__(), ion_path=ion_path, ion_list=ion_list, **kwargs ) - if save == True and path is not None: - iw.save(path) - return iw - def set_formal_charge(self, charge): - """Specify and set the known formal charge of the xyz molecule to __dict__""" + def set_charge(self, charge): + """ + Specify and set the known formal charge of the XYZ molecule. + + Parameters + ---------- + charge : int + Nominal charge of the molecule. + + """ + + # Attempt cast to int try: charge = int(charge) except: - raise TypeError("Charge specifed must be str or int") + raise TypeError("Charge specifed must be str or int.") + + # Update attribute self.__dict__.update(charge=charge) - return self.charge def get_natoms(self): - """Calculate total number of atoms.""" + """ + Calculate total number of atoms. + + Returns + ------- + int + Number of atoms. + + """ + self.__dict__["natoms"] = int(self.xyz[0].strip()) return self.__dict__["natoms"] @@ -213,35 +189,95 @@ def get_atom_indices(self, atoms=["C", "H"]): ------- list of int Atom indices. + """ + + # Index container idx = [] + # Enumerate rows for i in range(2, len(self.xyz)): + # Extract atom atom = self.xyz[i].split(" ")[0] + + # Check if of interest if atom in atoms: + # Append index idx.append(i - 2) + return idx def get___dict__(self): - """Return a copy of this object's attribute dictionary""" + """ + Return a copy of this object's attributes as a dictionary. + + Returns + ------- + dict + Instance attributes. + + """ + return self.__dict__.copy() - def __copy__(self): - """Return hard copy of this class instance.""" - # TODO: manage what should be passed, rather than all? + def __copy__(self, **kwargs): + """ + Copy this class instance. + + Parameters + ---------- + kwargs + Keyword arguments to update copy. + + Returns + ------- + :obj:`~isicle.geometry.XYZGeometry` + Molecule representation. + + """ + + # Copy attributes d = self.__dict__.copy() - return type(self)(**d) + + # Safely copy mol if present + if 'mol' in d: + d['mol'] = self.to_mol() + + # Build self instance from scratch + instance = type(self)(**d) + + # Update from kwargs + instance.__dict__.update(kwargs) + + return instance def to_xyzblock(self): - """Get XYZ text for this structure.""" + """ + Get XYZ text for this structure. + + Returns + ------- + str + XYZ text. + + """ + return "\n".join(self.xyz) def to_mol(self): + """ + Get structure representation in :obj:`~rdkit.Chem.rdchem.Mol` format. + + :obj:`~rdkit.Chem.rdchem.Mol` + RDKit Mol instance. + + """ + self.temp_dir = isicle.utils.mkdtemp() geomfile = os.path.join(self.temp_dir, - '{}.{}'.format(self.basename, - "xyz")) + '{}.{}'.format(self.basename, + "xyz")) isicle.io.save(geomfile, self) mols = list(pybel.readfile("xyz", geomfile)) @@ -252,122 +288,56 @@ def to_mol(self): class Geometry(XYZGeometry, GeometryInterface): """ - Molecule information, specialized for 3D representations. - It is not recommended to manipulate or retrieve - attributes of this class without using class functions. - - Attributes - ---------- = - mol : RDKit Mol object - Current structure. - __dict__ : dict - Dictionary of properties calculated for this structure. + Molecule information, specialized for 3D representations. It is not + recommended to manipulate or retrieve attributes of this class without + using class functions. """ _defaults = ["mol"] _default_value = None def __init__(self, **kwargs): - self.__dict__.update(dict.fromkeys(self._defaults, self._default_value)) + self.__dict__.update(dict.fromkeys( + self._defaults, self._default_value)) self.__dict__.update(kwargs) - - def _downgrade_to_XYZGeometry(self, xyz): + def _is_embedded(self): """ - Downgrade this class to the XYZGeometry class using a provided xyz. - Hard copies everything but the mol structure. - - Parameters - ---------- - xyz : list of str - Lines of xyz block to use for new object. + Check if molecule has embedded 3D coordinates. Returns ------- - XYZGeometry - Instance containing the new xyz representation. + bool + Indication of 3D coordinate presence. """ - # Create dict to load in - d = self.__dict__.copy() - d["xyz"] = xyz - - return XYZGeometry(**d) + try: + self.mol.GetConformers() + return True + except: + return False - def _update_structure( - self, inplace, mol=None, xyz=None, xyz_filename=None): + def initial_optimize(self, embed=False, forcefield="UFF", ff_iter=200): """ - Return object with given structure. If inplace and a mol structure is - provided, manipulates the current instance. Otherwise, a new - XYZGeometry or Geometry object is created with hard copied attributes. - Only one of mol, xyz, or xyz_filename is to be provided. If multiple - are provided, they are prioritized in the following order: (1) mol, - (2) xyz_filename, (3) xyz. If none of these are provided, an error is - raised. + Initial molecule optimization by basic force field to establish rough + 3D coordinates. Further optimization (molecular dynamics, density + functional theory) recommended. Parameters ---------- - inplace : boolean - If true, update this instance with the new structure. Otherwise, - create a new XYZGeometry or Geometry instance and populate it with - the structure. - mol : RDKit Mol object (optional) - Structure with 3D abilities. - xyz_filename: str (optional) - Path to file to load in xyz from - xyz: list of str (optional) - Lines from xyz block to use as structure representation. - - Returns - ------- - XYZGeometry or Geometry - Updated structure instance. Geometry is returned if mol was - provided, otherwise XYZGeometry is returned. - - """ - - if mol is None: - - if xyz_filename is not None: - # Load in file first - xyz = load_xyz(xyz_filename) - - if xyz is not None: - # Downgrade to XYZGeometry class - geom = self._downgrade_to_XYZGeometry(xyz) - - else: - if inplace: # Modify this object - geom = self - else: # Make a new object - geom = self.__copy__() - geom.mol = mol - - # Add event that led to this change in structure - # If event is None, nothing will happen - - return geom - - def initial_optimize( - self, embed=False, forcefield="UFF", ff_iter=200, inplace=False - ): - """ - Params - ------ embed: bool - Try embedding molecule with eigenvalues of distance matrix - Except failure seeds with random coordinates + Attempt molecule embedding with eigenvalues of distance matrix. + Failure results in seeding with random coordinates. forcefield: str - Specify "UFF" for Universal Force Field optimization by RDKit - Specify "MMFF" or "MMFF94" for Merck Molecular Force Field 94 - Specify "MMFF94s" for the MMFF94 s variant - inplace: bool - If geom.mol is modified inplace + Specify "UFF" for universal force field, "MMFF" or "MMFF94" for + Merck molecular force field 94, or "MMFF94s" for the MMFF94 s variant. + Returns ------- - Geometry - Updated structure instance + :obj:`~isicle.geometry.Geometry` + Molecule representation. + """ def _forcefield_selector(forcefield, mw): @@ -379,7 +349,8 @@ def _forcefield_selector(forcefield, mw): if Chem.rdForceFieldHelpers.UFFHasAllMoleculeParams(mw) is True: return Chem.AllChem.UFFOptimizeMolecule else: - raise ValueError("UFF is not available for all atoms in molecule.") + raise ValueError( + "UFF is not available for all atoms in molecule.") elif forcefield in ["mmff", "mmff94", "mmff94s"]: if Chem.rdForceFieldHelpers.MMFFHasAllMoleculeParams(mw) is True: return Chem.rdForceFieldHelpers.MMFFOptimizeMolecule @@ -392,25 +363,41 @@ def _forcefield_selector(forcefield, mw): "RDKit only supports UFF, MMFF, MMFF94, MMFF94s as forcefields." ) + # Get rdkit mol mol = self.to_mol() - if embed == True: + + # Embed molecule 3D coordinates + if embed is True: + # Attempt embedding try: Chem.AllChem.EmbedMolecule(mol) + + # Use random coordinates except: Chem.AllChem.EmbedMolecule(mol, useRandomCoords=True) + + # Optimize according to supplied forcefield if forcefield is not None: - if "mmff94s" in forcefield.lower(): - _forcefield_selector(forcefield, mol)( - mol, mmffVariant=forcefield, maxIters=ff_iter - ) - else: - _forcefield_selector(forcefield, mol)(mol, maxIters=ff_iter) - geom = self._update_structure(inplace, mol=mol) + # Check if embedded + if self._is_embedded(): + + # Forcefield selection + if "mmff94s" in forcefield.lower(): + _forcefield_selector(forcefield, mol)( + mol, mmffVariant=forcefield, maxIters=ff_iter + ) + else: + _forcefield_selector(forcefield, mol)(mol, maxIters=ff_iter) + + # Not embedded + else: + raise ValueError('Molecule must have embedded 3D coordinates.') - return geom + # Return copy with updated mol + return self.__copy__(mol=mol) - def desalt(self, salts=None, inplace=False): + def desalt(self, salts=None): """ Desalts RDKit mol object using Chem.SaltRemover module. @@ -418,22 +405,18 @@ def desalt(self, salts=None, inplace=False): ---------- salts : str (optional) Salt type to remove. Ex: 'Cl', 'Br', '[Na+]'. Default: None. - inplace : boolean (optional) - If true, update this instance with the new structure. Otherwise, - create a new Geometry instance and populate it with the structure. - Default: False. Returns ------- - Geometry - Molecule with given salt(S) removed. + :obj:`~isicle.geometry.Geometry` + Moleculerepresentation. """ # TODO: should this raise an error instead? # If no salts given, skip desalting if salts is None: - return self._update_structure(inplace, mol=self.to_mol()) + return self.__copy__() remover = SaltRemover(defnFormat="smiles", defnData=salts) # defnData="[Cl,Br,Na]" *sample definition of salts to be removed* @@ -446,26 +429,16 @@ def desalt(self, salts=None, inplace=False): # atomno = res.GetNumAtoms # if relevant to future use, returns atom count post desalting - geom = self._update_structure(inplace, mol=mol) - # TODO: add any properties from this operation to global_props? + return self.__copy__(mol=mol) - return geom - - def neutralize(self, inplace=False): + def neutralize(self): """ Neutralizes RDKit mol object using neutralization reactions. - Parameters - ---------- - inplace : boolean (optional) - If true, update this instance with the new structure. Otherwise, - create a new Geometry instance and populate it with the structure. - Default: False. - Returns ------- - Geometry - Neutralized form of the molecule. + :obj:`~isicle.geometry.Geometry` + Molecule representation. """ @@ -495,6 +468,8 @@ def _initialize_neutralisation_reactions(): reactions = _initialize_neutralisation_reactions() mol = self.to_mol() + + # TODO: `replaced` is unused replaced = False for i, (reactant, product) in enumerate(reactions): while mol.HasSubstructMatch(reactant): @@ -502,12 +477,9 @@ def _initialize_neutralisation_reactions(): rms = Chem.AllChem.ReplaceSubstructs(mol, reactant, product) mol = rms[0] - geom = self._update_structure(inplace, mol=mol) - # TODO: add any properties from this operation to global_props? - - return geom + return self.__copy__(mol=mol) - def tautomerize(self, return_all=False, inplace=False): + def tautomerize(self, return_all=False): """ Generate tautomers according to RDKit TautomerEnumerator() method. @@ -516,14 +488,10 @@ def tautomerize(self, return_all=False, inplace=False): return_all : boolean (optional) If true, return all tautomers generated. Otherwise, only return the most common. Default=False - inplace : boolean (optional) - If true, update this instance with the new structure. Otherwise, - create a new Geometry instance and populate it with the structure. - Default: False. Returns ------- - Geometry or list(Geometry) + :obj:`~isicle.geometry.Geometry` or list of :obj:`~isicle.geometry.Geometry` Tautomer(s) of starting structure. """ @@ -536,81 +504,77 @@ def tautomerize(self, return_all=False, inplace=False): res = [mol] tauts = enumerator.Enumerate(mol) smis = [Chem.MolToSmiles(x) for x in tauts] - s_smis = sorted((x, y) for x, y in zip(smis, tauts) if x != self.to_smiles()) + s_smis = sorted((x, y) + for x, y in zip(smis, tauts) if x != self.to_smiles()) res += [y for x, y in s_smis] # Ensure res is a list of mol objects if return_all: new_geoms = [] for r in res: - geom = self._update_structure(False, mol=r, event="tautomerize") + geom = self.__copy__(mol=r) new_geoms.append(geom) return new_geoms - geom = self._update_structure(inplace, mol=res[0]) - # TODO: add any properties from this operation to global_props? + return self.__copy__(mol=res[0]) - return geom - - def kekulize(self, inplace=False): + def kekulize(self): """ - `Kekulizes the molecule if possible. Otherwise the molecule is not modified` + Kekulizes the molecule if possible. Otherwise the molecule is not modified. This is recommended for aromatic molecules undergoing explicit ionization. Aromatic bonds are explicitly defined and aromatic flags are cleared. + """ + mol = Chem.KekulizeIfPossible(self.to_mol(), clearAromaticFlags=True) - geom = self._update_structure(inplace, mol=mol, event="kekulize") - return geom - - def ionize( - self, - ion_path=None, - ion_list=None, - method="explicit", - save=False, - path=None, - **kwargs, - ): + return self.__copy__(mol=mol) + + def ionize(self, ion_path=None, ion_list=None, method="explicit", **kwargs): """ Ionize geometry, using specified list of ions and method of ionization. Parameters ---------- ion_path : str - Filepath to text file containing ions with charge (eg. `H+`) to be considered - Either ion_path or ion_list must be specified + Filepath to text file containing ions with charge (eg. `H+`) to be + considered. Either ion_path or ion_list must be specified. ion_list : list - List of strings of adducts to be considered. - Must be specifed in syntax `Atom+` or `Atom-`, eg. `H+`, `Na+`, `H-Na+` - Either ion_path or ion_list must be specified + List of strings of adducts to be considered. Must be specifed in + syntax `Atom+` or `Atom-`, eg. `H+`, `Na+`, `H-Na+`. Either ion_path + or ion_list must be specified method : str Method of ionization to be used, 'explicit' or 'crest' is accepted - save : bool - Saves wrapper object to .pkl in specified path directory - path : str (optional) - Directory to write output files ensembl : bool (optional) Returns instead a list of adduct geometries - **kwargs: + kwargs: see :meth: `~isicle.adducts.ExplicitIonizationWrapper.submit` or `~isicle.adducts.CRESTIonizationWrapper.submit` for more options Returns ------- - Dictionary of adducts, `{:[]}` + :obj:`~isicle.adducts.IonizationWrapper` + Ionization result. + """ + iw = isicle.adducts.ionize(method).run( self.__copy__(), ion_path=ion_path, ion_list=ion_list, **kwargs ) - if save == True and path is not None: - iw.save(path) - return iw def get_natoms(self): - """Calculate total number of atoms.""" + """ + Calculate total number of atoms. + + Returns + ------- + int + Number of atoms. + + """ + natoms = Chem.Mol.GetNumAtoms(self.to_mol()) self.__dict__["natoms"] = natoms return self.__dict__["natoms"] @@ -632,6 +596,7 @@ def get_atom_indices( ------- list of int Atom indices. + """ atoms = [lookup[x] for x in atoms] @@ -643,7 +608,16 @@ def get_atom_indices( return idx def get_total_partial_charge(self): - """Sum the partial charge across all atoms.""" + """ + Sum the partial charge across all atoms. + + Returns + ------- + float + Total partial charge. + + """ + mol = self.to_mol() Chem.AllChem.ComputeGasteigerCharges(mol) contribs = [ @@ -652,80 +626,151 @@ def get_total_partial_charge(self): ] return np.nansum(contribs) - def get_formal_charge(self): - """Calculate formal charge of the molecule.""" - mol = self.to_mol() - return Chem.rdmolops.GetFormalCharge(mol) + def get_charge(self): + """ + Get formal charge of the molecule. + + Returns + ------- + int + Formal charge. + + """ + + return Chem.rdmolops.GetFormalCharge(self.to_mol()) + + def set_charge(self, charge=None): + """ + Set formal charge of the molecule. + + Parameters + ---------- + charge : int + Formal charge of molecule. + + """ - def set_formal_charge(self): - """Set formal charge of the molecule to __dict__""" - self.__dict__.update(charge=self.get_formal_charge()) - return self.charge + if charge is None: + charge = self.get_charge() + + self.__dict__.update(charge=int(charge)) + + def __copy__(self, **kwargs): + """ + Return hard copy of this class instance. - def __copy__(self): - """Return hard copy of this class instance.""" - # TODO: manage what should be passed, rather than all? + Parameters + ---------- + kwargs + Keyword arguments to update copy. + + Returns + ------- + :obj:`~isicle.geometry.Geometry` + Molecule representation. + + """ + + # Copy attributes d = self.__dict__.copy() - d["mol"] = self.to_mol() - return type(self)(**d) + + # Safely copy mol if present + if 'mol' in d: + d['mol'] = self.to_mol() + + # Build self instance from scratch + instance = type(self)(**d) + + # Update from kwargs + instance.__dict__.update(kwargs) + + return instance def to_mol(self): """ - Returns RDKit Mol object for this Geometry. + Returns :obj:`~rdkit.Chem.rdchem.Mol` instance for this Geometry. Returns ------- - RDKit Mol object - Copy of structure + :obj:`~rdkit.Chem.rdchem.Mol` + RDKit Mol instance. """ return self.mol.__copy__() def to_smiles(self): - """Get SMILES for this structure.""" + """ + Get SMILES for this structure. + + Returns + ------- + str + SMILES string. + + """ + return Chem.MolToSmiles(self.to_mol()) def to_inchi(self): - """Get InChI for this structure.""" + """ + Get InChI for this structure. + + Returns + ------- + str + InChI string. + + """ + return Chem.MolToInchi(self.to_mol()) def to_smarts(self): - """Get SMARTS for this structure.""" + """ + Get SMARTS for this structure. + + Returns + ------- + str + SMARTS string. + """ return Chem.MolToSmarts(self.to_mol()) def to_xyzblock(self): - """Get XYZ text for this structure.""" - return Chem.MolToXYZBlock(self.to_mol()) + """ + Get XYZ text for this structure. - def to_pdbblock(self): - """Get PDB text for this structure""" - return Chem.MolToPDBBlock(self.to_mol()) + Returns + ------- + str + XYZ representation as string. - def to_molblock(self): - """Get PDB text for this structure""" - return Chem.MolToMolBlock(self.to_mol()) + """ + return Chem.MolToXYZBlock(self.to_mol()) -class MDOptimizedGeometry(Geometry): - """ - Builds off of the 3D representation, with additional methods specific to a - representation with MD optimized 3D coordinates. Any methods that would - result in a more defined representation (e.g. DFT optimized) should yield - the appropriate subclass. - """ + def to_pdbblock(self): + """ + Get PDB text for this structure. + + Returns + ------- + str + PDB representation as string. - def __init__(self): - # Initialize the base class - super().__init__() + """ + return Chem.MolToPDBBlock(self.to_mol()) -class DFTOptimizedGeometry(Geometry): - """ - Builds off of the 3D representation, with additional methods specific to a - representation with DFT optimized 3D coordinates. - """ + def to_molblock(self): + """ + Get :obj:`~rdkit.Chem.rdchem.Mol` text for this structure. + + Returns + ------- + str + :obj:`~rdkit.Chem.rdchem.Mol` representation as string. - def __init__(self): - # Initialize the base class - super().__init__() + """ + + return Chem.MolToMolBlock(self.to_mol()) diff --git a/isicle/interfaces.py b/isicle/interfaces.py index 377a747..709420a 100644 --- a/isicle/interfaces.py +++ b/isicle/interfaces.py @@ -2,10 +2,10 @@ class FileParserInterface(metaclass=abc.ABCMeta): - ''' + """ Abstract base class for file parser interface. All file parsers conform to this definition. - ''' + """ @classmethod def __subclasshook__(cls, subclass): @@ -19,17 +19,17 @@ def __subclasshook__(cls, subclass): @abc.abstractmethod def load(self, path: str): - '''Load in the data file''' + """Load in the data file""" raise NotImplementedError @abc.abstractmethod def parse(self): - '''Extract relevant information from data''' + """Extract relevant information from data""" raise NotImplementedError @abc.abstractmethod def save(self, path: str): - '''Write parsed object to file''' + """Write parsed object to file""" raise NotImplementedError @@ -51,47 +51,41 @@ def __subclasshook__(cls, subclass): and callable(subclass.__copy__) and hasattr(subclass, 'to_xyzblock') and callable(subclass.to_xyzblock) - and hasattr(subclass, 'save_xyz') - and callable(subclass.save_xyz) - and hasattr(subclass, 'save_pickle') - and callable(subclass.save_pickle) - and hasattr(subclass, 'save') - and callable(subclass.save) or NotImplemented) @abc.abstractmethod def dft(self): - '''Optimize geometry using density function theory (DFT) methods.''' + """Optimize geometry using density function theory (DFT) methods.""" raise NotImplementedError @abc.abstractmethod def md(self): - '''Optimize geometry using molecule dynamics methods (MD).''' + """Optimize geometry using molecule dynamics methods (MD).""" raise NotImplementedError @abc.abstractmethod def get_natoms(self): - '''Count number of atoms''' + """Count number of atoms""" raise NotImplementedError @abc.abstractmethod def get_atom_indices(self): - '''Extract indices of each atom from the internal geometry.''' + """Extract indices of each atom from the internal geometry.""" raise NotImplementedError @abc.abstractmethod def get___dict__(self): - '''Return a copy of this object's attributes dictionary''' + """Return a copy of this object's attributes dictionary""" raise NotImplementedError @abc.abstractmethod def __copy__(self): - '''Return hard copy of this class instance.''' + """Return hard copy of this class instance.""" raise NotImplementedError @abc.abstractmethod def to_xyzblock(self): - '''Get XYZ text for this structure.''' + """Get XYZ text for this structure.""" raise NotImplementedError @@ -113,35 +107,34 @@ def __subclasshook__(cls, subclass): @abc.abstractmethod def to_mol(self, path: str): - '''Returns RDKit Mol object for this Geometry.''' + """Returns RDKit Mol object for this Geometry.""" raise NotImplementedError @abc.abstractmethod def get_total_partial_charge(self): - '''Determine total partial charge of molecule''' + """Determine total partial charge of molecule""" raise NotImplementedError @abc.abstractmethod def to_smiles(self, path: str): - '''Return SMILES representation''' + """Return SMILES representation""" raise NotImplementedError @abc.abstractmethod def to_inchi(self, path: str): - '''Return InChI representation''' + """Return InChI representation""" raise NotImplementedError @abc.abstractmethod def to_smarts(self, path: str): - '''Return SMARTS representation''' + """Return SMARTS representation""" raise NotImplementedError class WrapperInterface(metaclass=abc.ABCMeta): - ''' - Abstract base class for wrapper interface. All QM - wrappers conform to this definition. - ''' + """ + Abstract base class for wrapper interfaces. + """ @classmethod def __subclasshook__(cls, subclass): @@ -159,64 +152,25 @@ def __subclasshook__(cls, subclass): @abc.abstractmethod def set_geometry(self): - '''Load the input geometry file''' + """Assign the input geometry.""" raise NotImplementedError @abc.abstractmethod def configure(self): - '''Configure the run.''' + """Configure the run.""" raise NotImplementedError @abc.abstractmethod def submit(self): - '''Configure the run.''' - raise NotImplementedError - - @abc.abstractmethod - def run(self): - '''Execute/submit the run.''' - raise NotImplementedError - - @abc.abstractmethod - def finish(self, path: str): - '''Finalize, parse, return result object.''' - raise NotImplementedError - - -class MDWrapperInterface(metaclass=abc.ABCMeta): - ''' - Abstract base class for molecular dynamics wrapper interface. All QM - wrappers conform to this definition. - ''' - - @classmethod - def __subclasshook__(cls, subclass): - return (hasattr(subclass, 'set_geometry') - and callable(subclass.set_geometry) - and hasattr(subclass, 'job_type') - and callable(subclass.job_type) - and hasattr(subclass, 'run') - and callable(subclass.run) - and hasattr(subclass, 'finish') - and callable(subclass.finish) - or NotImplemented) - - @abc.abstractmethod - def set_geometry(self): - '''Load the input geometry file''' - raise NotImplementedError - - @abc.abstractmethod - def job_type(self): - '''Make list of jobs.''' + """Configure the run.""" raise NotImplementedError @abc.abstractmethod def run(self): - '''Execute/submit the run.''' + """Execute/submit the run.""" raise NotImplementedError @abc.abstractmethod def finish(self, path: str): - '''Finalize, parse, return result object.''' + """Finalize, parse, return result object.""" raise NotImplementedError diff --git a/isicle/io.py b/isicle/io.py index a60a5c6..2fca531 100644 --- a/isicle/io.py +++ b/isicle/io.py @@ -153,7 +153,6 @@ def load_nwchem(path, basename=None): """ - dft = isicle.qm.NWChemWrapper() parser = isicle.parse.NWChemParser() @@ -305,6 +304,7 @@ def load_smiles(path, force=False, basename=None): Molecule representation. """ + extension = os.path.splitext(path)[-1].lower() if "smi" in extension: return _load_line_notation(path, func=Chem.MolFromSmiles, force=force, basename=basename) @@ -331,6 +331,7 @@ def load_inchi(path, force=False, basename=None): Molecule representation. """ + if "inchi=" in path.lower(): return _load_line_notation( path, func=Chem.MolFromInchi, force=force, string=True, basename=basename @@ -386,7 +387,7 @@ def _check_mol_obj(mol_obj): try: Chem.MolToMolBlock(mol_obj) except ValueError: - print("Invalid RDKit mol object") + print("Invalid RDKit Mol object") raise @@ -443,6 +444,7 @@ def load(path, force=False, basename=None): Molecule representation. """ + if (type(path)) == str: path = path.strip() extension = os.path.splitext(path)[-1].lower() @@ -480,8 +482,6 @@ def load(path, force=False, basename=None): raise IOError("Not a valid RDKit mol object passed.") - - def save_xyz(path, geom): """ Save molecule geometry as XYZ file. diff --git a/isicle/md.py b/isicle/md.py index 1fa0a18..aaa94e2 100644 --- a/isicle/md.py +++ b/isicle/md.py @@ -10,14 +10,14 @@ from isicle.interfaces import WrapperInterface from isicle.parse import XTBParser -''' +""" Files resulting from an xtb job always run in the same directory that the command is issued in, no matter where the input is. Can direct the .log file, but no other files. -''' +""" def _program_selector(program): - ''' + """ Selects a supported molecular dynamics program for associated simulation. Currently only NWChem has been implemented. @@ -32,7 +32,7 @@ def _program_selector(program): Wrapped functionality of the selected program. Must implement :class:`~isicle.interfaces.MDWrapperInterface`. - ''' + """ program_map = {'xtb': XTBWrapper, 'rdkit': RDKitWrapper, 'tinker': TINKERWrapper} @@ -44,7 +44,7 @@ def _program_selector(program): def md(geom, program='xtb', **kwargs): - ''' + """ Optimize geometry via molecular dyanmics using supplied forcefield and basis set. @@ -58,14 +58,14 @@ def md(geom, program='xtb', **kwargs): result Object containing relevant outputs from the simulation. - ''' + """ # Select program return _program_selector(program).run(geom, **kwargs) class XTBWrapper(XYZGeometry, WrapperInterface): - ''' + """ Wrapper for xtb functionality. Implements :class:`~isicle.interfaces.WrapperInterface` to ensure required methods are exposed. @@ -84,7 +84,7 @@ class XTBWrapper(XYZGeometry, WrapperInterface): job_list : str List of commands for simulation. - ''' + """ _defaults = ['geom'] _default_value = None @@ -95,7 +95,7 @@ def __init__(self, **kwargs): self.__dict__.update(**kwargs) def set_geometry(self, geom): - ''' + """ Set :obj:`~isicle.geometry.Geometry` instance for simulation. Parameters @@ -103,7 +103,7 @@ def set_geometry(self, geom): geom : :obj:`~isicle.geometry.Geometry` Molecule representation. - ''' + """ # Assign geometry self.geom = geom @@ -113,7 +113,7 @@ def set_geometry(self, geom): self.save_geometry() def save_geometry(self, fmt='xyz'): - ''' + """ Save internal :obj:`~isicle.geometry.Geometry` representation to file. Parameters @@ -122,7 +122,7 @@ def save_geometry(self, fmt='xyz'): Filetype used by xtb. Must be "xyz", "smi", ".inchi", ".mol", ".xyz", ".pdb", ".pkl". - ''' + """ # Path operationspyth self.temp_dir = isicle.utils.mkdtemp() self.fmt = fmt.lower() @@ -135,7 +135,7 @@ def save_geometry(self, fmt='xyz'): self.geom.path = geomfile def _configure_xtb(self, forcefield='gfn2', optlevel='normal', charge=None, solvation=None): - ''' + """ Set command line for xtb simulations. Parameters @@ -152,7 +152,7 @@ def _configure_xtb(self, forcefield='gfn2', optlevel='normal', charge=None, solv Charge of molecular system. Default : 0 (Neutral charge) - ''' + """ # Add base command s = 'xtb ' @@ -184,7 +184,7 @@ def _configure_crest(self, ewin=6, optlevel='Normal', forcefield='gfn2', protonate=False, deprotonate=False, tautomerize=False, ion=None, charge=None, dryrun=False, processes=1, solvation=None, ignore_topology=False): - ''' + """ Set command line for crest simulations. Parameters @@ -215,7 +215,7 @@ def _configure_crest(self, ewin=6, optlevel='Normal', forcefield='gfn2', charge : int Charge of molecular system. Default : 0 (Neutral charge) - ''' + """ # Start base command s = 'crest ' @@ -275,7 +275,7 @@ def _configure_crest(self, ewin=6, optlevel='Normal', forcefield='gfn2', def configure(self, task='optimize', forcefield='gfn2', charge=None, ewin=6, ion=None, optlevel='Normal', dryrun=False, processes=1, solvation=None, ignore_topology=False): - ''' + """ Generate command line Parameters @@ -300,7 +300,7 @@ def configure(self, task='optimize', forcefield='gfn2', charge=None, charge : int Charge of molecular system. Default : 0 (Neutral charge) - ''' + """ if type(task) == list: raise TypeError('Initiate one xtb or crest job at a time.') @@ -348,9 +348,9 @@ def configure(self, task='optimize', forcefield='gfn2', charge=None, self.config = config def submit(self): - ''' + """ Run xtb or crest simulation according to configured inputs. - ''' + """ owd = os.getcwd() os.chdir(self.temp_dir) job = self.config @@ -358,9 +358,9 @@ def submit(self): os.chdir(owd) def finish(self): - ''' + """ Parse results, save xtb output files, and clean temporary directory - ''' + """ parser = XTBParser() @@ -385,8 +385,10 @@ def finish(self): else: self.geom = self.geom[0] - def run(self, geom, **kwargs): - ''' + def run(self, geom, task='optimize', forcefield='gfn2', charge=None, + ewin=6, ion=None, optlevel='Normal', dryrun=False, processes=1, + solvation=None, ignore_topology=False): + """ Optimize geometry via density functional theory using supplied functional and basis set. @@ -394,16 +396,28 @@ def run(self, geom, **kwargs): ---------- geom : :obj:`~isicle.geometry.Geometry` Molecule representation. - **kwargs - Keyword arguments to configure the simulation. - See :meth:`~isicle.md.XTBWrapper.configure`. + tasks : str + Set task to "optimize", "conformer", "protonate", "deprotonate", or "tautomerize". + forcefield : str + GFN forcefield for the optimization, including "gfn2", "gfn1", "gff". + ewin : int + Energy window (kcal/mol) for conformer(set to 6), (de)protomer(set to 30), or tautomer(set to 30) search. + ion : str + Ion for protomer calculation. + optlevel : str or list of str + Set optimization level. Supply globally or per task. + ion : str + Keyword to couple with protonate to ionize molecule with an ion other than a proton. + See :obj:`~isicle.adduct.parse_ion` for list of ion options. + charge : int + Charge of molecular system. Defaults to 0 (neutral). Returns ------- :obj:`~isicle.md.XTBWrapper` Wrapper object containing relevant outputs from the simulation. - ''' + """ # New instance self = XTBWrapper() @@ -423,7 +437,7 @@ def run(self, geom, **kwargs): return self def get_structures(self): - ''' + """ Extract all structures from containing object as a conformational ensemble. Returns @@ -431,7 +445,8 @@ def get_structures(self): :obj:`~isicle.conformers.ConformationalEnsemble` Conformational ensemble. - ''' + """ + if isinstance(self.geom, isicle.conformers.ConformationalEnsemble): return self.geom @@ -439,7 +454,7 @@ def get_structures(self): 'Object does not contain multiple structures. Use `get_structure` instead.') def get_structure(self): - ''' + """ Extract structure from containing object. Returns @@ -447,7 +462,8 @@ def get_structure(self): :obj:`~isicle.geometry.XYZGeometry` Structure instance. - ''' + """ + if isinstance(self.geom, isicle.conformers.ConformationalEnsemble): raise TypeError( 'Object contains multiple structures. Use `get_structures` instead.') @@ -456,8 +472,7 @@ def get_structure(self): class RDKitWrapper(Geometry, WrapperInterface): - - ''' + """ Wrapper for rdkit functionality. Implements :class:`~isicle.interfaces.WrapperInterface` to ensure required methods are exposed. @@ -476,7 +491,7 @@ class RDKitWrapper(Geometry, WrapperInterface): job_list : str List of commands for simulation. - ''' + """ _defaults = ['geom'] _default_value = None @@ -504,7 +519,7 @@ def finish(self): class TINKERWrapper(Geometry, WrapperInterface): - ''' + """ Wrapper for TINKER functionality. Implements :class:`~isicle.interfaces.WrapperInterface` to ensure required methods are exposed. @@ -523,7 +538,7 @@ class TINKERWrapper(Geometry, WrapperInterface): job_list : str List of commands for simulation. - ''' + """ _defaults = ['geom'] _default_value = None @@ -535,7 +550,7 @@ def __init__(self, **kwargs): def _convert_to_tinkerxyz(self): - ''' + """ Convert mol to TINKER XYZ format using code from DP5, Goodman Lab. Parameters @@ -543,7 +558,7 @@ def _convert_to_tinkerxyz(self): geom : :obj: `~isicle.geometry.Geometry` Molecule representation. - ''' + """ # getting MMFF values for large atom types def getMMFF_large_atom_type(mmff_props , atom, m): @@ -690,7 +705,7 @@ def getMMFF_large_atom_type(mmff_props , atom, m): return xyz def set_geometry(self, geom): - ''' + """ Set :obj:`~isicle.geometry.Geometry` instance for simulation. Parameters @@ -698,7 +713,7 @@ def set_geometry(self, geom): geom : :obj:`~isicle.geometry.Geometry` Molecule representation. - ''' + """ # Assign geometry self.geom = geom @@ -710,7 +725,7 @@ def set_geometry(self, geom): self.save_geometry() def save_geometry(self, fmt='xyz'): - ''' + """ Save internal :obj:`~isicle.geometry.Geometry` representation to file. Parameters @@ -719,7 +734,7 @@ def save_geometry(self, fmt='xyz'): Filetype used by xtb. Must be "xyz", "smi", ".inchi", ".mol", ".xyz", ".pdb", ".pkl". - ''' + """ # Path operationspyth self.temp_dir = isicle.utils.mkdtemp() self.fmt = fmt.lower() @@ -775,7 +790,7 @@ def finish(self): self.geom = self.geom[0] def run(self, geom, **kwargs): - ''' + """ Take geometry and conduct specified tasks using TINKER Parameters @@ -791,7 +806,7 @@ def run(self, geom, **kwargs): :obj:`~isicle.md.TINKERWrapper` Wrapper object containing relevant outputs from the simulation. - ''' + """ # New instance self = TINKERWrapper() @@ -809,6 +824,3 @@ def run(self, geom, **kwargs): self.finish() return self - - def finish(self): - return \ No newline at end of file diff --git a/isicle/mobility.py b/isicle/mobility.py index ec8d9bc..55a2e45 100644 --- a/isicle/mobility.py +++ b/isicle/mobility.py @@ -27,7 +27,7 @@ def __init__(self): pass def set_geometry(self, geom): - ''' + """ Set :obj:`~isicle.geometry.Geometry` instance for simulation. Parameters @@ -35,7 +35,7 @@ def set_geometry(self, geom): geom : :obj:`~isicle.geometry.Geometry` Molecule representation. - ''' + """ # Assign geometry self.geom = geom @@ -44,7 +44,7 @@ def set_geometry(self, geom): self.save_geometry() def save_geometry(self): - ''' + """ Save internal :obj:`~isicle.geometry.Geometry` representation to file. Raises @@ -52,7 +52,7 @@ def save_geometry(self): TypeError If geometry loaded from .xyz is saved to another format. - ''' + """ # Temp directory self.temp_dir = isicle.utils.mkdtemp() diff --git a/isicle/parse.py b/isicle/parse.py index 8a46935..8841032 100644 --- a/isicle/parse.py +++ b/isicle/parse.py @@ -9,7 +9,7 @@ import isicle class NWChemParser(FileParserInterface): - '''Extract text from an NWChem simulation output file.''' + """Extract text from an NWChem simulation output file.""" def __init__(self): self.contents = None @@ -17,7 +17,7 @@ def __init__(self): self.path = None def load(self, path: str): - '''Load in the data file''' + """Load in the data file""" with open(path, 'r') as f: self.contents = f.readlines() self.path = path @@ -274,7 +274,7 @@ def _parse_molden(self): def _parse_protocol(self): - '''Parse out dft protocol''' + """Parse out dft protocol""" functional = [] basis_set = [] solvation = [] @@ -337,14 +337,15 @@ def _parse_connectivity(self): return connectivity def parse(self): - ''' - Extract relevant information from NWChem output + """ + Extract relevant information from NWChem output. Parameters ---------- to_parse : list of str geometry, energy, shielding, spin, frequency, molden, charge, timing - ''' + + """ # Check that the file is valid first if len(self.contents) == 0: @@ -419,21 +420,21 @@ def save(self, path): return class ImpactParser(FileParserInterface): - '''Extract text from an Impact mobility calculation output file.''' + """Extract text from an Impact mobility calculation output file.""" def __init__(self): self.contents = None self.result = None def load(self, path: str): - '''Load in the data file''' + """Load in the data file""" with open(path, 'rb') as f: self.contents = f.readlines() return self.contents def parse(self): - '''Extract relevant information from data''' + """Extract relevant information from data""" # Check CCS results == 1 count = 0 @@ -470,26 +471,26 @@ def parse(self): return result # TODO: return CCS? def save(self, path: str, sep='\t'): - '''Write parsed object to file''' + """Write parsed object to file""" pd.DataFrame(self.result).to_csv(path, sep=sep, index=False) return class MobcalParser(FileParserInterface): - '''Extract text from a MOBCAL mobility calculation output file.''' + """Extract text from a MOBCAL mobility calculation output file.""" def __init__(self): self.contents = None self.result = {} def load(self, path: str): - '''Load in the data file''' + """Load in the data file""" with open(path, 'r') as f: self.contents = f.readlines() return self.contents def parse(self): - '''Extract relevant information from data''' + """Extract relevant information from data""" done = False for line in self.contents: # if "average (second order) TM mobility" in line: @@ -506,23 +507,23 @@ def parse(self): return self.result def save(self, path: str, sep='\t'): - '''Write parsed object to file''' + """Write parsed object to file""" pd.DataFrame(self.result).to_csv(path, sep=sep, index=False) return class SanderParser(FileParserInterface): - '''Extract text from an Sander simulated annealing simulation output file.''' + """Extract text from an Sander simulated annealing simulation output file.""" def load(self, path: str): - '''Load in the data file''' + """Load in the data file""" raise NotImplementedError def parse(self): - '''Extract relevant information from data''' + """Extract relevant information from data""" raise NotImplementedError def save(self, path: str): - '''Write parsed object to file''' + """Write parsed object to file""" raise NotImplementedError class XTBParser(FileParserInterface): @@ -532,7 +533,7 @@ def __init__(self): self.path = None def load(self, path: str): - '''Load in the data file''' + """Load in the data file""" with open(path, 'r') as f: self.contents = f.readlines() self.path = path @@ -711,9 +712,9 @@ def _parse_protocol(self): return protocol def _parse_xyz(self): - ''' + """ Split .xyz into separate XYZGeometry instances - ''' + """ FILE = self.xyz_path if len(list(pybel.readfile('xyz', FILE))) > 1: @@ -733,7 +734,7 @@ def _parse_xyz(self): return isicle.conformers.ConformationalEnsemble(x) def parse(self): - '''Extract relevant information from data''' + """Extract relevant information from data""" # Check that the file is valid first if len(self.contents) == 0: @@ -827,7 +828,7 @@ def __init__(self): self.path = None def load(self, path: str): - '''Load in the data file''' + """Load in the data file""" with open(path, 'r') as f: self.contents = f.readlines() self.path = path diff --git a/isicle/qm.py b/isicle/qm.py index 71059bd..0c6febe 100644 --- a/isicle/qm.py +++ b/isicle/qm.py @@ -11,7 +11,7 @@ def _program_selector(program): - ''' + """ Selects a supported quantum mechanical program for associated simulation. Currently only NWChem has been implemented. @@ -26,7 +26,7 @@ def _program_selector(program): Wrapped functionality of the selected program. Must implement :class:`~isicle.interfaces.WrapperInterface`. - ''' + """ program_map = {'nwchem': NWChemWrapper} @@ -37,10 +37,14 @@ def _program_selector(program): .format(program)) -def dft(geom, program='NWChem', template=None, **kwargs): - ''' - Optimize geometry via density functional theory using supplied functional - and basis set. +def dft(geom, program='NWChem', template=None, tasks='energy', functional='b3lyp', + basis_set='6-31g*', ao_basis='cartesian', charge=0, + atoms=['C', 'H'], bonds=1, temp=298.15, cosmo=False, solvent='H2O', + gas=False, max_iter=150, mem_global=1600, mem_heap=100, + mem_stack=600, scratch_dir=None, processes=12, command='nwchem'): + """ + Perform density functional theory calculations according to supplied task list + and configuration parameters. Parameters ---------- @@ -50,35 +54,75 @@ def dft(geom, program='NWChem', template=None, **kwargs): Alias for program selection (NWChem). template : str Path to optional template to bypass default configuration process. - **kwargs - Keyword arguments to configure the simulation. - See :meth:`~isicle.qm.NWChemWrapper.configure`. - + tasks : str or list of str + List of calculations to perform. One or more of "optimize", "energy", + "frequency", "shielding", "spin". + functional : str or list of str + Functional selection. Supply globally or per task. + basis_set : str or list of str + Basis set selection. Supply globally or per task. + ao_basis : str or list of str + Angular function selection ("spherical", "cartesian"). Supply + globally or per task. + charge : int + Nominal charge of the molecule to be optimized. + atoms : list of str + Atom types of interest. Only used for `spin` and `shielding` tasks. + temp : float + Temperature for frequency calculation. Only used if `frequency` is + a selected task. + cosmo : bool + Indicate whether to include COSMO block. Supply globally or per + task. + solvent : str + Solvent selection. Only used if `cosmo` is True. Supply globally or + per task. + gas : bool + Indicate whether to use gas phase calculations. Only used if + `cosmo` is True. Supply globally or per task. + max_iter : int + Maximum number of optimization iterations. + scratch_dir : str + Path to simulation scratch directory. + mem_global : int + Global memory allocation in MB. + mem_heap : int + Heap memory allocation in MB. + mem_stack : int + Stack memory allocation in MB. + Returns ------- :obj:`~isicle.qm.QMWrapper` - Instance of selected QMWrapper. + Wrapper object containing relevant outputs from the simulation. - ''' + """ # Select program - return _program_selector(program).run(geom, template=template, **kwargs) + return _program_selector(program).run(geom, template=template, tasks=tasks, + functional=functional, basis_set=basis_set, + ao_basis=ao_basis, charge=charge, + atoms=atoms, bonds=bonds, temp=temp, + cosmo=cosmo, solvent=solvent, gas=gas, + max_iter=max_iter, mem_global=mem_global, + mem_heap=mem_heap, mem_stack=mem_stack, + scratch_dir=scratch_dir, processes=processes, + command=command) class NWChemWrapper(XYZGeometry, WrapperInterface): - ''' + """ Wrapper for NWChem functionality. - Implements :class:`~isicle.interfaces.WrapperInterface` to ensure - required methods are exposed. - Attributes ---------- temp_dir : str Path to temporary directory used for simulation. task_map : dict Alias mapper for supported quantum mechanical presets. Thses include - "optimze", "shielding", and "spin". + "optimze", "energy", "frequency", "shielding", and "spin". + task_order : dict + Indicates order of tasks in `task_map`. geom : :obj:`~isicle.geometry.Geometry` Internal molecule representation. fmt : str @@ -86,17 +130,22 @@ class NWChemWrapper(XYZGeometry, WrapperInterface): config : str Configuration information for simulation. - ''' + """ + _defaults = ['geom'] _default_value = None def __init__(self, **kwargs): - ''' + """ Initialize :obj:`~isicle.qm.NWChemWrapper` instance. - Establishes aliases for preconfigured tasks. + Parameters + ---------- + kwargs + Keyword arguments to initialize instance attributes. + + """ - ''' self.__dict__.update(dict.fromkeys( self._defaults, self._default_value)) self.__dict__.update(**kwargs) @@ -117,7 +166,7 @@ def __init__(self, **kwargs): self.temp_dir = isicle.utils.mkdtemp() def set_geometry(self, geom): - ''' + """ Set :obj:`~isicle.geometry.Geometry` instance for simulation. Parameters @@ -125,39 +174,39 @@ def set_geometry(self, geom): geom : :obj:`~isicle.geometry.Geometry` Molecule representation. - ''' + """ # Assign geometry - self.geom = geom + self.geom = geom.__copy__() self.basename = self.geom.basename # Save self.save_geometry() def save_geometry(self, fmt='xyz'): - ''' + """ Save internal :obj:`~isicle.geometry.Geometry` representation to file. Parameters ---------- fmt : str - Filetype used by xtb. Must be "xyz", "smi", ".inchi", ".mol", ".xyz", - ".pdb", ".pkl". + File format. + + """ - ''' # Path operations self.fmt = fmt.lower() geomfile = os.path.join(self.temp_dir, '{}.{}'.format(self.geom.basename, self.fmt.lower())) - # All other formats + # Save isicle.io.save(geomfile, self.geom) self.geom.path = geomfile def _configure_header(self, scratch_dir=None, mem_global=1600, mem_heap=100, mem_stack=600): - ''' + """ Generate header block of NWChem configuration. Parameters @@ -176,7 +225,7 @@ def _configure_header(self, scratch_dir=None, mem_global=1600, str Header block of NWChem configuration. - ''' + """ d = {'basename': self.geom.basename, 'dirname': self.temp_dir, @@ -195,7 +244,7 @@ def _configure_header(self, scratch_dir=None, mem_global=1600, 'print low\n').format(**d) def _configure_load(self, charge=0): - ''' + """ Generate geometry load block of NWChem configuration. Parameters @@ -206,9 +255,9 @@ def _configure_load(self, charge=0): Returns ------- str - Geometry load block of NWChem configuration. + Load block of NWChem configuration. - ''' + """ d = {'basename': self.geom.basename, 'dirname': self.temp_dir, @@ -220,7 +269,7 @@ def _configure_load(self, charge=0): 'end\n').format(**d) def _configure_basis(self, basis_set='6-31G*', ao_basis='cartesian'): - ''' + """ Generate basis set block of NWChem configuration. Parameters @@ -235,7 +284,7 @@ def _configure_basis(self, basis_set='6-31G*', ao_basis='cartesian'): str Basis set block of NWChem configuration. - ''' + """ d = {'ao_basis': ao_basis, 'basis_set': basis_set} @@ -245,7 +294,7 @@ def _configure_basis(self, basis_set='6-31G*', ao_basis='cartesian'): 'end\n').format(**d) def _configure_dft(self, functional='b3lyp', odft=False): - ''' + """ Generate DFT block of NWChem configuration. Parameters @@ -261,7 +310,7 @@ def _configure_dft(self, functional='b3lyp', odft=False): str DFT block of NWChem configuration. - ''' + """ d = {'functional': functional, 'dft': 'odft' if odft is True else 'dft'} @@ -279,7 +328,7 @@ def _configure_dft(self, functional='b3lyp', odft=False): return s def _configure_driver(self, max_iter=150): - ''' + """ Generate driver block of NWChem configuration. Parameters @@ -292,7 +341,7 @@ def _configure_driver(self, max_iter=150): str Driver block of NWChem configuration. - ''' + """ d = {'basename': self.geom.basename, 'max_iter': max_iter} @@ -303,7 +352,7 @@ def _configure_driver(self, max_iter=150): 'end\n').format(**d) def _configure_cosmo(self, solvent='H2O', gas=False): - ''' + """ Generate COSMO block of NWChem configuration. Parameters @@ -318,7 +367,7 @@ def _configure_cosmo(self, solvent='H2O', gas=False): str COSMO block of NWChem configuration. - ''' + """ d = {'solvent': solvent, 'gas': gas} @@ -331,11 +380,9 @@ def _configure_cosmo(self, solvent='H2O', gas=False): def _configure_frequency(self, temp=298.15, basis_set='6-31G*', ao_basis='cartesian', functional='b3lyp', cosmo=False, solvent='H2O', gas=False, **kwargs): - ''' + """ Configure frequency block of NWChem configuration. - Includes basis and DFT blocks; can include COSMO block. - Parameters ---------- temp : float @@ -353,15 +400,13 @@ def _configure_frequency(self, temp=298.15, basis_set='6-31G*', ao_basis='cartes gas : bool Indicate whether to use gas phase calculations. Only used if `cosmo` is True. - **kwargs - Arbitrary additional arguments (unused). Returns ------- str Frequency block of NWChem configuration. - ''' + """ # Add basis block s = self._configure_basis(basis_set=basis_set, ao_basis=ao_basis) @@ -384,11 +429,9 @@ def _configure_frequency(self, temp=298.15, basis_set='6-31G*', ao_basis='cartes def _configure_energy(self, basis_set='6-31G*', ao_basis='cartesian', functional='b3lyp', cosmo=False, solvent='H2O', gas=False, **kwargs): - ''' + """ Configure energy block of NWChem configuration. - Includes basis and DFT blocks; can include COSMO block. - Parameters ---------- basis_set : str @@ -404,14 +447,13 @@ def _configure_energy(self, basis_set='6-31G*', ao_basis='cartesian', gas : bool Indicate whether to use gas phase calculations. Only used if `cosmo` is True. - **kwargs - Arbitrary additional arguments (unused). Returns ------- str Energy block of NWChem configuration. - ''' + + """ # Add basis block s = self._configure_basis(basis_set=basis_set, ao_basis=ao_basis) @@ -429,9 +471,8 @@ def _configure_energy(self, basis_set='6-31G*', ao_basis='cartesian', def _configure_optimize(self, basis_set='6-31G*', ao_basis='cartesian', functional='b3lyp', max_iter=150, - cosmo=False, solvent='H2O', gas=False, - **kwargs): - ''' + cosmo=False, solvent='H2O', gas=False, **kwargs): + """ Generate meta optimization block of NWChem configuration. Includes basis, DFT, and driver blocks; can include COSMO block. @@ -453,15 +494,13 @@ def _configure_optimize(self, basis_set='6-31G*', ao_basis='cartesian', gas : bool Indicate whether to u se gas phase calculations. Only used if `cosmo` is True. - **kwargs - Arbitrary additional arguments (unused). Returns ------- str Optimization meta block of NWChem configuration. - ''' + """ # Add basis block s = self._configure_basis(basis_set=basis_set, ao_basis=ao_basis) @@ -484,12 +523,9 @@ def _configure_optimize(self, basis_set='6-31G*', ao_basis='cartesian', def _configure_shielding(self, basis_set='6-31G*', ao_basis='cartesian', functional='b3lyp', cosmo=True, solvent='H2O', gas=False, **kwargs): - ''' + """ Generate meta shielding block of NWChem configuration. - Includes basis and DFT blocks; can include COSMO and/or single-point - energy calculation blocks. - Parameters ---------- basis_set : str @@ -507,15 +543,14 @@ def _configure_shielding(self, basis_set='6-31G*', ao_basis='cartesian', gas : bool Indicate whether to use gas phase calculations. Only used if `cosmo` is True. - **kwargs - Arbitrary additional arguments (unused). Returns ------- str Shielding meta block of NWChem configuration. - ''' + """ + # Extract atom index information #idx = self.geom.mol.get_atom_indices(atoms=atoms) #new_idx = [] @@ -546,12 +581,9 @@ def _configure_shielding(self, basis_set='6-31G*', ao_basis='cartesian', def _configure_spin(self, bonds=1, basis_set='6-31G*', ao_basis='spherical', functional='b3lyp', cosmo=True, solvent='H2O', gas=False, **kwargs): - ''' + """ Generate meta spin-spin coupling block of NWChem configuration. - Includes basis and DFT blocks; can include COSMO and/or single-point - energy calculation blocks. - Parameters ---------- max_pairs : int @@ -570,15 +602,13 @@ def _configure_spin(self, bonds=1, basis_set='6-31G*', gas : bool Indicate whether to use gas phase calculations. Only used if `cosmo` is True. - **kwargs - Arbitrary additional arguments (unused). Returns ------- str Spin-spin coupling meta block of NWChem configuration. - ''' + """ def generate_pairs(mol, bonds=bonds): @@ -662,7 +692,7 @@ def configure(self, tasks='energy', functional='b3lyp', atoms=['C', 'H'], bonds=1, temp=298.15, cosmo=False, solvent='H2O', gas=False, max_iter=150, mem_global=1600, mem_heap=100, mem_stack=600, scratch_dir=None, processes=12, command='nwchem'): - ''' + """ Configure NWChem simulation. Parameters @@ -709,7 +739,7 @@ def configure(self, tasks='energy', functional='b3lyp', str NWChem configuration. - ''' + """ # Cast to list safely tasks = safelist(tasks) @@ -789,7 +819,7 @@ def configure(self, tasks='energy', functional='b3lyp', def configure_from_template(self, path, basename_override=None, dirname_override=None, **kwargs): - ''' + """ Configure NWChem simulation from template file. Use for NWChem functionality not exposed by the wrapper. @@ -814,7 +844,7 @@ def configure_from_template(self, path, basename_override=None, str NWChem configuration. - ''' + """ # Add/override class-managed kwargs if basename_override is not None: @@ -840,10 +870,10 @@ def configure_from_template(self, path, basename_override=None, return self.config def save_config(self): - ''' + """ Write generated NWChem configuration to file. - ''' + """ # Write to file with open(os.path.join(self.temp_dir, @@ -851,10 +881,10 @@ def save_config(self): f.write(self.config) def submit(self): - ''' + """ Submit the NWChem simulation according to configured inputs. - ''' + """ infile = os.path.join(self.temp_dir, self.geom.basename + '.nw') outfile = os.path.join(self.temp_dir, self.geom.basename + '.out') @@ -873,7 +903,7 @@ def submit(self): subprocess.call(s, shell=True) def finish(self): - ''' + """ Parse NWChem simulation results. Returns @@ -881,24 +911,35 @@ def finish(self): :obj:`~isicle.parse.NWChemResult` Parsed result data. - ''' + """ + # Initialize parser parser = NWChemParser() - parser.load(os.path.join(self.temp_dir, + + # Open output file + self.output = parser.load(os.path.join(self.temp_dir, self.basename + '.out')) + + # Parse results result = parser.parse() + # Update this instance attributes self.__dict__.update(result) + + # Update geom attributes self.geom.add___dict__( {k: v for k, v in result.items() if k != 'geom'}) - self.output = parser.load(os.path.join(self.temp_dir, - self.basename + '.out')) + return self - def run(self, geom, template=None, **kwargs): - ''' - Optimize geometry via density functional theory using supplied functional - and basis set. + def run(self, geom, template=None, tasks='energy', functional='b3lyp', + basis_set='6-31g*', ao_basis='cartesian', charge=0, + atoms=['C', 'H'], bonds=1, temp=298.15, cosmo=False, solvent='H2O', + gas=False, max_iter=150, mem_global=1600, mem_heap=100, + mem_stack=600, scratch_dir=None, processes=12, command='nwchem'): + """ + Perform density functional theory calculations according to supplied task list + and configuration parameters. Parameters ---------- @@ -906,16 +947,49 @@ def run(self, geom, template=None, **kwargs): Molecule representation. template : str Path to optional template to bypass default configuration process. - **kwargs - Keyword arguments to configure the simulation. - See :meth:`~isicle.qm.NWChemWrapper.configure`. + tasks : str or list of str + List of calculations to perform. One or more of "optimize", "energy", + "frequency", "shielding", "spin". + functional : str or list of str + Functional selection. Supply globally or per task. + basis_set : str or list of str + Basis set selection. Supply globally or per task. + ao_basis : str or list of str + Angular function selection ("spherical", "cartesian"). Supply + globally or per task. + charge : int + Nominal charge of the molecule to be optimized. + atoms : list of str + Atom types of interest. Only used for `spin` and `shielding` tasks. + temp : float + Temperature for frequency calculation. Only used if `frequency` is + a selected task. + cosmo : bool + Indicate whether to include COSMO block. Supply globally or per + task. + solvent : str + Solvent selection. Only used if `cosmo` is True. Supply globally or + per task. + gas : bool + Indicate whether to use gas phase calculations. Only used if + `cosmo` is True. Supply globally or per task. + max_iter : int + Maximum number of optimization iterations. + scratch_dir : str + Path to simulation scratch directory. + mem_global : int + Global memory allocation in MB. + mem_heap : int + Heap memory allocation in MB. + mem_stack : int + Stack memory allocation in MB. Returns ------- :obj:`~isicle.qm.NWChemWrapper` Wrapper object containing relevant outputs from the simulation. - ''' + """ # Set geometry self.set_geometry(geom) @@ -924,7 +998,15 @@ def run(self, geom, template=None, **kwargs): if template is not None: self.configure_from_template(template) else: - self.configure(**kwargs) + self.configure(tasks=tasks, + functional=functional, basis_set=basis_set, + ao_basis=ao_basis, charge=charge, + atoms=atoms, bonds=bonds, temp=temp, + cosmo=cosmo, solvent=solvent, gas=gas, + max_iter=max_iter, mem_global=mem_global, + mem_heap=mem_heap, mem_stack=mem_stack, + scratch_dir=scratch_dir, processes=processes, + command=command) # Run QM simulation self.submit() @@ -935,14 +1017,14 @@ def run(self, geom, template=None, **kwargs): return self def get_structure(self): - ''' + """ Extract structure from containing object. Returns ------- :obj:`~isicle.geometry.XYZGeometry` - Structure instance. - - ''' + Structure instance. + + """ return self.geom diff --git a/isicle/utils.py b/isicle/utils.py index 486b980..da92f7e 100644 --- a/isicle/utils.py +++ b/isicle/utils.py @@ -8,7 +8,7 @@ def safelist(x): - ''' + """ Ensures passed object is of correct format. Parameters @@ -20,7 +20,7 @@ def safelist(x): list, :obj:`~pd.core.series.Series`, or :obj:`~np.ndarray` Input safely cast to list-like. - ''' + """ if not isinstance(x, (list, pd.core.series.Series, np.ndarray)): return [x].copy() @@ -28,7 +28,7 @@ def safelist(x): class TypedList(collections.abc.MutableSequence): - ''' + """ Mutable sequence that requires all members be of select type(s). Attributes @@ -38,10 +38,10 @@ class TypedList(collections.abc.MutableSequence): list : list Internal list representation. - ''' + """ def __init__(self, oktypes, *args): - ''' + """ Initialize :obj:`~isicle.utils.TypedList` instance. Parameters @@ -51,7 +51,7 @@ def __init__(self, oktypes, *args): *args Objects to comprise the type-restricted list. - ''' + """ self.oktypes = oktypes self.list = list() @@ -61,7 +61,7 @@ def __init__(self, oktypes, *args): self.extend(list(args)) def check(self, v): - ''' + """ Check if supplied value is of allowed type(s). Raises @@ -69,7 +69,7 @@ def check(self, v): TypeError If value is not of allowed type(s). - ''' + """ if not isinstance(v, self.oktypes): raise TypeError(v) @@ -104,7 +104,7 @@ def atomic_masses(): def gettempdir(): - ''' + """ Return the name of the directory used for temporary files. Returns @@ -112,7 +112,7 @@ def gettempdir(): str Path to temporary directory. - ''' + """ root = os.path.join(tempfile.gettempdir(), 'isicle') @@ -123,7 +123,7 @@ def gettempdir(): def mkdtemp(prefix=None, suffix=None): - ''' + """ An ISiCLE-specific wrapper of :func:`~tempfile.mkdtemp` to create a temporary directory for temporary ISiCLE files. The temporary directory is not automatically removed. @@ -139,17 +139,16 @@ def mkdtemp(prefix=None, suffix=None): ------- str Path to temporary directory. - - ''' - + """ + return tempfile.mkdtemp(dir=gettempdir(), prefix=prefix, suffix=suffix) def rmdtemp(): - ''' + """ Removes all temporary directories and files created by ISiCLE. - ''' + """ shutil.rmtree(gettempdir(), ignore_errors=True)