diff --git a/isicle/adducts.py b/isicle/adducts.py index 8e692bf..dcb23f0 100644 --- a/isicle/adducts.py +++ b/isicle/adducts.py @@ -296,13 +296,6 @@ def set_geometry(self, geom): else: raise ValueError('Could not find self.mol or self.geom') - def set_charge(self): - ''' - ''' - - if self.geom.__dict__.get('charge') is None: - self.geom.__dict__.update(charge=self.geom.get_charge()) - def _set_ions(self, ion_path=None, ion_list=None): ''' ''' @@ -468,6 +461,7 @@ def _forcefield_selector(self, forcefield, mw): raise ValueError( 'RDKit only supports UFF, MMFF, MMFF94, MMFF94s as forcefields.') + # TODO: update this def _update_geometry_charge(self, geom): ''' ''' @@ -873,9 +867,6 @@ def run(self, geom, ion_path=None, ion_list=None, **kwargs): # Sets geom to self.geom self.set_geometry(geom) - # Infers charge of geom.mol - self.set_charge() - # Load specified ions by type # Validity check if negative ionization can be done self.configure(ion_path=ion_path, ion_list=ion_list) @@ -975,18 +966,6 @@ def set_geometry(self, geom): else: raise ValueError('Could not find self.xyz, self.mol, or self.geom') - def set_charge(self): - ''' - ''' - - if self.geom.__dict__.get('charge') is None: - if self.geom.load.get('filetype') == '.xyz': - # self.geom.__dict__.update(charge=charge) - raise ValueError( - 'Must first run geom.set_charge for an xyz structure') - else: - self.geom.__dict__.update(charge=self.geom.get_charge()) - def _set_ions(self, ion_path=None, ion_list=None): cations, anions, complex = _parse_ions( ion_path=ion_path, ion_list=ion_list) @@ -1032,11 +1011,11 @@ def _check_valid(self): self.adducts['complex'] = self._filter_supported_by_xtb( self.adducts['complex']) - if self.geom.load['filetype'] != '.xyz': - self.adducts['anions'] = _filter_by_substructure_match( - self.geom.mol, self.adducts['anions']) - self.adducts['complex'] = _filter_by_substructure_match( - self.geom.mol, self.adducts['complex']) + + self.adducts['anions'] = _filter_by_substructure_match( + self.geom.mol, self.adducts['anions']) + self.adducts['complex'] = _filter_by_substructure_match( + self.geom.mol, self.adducts['complex']) def configure(self, ion_path=None, ion_list=None): self._set_ions(ion_path=ion_path, ion_list=ion_list) @@ -1054,14 +1033,15 @@ def _parse_ion_charge(self, ion): charge = int(charge[0]) return charge + # TODO: update this def _update_geometry_charge(self, geom, adduct, ion_charge, mode): ''' ''' if mode == 'negative': - charge = geom.__dict__.get('charge') - ion_charge + charge = geom.get_charge() - ion_charge elif mode == 'positive': - charge = geom.__dict__.get('charge') + ion_charge + charge = geom.get_charge() + ion_charge adduct.__dict__.update(charge=charge) def _positive_mode(self, geom, forcefield, ewin, cation, optlevel, dryrun, processes, solvation, ignore_topology): @@ -1070,7 +1050,7 @@ def _positive_mode(self, geom, forcefield, ewin, cation, optlevel, dryrun, proce ''' output = md(geom, program='xtb', task='protonate', forcefield=forcefield, - ewin=ewin, ion=cation, optlevel=optlevel, dryrun=dryrun, charge=geom.charge, processes=processes, solvation=solvation, ignore_topology=ignore_topology) + ewin=ewin, ion=cation, optlevel=optlevel, dryrun=dryrun, processes=processes, solvation=solvation, ignore_topology=ignore_topology) ion_charge = self._parse_ion_charge(cation) for adduct in output.geom: self._update_geometry_charge(geom, adduct, ion_charge, 'positive') @@ -1082,7 +1062,7 @@ def _negative_mode(self, geom, forcefield, ewin, anion, optlevel, dryrun, proces ''' output = md(geom, program='xtb', task='deprotonate', forcefield=forcefield, - ewin=ewin, ion=anion, optlevel=optlevel, dryrun=dryrun, charge=geom.charge, processes=processes, solvation=solvation, ignore_topology=ignore_topology) + ewin=ewin, ion=anion, optlevel=optlevel, dryrun=dryrun, processes=processes, solvation=solvation, ignore_topology=ignore_topology) ion_charge = self._parse_ion_charge(anion) for adduct in output.geom: self._update_geometry_charge(geom, adduct, ion_charge, 'negative') @@ -1181,9 +1161,6 @@ def run(self, geom, ion_path=None, ion_list=None, **kwargs): self.set_geometry(geom) - # Infers charge for non xyz files, infers default neutral for xyz files - self.set_charge() - # Load specified ions by type # Validity check if negative ionization can be done # Validity check if CREST supports the ions specified diff --git a/isicle/conformers.py b/isicle/conformers.py index ff0b59b..236fc52 100644 --- a/isicle/conformers.py +++ b/isicle/conformers.py @@ -3,7 +3,7 @@ from statsmodels.stats.weightstats import DescrStatsW from isicle import io -from isicle.geometry import Geometry, XYZGeometry +from isicle.geometry import Geometry from isicle.utils import TypedList, safelist @@ -382,7 +382,7 @@ def __init__(self, *args): ''' - super().__init__((Geometry, XYZGeometry), *args) + super().__init__(Geometry, *args) def _check_attributes(self, attr): ''' diff --git a/isicle/geometry.py b/isicle/geometry.py index a6072f1..69a4eda 100644 --- a/isicle/geometry.py +++ b/isicle/geometry.py @@ -7,33 +7,29 @@ from rdkit.Chem.SaltRemover import SaltRemover import isicle -from isicle.interfaces import GeometryInterface, XYZGeometryInterface +from isicle.interfaces import GeometryInterface -class XYZGeometry(XYZGeometryInterface): +class Geometry(GeometryInterface): """ - Molecule information, specialized for XYZ files (bonding info not provided). - It is not recommended to manipulate or retrieve attributes of this class - without using class functions. + Molecule information, specialized for 3D representations. It is not + recommended to manipulate or retrieve attributes of this class without + using class functions. """ - _defaults = ["xyz"] + _defaults = ["mol", "charge", "basename"] _default_value = None def __init__(self, **kwargs): - """ - Initialize :obj:`~isicle.geometry.XYZGeometry` instance. - - Parameters - ---------- - kwargs - Keyword arguments to initialize instance attributes. - - """ - self.__dict__.update(dict.fromkeys(self._defaults, self._default_value)) self.__dict__.update(kwargs) + # if self.charge is None: + # self.charge = self.get_charge() + + def view(self): + return self.to_mol() + def get_basename(self): """ Returns a copy of this object's basename (original filename). @@ -68,146 +64,6 @@ def add___dict__(self, d, override=False): # Add to attributes self.__dict__.update(d) - def dft(self, backend="NWChem", **kwargs): - """ - Perform density functional theory calculations according to supplied task list - and configuration parameters for specified backend. - - Parameters - ---------- - backend : str - Alias for backend selection (NWChem, ORCA). - kwargs - Keyword arguments supplied to selected program. See `run` method of relevant - wrapper for configuration parameters, e.g. :meth:`~isicle.qm.NWChemWrapper.run`. - - Returns - ------- - :obj:`~isicle.qm.{program}Wrapper` - Wrapper object containing relevant outputs from the simulation. - - """ - - return isicle.qm.dft(self, backend=backend, **kwargs) - - def md(self, program="xtb", **kwargs): - """ - Optimize geometry or generate conformers or adducts using stated forcefield. - - Parameters - ---------- - kwargs - Keyword arguments supplied to selected program. See `run` method of relevant - wrapper for configuration parameters, e.g. :meth:`~isicle.md.XTBWrapper.run`. - - Returns - ------- - :obj:`~isicle.md.{program}Wrapper` - Wrapper object containing relevant outputs from the simulation. - - """ - - return isicle.md.md(self, program=program, **kwargs) - - def ionize(self, ion_path=None, ion_list=None, **kwargs): - """ - Calls xtb CREST to ionize XYZ geometry, using specified list of ions and method of ionization. - WARNING: must run isicle.geometry.XYZGeometry.set_charge prior to this. - - Parameters - ---------- - ion_path : str - Filepath to text file containing ions with charge (eg. `H+`) to be considered - Either ion_path or ion_list must be specified - ion_list : list - List of strings of adducts to be considered. - Must be specifed in syntax `Atom+` or `Atom-`, eg. `H+`, `Na+`, `H-Na+` - Either ion_path or ion_list must be specified - kwargs - See :meth: `~isicle.adducts.CRESTIonizationWrapper.submit` - for more options - - Returns - ------- - Dictionary of adducts, `{:[]}` - - """ - - if self.__dict__.get("charge") is None: - raise ValueError( - "Must first run isicle.geometry.XYZGeometry.set_charge for an XYZ structure" - ) - iw = isicle.adducts.ionize("crest").run( - self.__copy__(), ion_path=ion_path, ion_list=ion_list, **kwargs - ) - - return iw - - def set_charge(self, charge): - """ - Specify and set the known formal charge of the XYZ molecule. - - Parameters - ---------- - charge : int - Nominal charge of the molecule. - - """ - - # Attempt cast to int - try: - charge = int(charge) - except: - raise TypeError("Charge specifed must be str or int.") - - # Update attribute - self.__dict__.update(charge=charge) - - def get_natoms(self): - """ - Calculate total number of atoms. - - Returns - ------- - int - Number of atoms. - - """ - - self.__dict__["natoms"] = int(self.xyz[0].strip()) - return self.__dict__["natoms"] - - def get_atom_indices(self, atoms=["C", "H"]): - """ - Extract indices of each atom from the internal geometry. - - Parameters - ---------- - atoms : list of str - Atom types of interest. - - Returns - ------- - list of int - Atom indices. - - """ - - # Index container - idx = [] - - # Enumerate rows - for i in range(2, len(self.xyz)): - # Extract atom - atom = self.xyz[i].split(" ")[0] - - # Check if of interest - if atom in atoms: - # Append index - idx.append(i - 2) - - return idx - def get___dict__(self): """ Return a copy of this object's attributes as a dictionary. @@ -223,7 +79,7 @@ def get___dict__(self): def __copy__(self, **kwargs): """ - Copy this class instance. + Return hard copy of this class instance. Parameters ---------- @@ -232,7 +88,7 @@ def __copy__(self, **kwargs): Returns ------- - :obj:`~isicle.geometry.XYZGeometry` + :obj:`~isicle.geometry.Geometry` Molecule representation. """ @@ -252,53 +108,6 @@ def __copy__(self, **kwargs): return instance - def to_xyzblock(self): - """ - Get XYZ text for this structure. - - Returns - ------- - str - XYZ text. - - """ - - return "\n".join(self.xyz) - - def to_mol(self): - """ - Get structure representation in :obj:`~rdkit.Chem.rdchem.Mol` format. - - :obj:`~rdkit.Chem.rdchem.Mol` - RDKit Mol instance. - - """ - - self.temp_dir = isicle.utils.mkdtemp() - - geomfile = os.path.join(self.temp_dir, "{}.{}".format(self.basename, "xyz")) - isicle.io.save(geomfile, self) - - mols = list(pybel.readfile("xyz", geomfile)) - mo = mols[0] - mo_block = mo.write("mol") - return Chem.MolFromMolBlock(mo_block, removeHs=False) - - -class Geometry(XYZGeometry, GeometryInterface): - """ - Molecule information, specialized for 3D representations. It is not - recommended to manipulate or retrieve attributes of this class without - using class functions. - """ - - _defaults = ["mol"] - _default_value = None - - def __init__(self, **kwargs): - self.__dict__.update(dict.fromkeys(self._defaults, self._default_value)) - self.__dict__.update(kwargs) - def _is_embedded(self, mol): """ Check if molecule has embedded 3D coordinates. @@ -589,9 +398,7 @@ def get_natoms(self): """ - natoms = Chem.Mol.GetNumAtoms(self.to_mol()) - self.__dict__["natoms"] = natoms - return self.__dict__["natoms"] + return Chem.Mol.GetNumAtoms(self.to_mol()) def get_atom_indices( self, atoms=["C", "H"], lookup={"C": 6, "H": 1, "N": 7, "O": 8, "F": 9, "P": 15} @@ -653,52 +460,46 @@ def get_charge(self): return Chem.rdmolops.GetFormalCharge(self.to_mol()) - def set_charge(self, charge=None): + def dft(self, backend="NWChem", **kwargs): """ - Set formal charge of the molecule. + Perform density functional theory calculations according to supplied task list + and configuration parameters for specified backend. Parameters ---------- - charge : int - Formal charge of molecule. + backend : str + Alias for backend selection (NWChem, ORCA). + kwargs + Keyword arguments supplied to selected program. See `run` method of relevant + wrapper for configuration parameters, e.g. :meth:`~isicle.qm.NWChemWrapper.run`. - """ + Returns + ------- + :obj:`~isicle.qm.{program}Wrapper` + Wrapper object containing relevant outputs from the simulation. - if charge is None: - charge = self.get_charge() + """ - self.__dict__.update(charge=int(charge)) + return isicle.qm.dft(self, backend=backend, **kwargs) - def __copy__(self, **kwargs): + def md(self, program="xtb", **kwargs): """ - Return hard copy of this class instance. + Optimize geometry or generate conformers or adducts using stated forcefield. Parameters ---------- kwargs - Keyword arguments to update copy. + Keyword arguments supplied to selected program. See `run` method of relevant + wrapper for configuration parameters, e.g. :meth:`~isicle.md.XTBWrapper.run`. Returns ------- - :obj:`~isicle.geometry.Geometry` - Molecule representation. + :obj:`~isicle.md.{program}Wrapper` + Wrapper object containing relevant outputs from the simulation. """ - # Copy attributes - d = self.__dict__.copy() - - # Safely copy mol if present - if "mol" in d: - d["mol"] = self.to_mol() - - # Build self instance from scratch - instance = type(self)(**d) - - # Update from kwargs - instance.__dict__.update(kwargs) - - return instance + return isicle.md.md(self, program=program, **kwargs) def to_mol(self): """ diff --git a/isicle/interfaces.py b/isicle/interfaces.py index 709420a..62f9196 100644 --- a/isicle/interfaces.py +++ b/isicle/interfaces.py @@ -93,16 +93,30 @@ class GeometryInterface(XYZGeometryInterface): @classmethod def __subclasshook__(cls, subclass): - return (hasattr(subclass, 'to_mol') - and callable(subclass.to_mol) + return (hasattr(subclass, 'get___dict__') + and callable(subclass.get___dict__) + and hasattr(subclass, '__copy__') + and callable(subclass.__copy__) and hasattr(subclass, 'get_total_partial_charge') and callable(subclass.get_total_partial_charge) + and hasattr(subclass, 'dft') + and callable(subclass.dft) + and hasattr(subclass, 'md') + and callable(subclass.md) + and hasattr(subclass, 'get_natoms') + and callable(subclass.get_natoms) + and hasattr(subclass, 'get_atom_indices') + and callable(subclass.get_atom_indices) + and hasattr(subclass, 'to_mol') + and callable(subclass.to_mol) and hasattr(subclass, 'to_smiles') and callable(subclass.to_smiles) and hasattr(subclass, 'to_inchi') and callable(subclass.to_inchi) and hasattr(subclass, 'to_smarts') and callable(subclass.to_smarts) + and hasattr(subclass, 'to_xyzblock') + and callable(subclass.to_xyzblock) or NotImplemented) @abc.abstractmethod diff --git a/isicle/io.py b/isicle/io.py index 1b6d19d..2e9f094 100644 --- a/isicle/io.py +++ b/isicle/io.py @@ -6,8 +6,7 @@ import isicle import pandas as pd from rdkit import Chem -from openbabel import pybel - +from rdkit.Chem import rdDetermineBonds def _load_text(path: str): """ @@ -33,7 +32,7 @@ def _load_text(path: str): return [x.strip() for x in contents] -def load_xyz(path, basename=None): +def load_xyz(path, charge=None): """ Load XYZ from file. @@ -41,36 +40,28 @@ def load_xyz(path, basename=None): ---------- path : str Path to XYZ file. - basename : str - Basename to save files as, if none default to inchikey Return ------- - :obj:`~isicle.geometry.XYZGeometry` + :obj:`~isicle.geometry.Geometry` Molecule representation. """ - - # Initialize XYZGeometry instance - geom = isicle.geometry.XYZGeometry() - - # Load xyz file contents - geom.xyz = _load_text(path) + + # Check for charge + if charge is None: + raise ValueError("Charge must be specified when loading XYZ files.") # Create mol object - mols = list(pybel.readfile("xyz", path)) - mo = mols[0] - mo_block = mo.write("mol") - geom.mol = Chem.MolFromMolBlock(mo_block, removeHs=False) + raw_mol = Chem.MolFromXYZFile(path) + mol = Chem.Mol(raw_mol) + rdDetermineBonds.DetermineBonds(mol, charge=charge) - # Populate basename - if basename is None: - try: - geom.basename = os.path.splitext(os.path.basename(path))[0] - except: - geom.basename = Chem.MolToInchiKey(mol) - else: - geom.basename = basename + # Basename + basename = os.path.splitext(os.path.basename(path))[0] + + # Initialize Geometry instance + geom = isicle.geometry.Geometry(mol=mol, basename=basename) return geom @@ -92,7 +83,7 @@ def _check_mol(mol, string_struct): raise ValueError("Could not convert structure to mol: {}".format(string_struct)) -def _load_mol_from_file(path, func=None, basename=None): +def _load_mol_from_file(path, func=None): """ Load RDKit mol representation from file (pdb, mol, mol2). @@ -110,9 +101,6 @@ def _load_mol_from_file(path, func=None, basename=None): """ - # Initialize geometry instance - geom = isicle.geometry.Geometry() - # Load mol representation if func == Chem.MolFromMolFile: mol = func(path, removeHs=False, strictParsing=False) @@ -122,59 +110,16 @@ def _load_mol_from_file(path, func=None, basename=None): # Check result _check_mol(mol, path) - # Assign to instance attribute - geom.mol = mol - # Populate basename - if basename is None: - try: - geom.basename = os.path.splitext(os.path.basename(path))[0] - except: - geom.basename = Chem.MolToInchiKey(mol) - else: - geom.basename = basename - - return geom - - -def load_nwchem(path, basename=None): - """ - Load NWChem output from file. - - Parameters - ---------- - path : str - Path to nwchem output file - - Returns - ------- - :obj:`~isicle.qm.NWChemWrapper` - DFT representation + basename = os.path.splitext(os.path.basename(path))[0] - """ - - dft = isicle.qm.NWChemWrapper() - - parser = isicle.parse.NWChemParser() - parser.load(path) - result = parser.parse() - - dft.__dict__.update(result) - dft.geom.add___dict__({k: v for k, v in result.items() if k != "geom"}) - dft.output = parser.load(path) - - if basename is None: - try: - basename = Chem.MolToInchiKey(dft.geom.mol) - except: - basename = os.path.splitext((os.path.basename(path)))[0] - - dft.basename = basename + # Initialize geometry instance + geom = isicle.geometry.Geometry(mol=mol, basename=basename) - return dft + return geom -def load_mol(path, basename=None): +def load_mol(path): """ Load mol from file. @@ -190,10 +135,10 @@ def load_mol(path, basename=None): """ - return _load_mol_from_file(path, func=Chem.MolFromMolFile, basename=basename) + return _load_mol_from_file(path, func=Chem.MolFromMolFile) -def load_mol2(path: str, basename=None): +def load_mol2(path: str): """ Load mol2 from file. @@ -209,7 +154,7 @@ def load_mol2(path: str, basename=None): """ - return _load_mol_from_file(path, func=Chem.MolFromMol2File, basename=basename) + return _load_mol_from_file(path, func=Chem.MolFromMol2File) def load_pdb(path): @@ -231,7 +176,7 @@ def load_pdb(path): return _load_mol_from_file(path, func=Chem.MolFromPDBFile) -def _load_line_notation(path, func=None, force=False, string=False, basename=None): +def _load_line_notation(path, func=None, force=False, string=False): """ Load line notation representation (InChI, SMILES) from file. @@ -249,16 +194,18 @@ def _load_line_notation(path, func=None, force=False, string=False, basename=Non """ - # Initialize geometry instance - geom = isicle.geometry.Geometry() - + basename = None if string: # Load text text = path + else: # Load text text = _load_text(path)[0].strip() + # Extract basename + basename = os.path.splitext(os.path.basename(path))[0] + # Load without sanitization, downstream checks if force is True: mol = func(text, sanitize=False) @@ -274,19 +221,16 @@ def _load_line_notation(path, func=None, force=False, string=False, basename=Non mol = Chem.AddHs(mol) _check_mol(mol, text) - # Populate rdkit mol instance attribute - geom.mol = mol - - # Populate basename if basename is None: - geom.basename = Chem.MolToInchiKey(mol) - else: - geom.basename = basename + basename = Chem.MolToInchiKey(mol) + + # Initialize geometry instance + geom = isicle.geometry.Geometry(mol=mol, basename=basename) return geom -def load_smiles(path, force=False, basename=None): +def load_smiles(path, force=False): """ Load SMILES from file. @@ -307,15 +251,15 @@ def load_smiles(path, force=False, basename=None): extension = os.path.splitext(path)[-1].lower() if "smi" in extension: return _load_line_notation( - path, func=Chem.MolFromSmiles, force=force, basename=basename + path, func=Chem.MolFromSmiles, force=force ) else: return _load_line_notation( - path, func=Chem.MolFromSmiles, force=force, string=True, basename=basename + path, func=Chem.MolFromSmiles, force=force, string=True ) -def load_inchi(path, force=False, basename=None): +def load_inchi(path, force=False): """ Load InChI from file. @@ -335,11 +279,11 @@ def load_inchi(path, force=False, basename=None): if "inchi=" in path.lower(): return _load_line_notation( - path, func=Chem.MolFromInchi, force=force, string=True, basename=basename + path, func=Chem.MolFromInchi, force=force, string=True ) else: return _load_line_notation( - path, func=Chem.MolFromInchi, force=force, basename=basename + path, func=Chem.MolFromInchi, force=force ) @@ -394,7 +338,7 @@ def _check_mol_obj(mol_obj): raise IOError("Not a valid RDKit Mol object passed.") -def load_mol_obj(mol_obj, basename=None): +def load_mol_obj(mol_obj): """ Load RDKit mol object into geometry instance @@ -410,25 +354,17 @@ def load_mol_obj(mol_obj, basename=None): """ - # Initialize geometry instance - geom = isicle.geometry.Geometry() - # Validate mol object _check_mol_obj(mol_obj) - # Populate rdkit mol instance attribute - geom.mol = mol_obj - - # Populate basename - if basename is None: - geom.basename = Chem.MolToInchiKey(mol) - else: - geom.basename = basename + # Initialize geometry instance + geom = isicle.geometry.Geometry(mol=mol_obj, + basename=Chem.MolToInchiKey(mol_obj)) return geom -def load(path, force=False, basename=None): +def load(path, **kwargs): """ Reads in molecule information of the following supported file types: .smi, .inchi, .xyz, .mol, .mol2, .pkl, .pdb. Direct loaders can also @@ -438,8 +374,8 @@ def load(path, force=False, basename=None): ---------- path : str Path to file with molecule information. - force : bool - Indicate whether to force load input, ignoring errors. + kwargs + Keyword arguments passed to format-specific loaders. Returns ------- @@ -448,7 +384,7 @@ def load(path, force=False, basename=None): """ - if (type(path)) == str: + if isinstance(path, str): path = path.strip() extension = os.path.splitext(path)[-1].lower() @@ -456,31 +392,31 @@ def load(path, force=False, basename=None): return load_pickle(path) if "mol2" in extension: - return load_mol2(path, basename=basename) + return load_mol2(path) if "mol" in extension: - return load_mol(path, basename=basename) + return load_mol(path) if extension == ".joblib": return load_joblib(path) if extension == ".xyz": - return load_xyz(path, basename=basename) + return load_xyz(path, **kwargs) if extension == ".pdb": - return load_pdb(path, basename=basename) + return load_pdb(path) if extension == ".inchi" or "inchi=" in path.lower(): - return load_inchi(path, force=force, basename=basename) + return load_inchi(path, **kwargs) try: - return load_smiles(path, force=force, basename=basename) + return load_smiles(path, **kwargs) except: raise IOError("Extension {} not recognized.".format(extension)) else: try: - return load_mol_obj(path, basename=basename) + return load_mol_obj(path) except: raise IOError("Not a valid RDKit mol object passed.") @@ -500,10 +436,9 @@ def save_xyz(path, geom): """ # Check instance type - if not isinstance(geom, (isicle.geometry.Geometry, isicle.geometry.XYZGeometry)): + if not isinstance(geom, isicle.geometry.Geometry): raise TypeError( - "Must be `isicle.geometry.Geometry` or \ - `isicle.geometry.XYZGeometry` to save in XYZ format." + "Must be `isicle.geometry.Geometry` to save in XYZ format." ) # Write to file @@ -560,10 +495,9 @@ def save_mfj(path, geom): """ # Check instance type - if not isinstance(geom, (isicle.geometry.Geometry, isicle.geometry.XYZGeometry)): + if not isinstance(geom, isicle.geometry.Geometry): raise TypeError( - "Must be `isicle.geometry.Geometry` or \ - `isicle.geometry.XYZGeometry` to save in XYZ format." + "Must be `isicle.geometry.Geometry` to save in XYZ format." ) # Check for charges in global properties @@ -698,8 +632,7 @@ def save(path, data): Path to save file. Supported extensions include .pkl, .mfj, .xyz, .mol, .pdb, .inchi, .smi. data : obj - Object instance. Must be :obj:`~isicle.geometry.Geometry` or - :obj:`~isicle.geometry.XYZGeometry` for .xyz and .mfj. + Object instance. Must be :obj:`~isicle.geometry.Geometry` for .xyz and .mfj. """ diff --git a/isicle/md.py b/isicle/md.py index 67bfe49..69080cb 100644 --- a/isicle/md.py +++ b/isicle/md.py @@ -1,15 +1,15 @@ +from collections import defaultdict +import glob import os import subprocess + from rdkit import Chem -from rdkit.Chem import AllChem -from rdkit.Chem import rdForceFieldHelpers -from rdkit.Chem import ChemicalForceFields -from rdkit.Chem import rdDistGeom +from rdkit.Chem import AllChem, rdDetermineBonds, rdDistGeom import isicle -from isicle.geometry import Geometry, XYZGeometry +from isicle.geometry import Geometry from isicle.interfaces import WrapperInterface -from isicle.parse import XTBParser, TINKERParser +from isicle.parse import TINKERParser, XTBParser """ Files resulting from an xtb job always run in the same directory that the command is @@ -66,7 +66,7 @@ def md(geom, program="xtb", **kwargs): return _program_selector(program).run(geom, **kwargs) -class XTBWrapper(XYZGeometry, WrapperInterface): +class XTBWrapper(WrapperInterface): """ Wrapper for xtb functionality. @@ -92,6 +92,7 @@ class XTBWrapper(XYZGeometry, WrapperInterface): _default_value = None def __init__(self, **kwargs): + self.temp_dir = isicle.utils.mkdtemp() self.__dict__.update(dict.fromkeys(self._defaults, self._default_value)) self.__dict__.update(**kwargs) @@ -108,7 +109,6 @@ def set_geometry(self, geom): # Assign geometry self.geom = geom - self.basename = self.geom.basename # Save geometry self.save_geometry() @@ -124,20 +124,17 @@ def save_geometry(self, fmt="xyz"): ".pdb", ".pkl". """ - # Path operationspyth - self.temp_dir = isicle.utils.mkdtemp() + # Path operations self.fmt = fmt.lower() geomfile = os.path.join( - self.temp_dir, "{}.{}".format(self.basename, self.fmt.lower()) + self.temp_dir, "{}.{}".format(self.geom.basename, self.fmt.lower()) ) # All other formats isicle.io.save(geomfile, self.geom) self.geom.path = geomfile - def _configure_xtb( - self, forcefield="gfn2", optlevel="normal", charge=None, solvation=None - ): + def _configure_xtb(self, forcefield="gfn2", optlevel="normal", solvation=None): """ Set command line for xtb simulations. @@ -151,9 +148,6 @@ def _configure_xtb( Optimization convergence level Default : normal Supported : crude, sloppy, loose, lax, normal, tight, vtight extreme - charge : int - Charge of molecular system. - Default : 0 (Neutral charge) """ @@ -161,7 +155,7 @@ def _configure_xtb( s = "xtb " # Add geometry - s += "{}.{}".format(self.basename, self.fmt.lower()) + s += "{}.{}".format(self.geom.basename, self.fmt.lower()) # Add optimize tag s += " --opt " + optlevel + " " @@ -169,9 +163,8 @@ def _configure_xtb( # Add forcefield s += "--" + forcefield + " " - # Add optional charge - if charge is not None: - s += "--chrg " + charge + " " + # Add charge + s += "--chrg " + str(self.geom.get_charge()) + " " # Add optional implicit solvation if solvation is not None: @@ -180,7 +173,7 @@ def _configure_xtb( # Add output s += "&>" + " " - s += "{}.{}".format(self.basename, "out") + s += "{}.{}".format(self.geom.basename, "out") return s def _configure_crest( @@ -192,7 +185,6 @@ def _configure_crest( deprotonate=False, tautomerize=False, ion=None, - charge=None, dryrun=False, processes=1, solvation=None, @@ -226,9 +218,7 @@ def _configure_crest( ion : str Keyword to couple with protonate to ionize molecule with an ion other than a proton. See :obj:`~isicle.adduct.parse_ion` for list of ion options. - charge : int - Charge of molecular system. - Default : 0 (Neutral charge) + """ # Start base command @@ -236,7 +226,9 @@ def _configure_crest( # Add geometry s += str( - os.path.join(self.temp_dir, "{}.{}".format(self.basename, self.fmt.lower())) + os.path.join( + self.temp_dir, "{}.{}".format(self.geom.basename, self.fmt.lower()) + ) ) s += " " @@ -251,8 +243,8 @@ def _configure_crest( if ion is not None: s += "-swel " + ion + " " - if charge is not None: - s += "-chrg " + str(charge) + " " + # Add charge + s += "-chrg " + str(self.geom.get_charge()) + " " # Add dryrun option if dryrun: @@ -280,7 +272,7 @@ def _configure_crest( # Add output s += "&>" + " " - s += os.path.join(self.temp_dir, "{}.{}".format(self.basename, "out")) + s += os.path.join(self.temp_dir, "{}.{}".format(self.geom.basename, "out")) return s @@ -288,7 +280,6 @@ def configure( self, task="optimize", forcefield="gfn2", - charge=None, ewin=6, ion=None, optlevel="Normal", @@ -319,16 +310,14 @@ def configure( ion : str Keyword to couple with protonate to ionize molecule with an ion other than a proton. See :obj:`~isicle.adduct.parse_ion` for list of ion options. - charge : int - Charge of molecular system. - Default : 0 (Neutral charge) + """ - if type(task) == list: + if isinstance(task, list): raise TypeError("Initiate one xtb or crest job at a time.") - if type(forcefield) == list: + if isinstance(forcefield, list): raise TypeError("Initiate one forcefield at a time.") - if type(optlevel) == list: + if isinstance(optlevel, list): raise TypeError("Initiate one opt level at a time.") if task == "optimize": @@ -356,14 +345,13 @@ def configure( deprotonate=d, tautomerize=t, ion=i, - charge=charge, dryrun=dryrun, processes=processes, solvation=solvation, ignore_topology=ignore_topology, ) else: - raise Error( + raise ValueError( "Task not assigned properly, please choose optimize, conformer, protonate, deprotonate, or tautomerize" ) @@ -375,38 +363,129 @@ def submit(self): """ Run xtb or crest simulation according to configured inputs. """ - owd = os.getcwd() + cwd = os.getcwd() os.chdir(self.temp_dir) - job = self.config - subprocess.call(job, shell=True) - os.chdir(owd) + subprocess.call(self.config, shell=True) + os.chdir(cwd) def finish(self): """ Parse results, save xtb output files, and clean temporary directory """ - parser = XTBParser() + # Get list of outputs + outfiles = glob.glob(os.path.join(self.temp_dir, "*")) + + # Result container + result = {} + + # Split out geometry files + geomfiles = sorted([x for x in outfiles if x.endswith(".xyz")]) + outfiles = sorted([x for x in outfiles if not x.endswith(".xyz")]) + + # # Atom count + # n_atoms = xtbw.geom.get_natoms() + + # Charge lookup + charge_lookup = defaultdict(lambda: 0, {"protonated": 1, "deprotonated": -1}) + + # Enumerate geometry files + for geomfile in geomfiles: + # Extract unique basename + # Replace ancillary "crest_" + basename = os.path.splitext( + os.path.basename(geomfile).replace("crest_", "") + )[0] + + # Only process of-interest structures + if basename in [ + "struc", + "best", + "xtbopt", + "protonated", + "deprotonated", + "tautomers", + "conformers", + "rotamers" + ]: + # Open file + with open(geomfile, "r") as f: + contents = f.readlines() + + # Get file-specific atom count + n_atoms = int(contents[0].strip()) + + # Split into individual xyz + contents = [ + "".join(contents[i : i + n_atoms + 2]) + for i in range(0, len(contents), n_atoms + 2) + ] + + # Iterate XYZ content + geoms = [] + for xyzblock in contents: + # Construct mol from XYZ + raw_mol = Chem.MolFromXYZBlock(xyzblock) + mol = Chem.Mol(raw_mol) + rdDetermineBonds.DetermineBonds( + mol, charge=self.geom.get_charge() + charge_lookup[basename] + ) + + # Initialize Geometry instance + geom = isicle.geometry.Geometry( + mol=mol, basename=self.geom.basename + ) + + # Append to list of geometries + geoms.append(geom) + + # Add to result container + if len(geoms) > 1: + result[basename] = geoms + else: + result[basename] = geoms[0] + + # Enumerate output files + for outfile in outfiles: + # Split name and extension + if "." in outfile: + basename, ext = os.path.basename(outfile).rsplit(".", 1) + + # No extension + else: + basename = os.path.basename(outfile) + ext = None - parser.load(os.path.join(self.temp_dir, self.basename + ".out")) - self.output = parser.load(os.path.join(self.temp_dir, self.basename + ".out")) + # Key management + if ext in [None, "tmp", "0"]: + key = basename + else: + key = ext + + # Read output content + with open(outfile, "rb") as f: + contents = f.read() + + # Attempt utf-8 decode + try: + result[key] = contents.decode("utf-8") + except UnicodeDecodeError: + result[key] = contents + + # Renaming key + rename = { + "struc": "input", + "xtbopt": "final", + "original": "coord_original", + "protonated": "cations", + "deprotonated": "anions", + } - result = parser.parse() + # Rename matching keys + for key in result.keys() & rename.keys(): + result[rename[key]] = result.pop(key) - self.__dict__.update(result) - - for i in self.geom: - i.add___dict__({k: v for k, v in result.items() if k != "geom"}) - i.__dict__.update(basename=self.basename) - - if self.task != "optimize": - conformerID = 1 - for i in self.geom: - i.__dict__.update(conformerID=conformerID) - conformerID += 1 - return self - else: - self.geom = self.geom[0] + return result def run(self, geom, **kwargs): """ @@ -441,47 +520,12 @@ def run(self, geom, **kwargs): self.submit() # Finish/clean up - self.finish() - - return self - - def get_structures(self): - """ - Extract all structures from containing object as a conformational ensemble. - - Returns - ------- - :obj:`~isicle.conformers.ConformationalEnsemble` - Conformational ensemble. - - """ - if isinstance(self.geom, isicle.conformers.ConformationalEnsemble): - return self.geom - - raise TypeError( - "Object does not contain multiple structures. Use `get_structure` instead." - ) - - def get_structure(self): - """ - Extract structure from containing object. - - Returns - ------- - :obj:`~isicle.geometry.XYZGeometry` - Structure instance. - - """ - if isinstance(self.geom, isicle.conformers.ConformationalEnsemble): - raise TypeError( - "Object contains multiple structures. Use `get_structures` instead." - ) + result = self.finish() - return self.geom + return result class RDKitWrapper(Geometry, WrapperInterface): - """ Wrapper for RDKit functionality. @@ -517,7 +561,6 @@ def set_geometry(self, geom): # Assign geometry self.geom = geom - self.basename = self.geom.basename def configure(self, method: str = "etkdgv3", numConfs: int = 10, **kwargs): """ @@ -641,15 +684,15 @@ def finish(self): """ Parse RDKit conformers generated. """ - confCount = self.geom.mol.GetNumConformers() + conf_count = self.geom.mol.GetNumConformers() conformers = [ - isicle.load(Chem.Mol(self.geom.mol, confId=i), basename=self.basename) - for i in range(confCount) + isicle.load(Chem.Mol(self.geom.mol, confId=i)) for i in range(conf_count) ] - self.geom = conformers - for conf, label in zip(self.geom, range(confCount)): - conf.__dict__.update(conformerID=label) + for conf, label in zip(conformers, range(conf_count)): + conf.__dict__.update(conformerID=label, basename=self.geom.basename) + + return isicle.conformers.ConformationalEnsemble(conformers) def run(self, geom, **kwargs): """ @@ -683,47 +726,10 @@ def run(self, geom, **kwargs): self.submit() # Finish/clean up - self.finish() - - return self - - def get_structures(self): - """ - Extract all structures from containing object as a conformational ensemble. - - Returns - ------- - :obj:`~isicle.conformers.ConformationalEnsemble` - Conformational ensemble. - - """ - if isinstance(self.geom, isicle.conformers.ConformationalEnsemble): - return self.geom - - raise TypeError( - "Object does not contain multiple structures. Use `get_structure` instead." - ) - - def get_structure(self): - """ - Extract structure from containing object. - - Returns - ------- - :obj:`~isicle.geometry.XYZGeometry` - Structure instance. - - """ - if isinstance(self.geom, isicle.conformers.ConformationalEnsemble): - raise TypeError( - "Object contains multiple structures. Use `get_structures` instead." - ) - - return self.geom + return self.finish() class TINKERWrapper(Geometry, WrapperInterface): - """ Wrapper for TINKER functionality. @@ -994,7 +1000,7 @@ def getMMFF_large_atom_type(mmff_props, atom, m): xyz = "" # Set header line with number of atoms and basename - xyz += "{:>6} {}\n".format(mol.GetNumAtoms(), self.basename) + xyz += "{:>6} {}\n".format(mol.GetNumAtoms(), self.geom.basename) for atom in mol.GetAtoms(): bond_list = [] @@ -1032,7 +1038,6 @@ def set_geometry(self, geom): # Assign geometry self.geom = geom - self.basename = self.geom.basename self.tinkerxyz = self._convert_to_tinkerxyz() @@ -1054,7 +1059,7 @@ def save_geometry(self, fmt="xyz"): self.temp_dir = isicle.utils.mkdtemp() self.fmt = fmt.lower() geomfile = os.path.join( - self.temp_dir, "{}.{}".format(self.basename, self.fmt.lower()) + self.temp_dir, "{}.{}".format(self.geom.basename, self.fmt.lower()) ) with open(geomfile, "w+") as f: @@ -1066,7 +1071,7 @@ def save_geometry(self, fmt="xyz"): def configure(self, task="scan", tinker_path="~/tinker"): config = tinker_path + "/bin/" + task + " " + self.geom.path + " " config += tinker_path + "/params/mmff.prm 0 10 20 0.00001 " - config += "| tee ./" + self.basename + ".tout" + config += "| tee ./" + self.geom.basename + ".tout" self.config = config @@ -1080,8 +1085,10 @@ def submit(self): def finish(self): parser = TINKERParser() - parser.load(os.path.join(self.temp_dir, self.basename + ".tout")) - self.output = parser.load(os.path.join(self.temp_dir, self.basename + ".tout")) + parser.load(os.path.join(self.temp_dir, self.geom.basename + ".tout")) + self.output = parser.load( + os.path.join(self.temp_dir, self.geom.basename + ".tout") + ) result = parser.parse() @@ -1090,7 +1097,7 @@ def finish(self): conformerID = 1 for i in self.geom: i.add___dict__({k: v for k, v in result.items() if k != "geom"}) - i.__dict__.update(basename=self.basename) + i.__dict__.update(basename=self.geom.basename) i.__dict__.update(conformerID=conformerID) conformerID += 1 return self @@ -1132,37 +1139,3 @@ def run(self, geom, **kwargs): self.finish() return self - - def get_structures(self): - """ - Extract all structures from containing object as a conformational ensemble. - - Returns - ------- - :obj:`~isicle.conformers.ConformationalEnsemble` - Conformational ensemble. - - """ - if isinstance(self.geom, isicle.conformers.ConformationalEnsemble): - return self.geom - - raise TypeError( - "Object does not contain multiple structures. Use `get_structure` instead." - ) - - def get_structure(self): - """ - Extract structure from containing object. - - Returns - ------- - :obj:`~isicle.geometry.XYZGeometry` - Structure instance. - - """ - if isinstance(self.geom, isicle.conformers.ConformationalEnsemble): - raise TypeError( - "Object contains multiple structures. Use `get_structures` instead." - ) - - return self.geom diff --git a/isicle/mobility.py b/isicle/mobility.py index 55a2e45..107f880 100644 --- a/isicle/mobility.py +++ b/isicle/mobility.py @@ -4,7 +4,6 @@ from importlib import resources import isicle -from isicle.geometry import XYZGeometry from isicle.interfaces import WrapperInterface @@ -21,7 +20,7 @@ def _mobcal_selector(): raise OSError('mobcal installation not found') -class MobcalWrapper(XYZGeometry, WrapperInterface): +class MobcalWrapper(WrapperInterface): def __init__(self): pass diff --git a/isicle/parse.py b/isicle/parse.py index 7a0be92..6ac1ee3 100644 --- a/isicle/parse.py +++ b/isicle/parse.py @@ -1,8 +1,5 @@ -import glob -import os import pickle import re -from os.path import splitext import numpy as np import pandas as pd @@ -40,7 +37,7 @@ def _find_output_by_header(self, header): def _parse_protocol(self): return self.data["inp"] - def _parse_geom(self): + def _parse_geometry(self): return self.data["xyz"] def _parse_energy(self): @@ -61,37 +58,41 @@ def _parse_energy(self): return evals[-1] def _parse_frequency(self): - # Define columns - columns = ["wavenumber", "eps", "intensity", "TX", "TY", "TZ"] - - # Split sections by delimiter - blocks = self.data["hess"].split("$") - - # Search for frequency values - freq_block = [x for x in blocks if x.startswith("ir_spectrum")] - - # Frequency values not found - if len(freq_block) == 0: - return None - - # Grab last frequency block - # Doubtful if more than one, but previous results in list - freq_block = freq_block[-1] - - # Split block into lines - lines = freq_block.split("\n") - - # Map float over values - vals = np.array( - [ - list(map(float, x.split())) - for x in lines - if len(x.split()) == len(columns) - ] - ) + if "hess" in self.data: + # Define columns + columns = ["wavenumber", "eps", "intensity", "TX", "TY", "TZ"] + + # Split sections by delimiter + blocks = self.data["hess"].split("$") + + # Search for frequency values + freq_block = [x for x in blocks if x.startswith("ir_spectrum")] + + # Frequency values not found + if len(freq_block) == 0: + return None + + # Grab last frequency block + # Doubtful if more than one, but previous results in list + freq_block = freq_block[-1] + + # Split block into lines + lines = freq_block.split("\n") + + # Map float over values + vals = np.array( + [ + list(map(float, x.split())) + for x in lines + if len(x.split()) == len(columns) + ] + ) - # Zip columns and values - return dict(zip(columns, vals.T)) + # Zip columns and values + return dict(zip(columns, vals.T)) + + # No frequency info + return None def _parse_timing(self): # Grab only last few lines @@ -297,7 +298,7 @@ def _parse_connectivity(self): def parse(self): result = { "protocol": self._parse_protocol(), - "geom": self._parse_geom(), + "geometry": self._parse_geometry(), "total_dft_energy": self._parse_energy(), "orbital_energies": self._parse_orbital_energies(), "shielding": self._parse_shielding(), @@ -318,6 +319,12 @@ def parse(self): # Filter empty fields result = {k: v for k, v in result.items() if v is not None} + # Add result info to geometry object + if "geometry" in result: + result["geometry"].add___dict__( + {k: v for k, v in result.items() if k != "geometry"} + ) + # Store attribute self.result = result @@ -328,35 +335,22 @@ def save(self, path): class NWChemParser(FileParserInterface): - """ - Extract text from an NWChem simulation output file. - """ + """Extract information from NWChem simulation output files.""" - def __init__(self): - self.contents = None - self.result = None - self.path = None + def __init__(self, data=None): + self.data = data - def load(self, path: str): - """ - Load in the data file - """ - with open(path, "r") as f: - self.contents = f.readlines() - self.path = path - return self.contents + self.result = {} + + def load(self, path): + self.data = isicle.io.load_pickle(path) def _parse_geometry(self): """ Add docstring """ - search = os.path.dirname(self.path) - geoms = sorted(glob.glob(os.path.join(search, "*.xyz"))) - if len(geoms) > 0: - return isicle.io.load(geoms[-1]) - - raise Exception + return self.data["xyz"]["final"] def _parse_energy(self): """ @@ -368,7 +362,7 @@ def _parse_energy(self): energy = None # Cycle through file - for line in self.contents: + for line in self.data["out"].split("\n"): if "Total DFT energy" in line: # Overwrite last saved energy energy = float(line.split()[-1]) @@ -385,7 +379,7 @@ def _parse_shielding(self): shield_atoms = [] shields = [] - for line in self.contents: + for line in self.data["out"].split("\n"): if " SHIELDING" in line: shield_idxs = [int(x) for x in line.split()[2:]] if len(shield_idxs) == 0: @@ -406,12 +400,18 @@ def _parse_shielding(self): if len(shields) > 1: return {"index": shield_idxs, "atom": shield_atoms, "shielding": shields} - raise Exception + # No shielding data found + return None def _parse_spin(self): """ Add docstring """ + + # No spin + if "SPINSPIN" not in self.data["nw"]: + return None + # TO DO: Add g-factors # Declaring couplings @@ -421,7 +421,7 @@ def _parse_spin(self): g_factor = [] ready = False - for line in self.contents: + for line in self.data["out"].split("\n"): if "Atom " in line: line = line.split() idx1 = int((line[1].split(":"))[0]) @@ -443,12 +443,16 @@ def _parse_spin(self): g = float(line[5]) g_factor.append(g) - return { - "pair indices": coup_pairs, - "spin couplings": coup, - "index": index, - "g-tensors": g_factor, - } + if len(coup_pairs) > 0: + return { + "pair indices": coup_pairs, + "spin couplings": coup, + "index": index, + "g-tensors": g_factor, + } + + # No spin data found + return None def _parse_frequency(self): """ @@ -466,7 +470,8 @@ def _parse_frequency(self): natoms = None has_frequency = None - for i, line in enumerate(self.contents): + lines = self.data["out"].split("\n") + for i, line in enumerate(lines): if ("geometry" in line) and (natoms is None): atom_start = i + 7 if ("Atomic Mass" in line) and (natoms is None): @@ -492,19 +497,19 @@ def _parse_frequency(self): if has_frequency is True: freq = np.array( - [float(x.split()[1]) for x in self.contents[freq_start : freq_stop + 1]] + [float(x.split()[1]) for x in lines[freq_start : freq_stop + 1]] ) intensity_au = np.array( - [float(x.split()[3]) for x in self.contents[freq_start : freq_stop + 1]] + [float(x.split()[3]) for x in lines[freq_start : freq_stop + 1]] ) intensity_debyeangs = np.array( - [float(x.split()[4]) for x in self.contents[freq_start : freq_stop + 1]] + [float(x.split()[4]) for x in lines[freq_start : freq_stop + 1]] ) intensity_KMmol = np.array( - [float(x.split()[5]) for x in self.contents[freq_start : freq_stop + 1]] + [float(x.split()[5]) for x in lines[freq_start : freq_stop + 1]] ) intensity_arbitrary = np.array( - [float(x.split()[6]) for x in self.contents[freq_start : freq_stop + 1]] + [float(x.split()[6]) for x in lines[freq_start : freq_stop + 1]] ) return { @@ -519,7 +524,8 @@ def _parse_frequency(self): "zero-point correction": zpe, } - raise Exception + # No frequency data found + return None def _parse_charge(self): """ @@ -531,7 +537,7 @@ def _parse_charge(self): charges = [] ready = False - for line in self.contents: + for line in self.data["out"].split("\n"): # Load charges from table if "Atom Charge Shell Charges" in line: # Table header found. Overwrite anything saved previously @@ -571,7 +577,8 @@ def _parse_charge(self): return df.Charge.values - raise Exception + # No charge data found + return None def _parse_timing(self): """ @@ -588,7 +595,7 @@ def _parse_timing(self): opt = False freq = False - for i, line in enumerate(self.contents): + for i, line in enumerate(self.data["out"].split("\n")): # ? if "No." in line and len(indices) == 0: indices.append(i + 2) # 0 @@ -619,87 +626,56 @@ def _parse_timing(self): # natoms = int(self.contents[indices[1] - 1].split()[0]) - return { - "single point": preoptTime, - "geometry optimization": geomoptTime, - "frequency": freqTime, - "total": cpuTime, - } + if cpuTime != 0: + return { + "single point": preoptTime, + "geometry optimization": geomoptTime, + "frequency": freqTime, + "total": cpuTime, + "success": True, + } + return None def _parse_molden(self): """ Add docstring """ - search = splitext(self.path)[0] - m = glob.glob(search + "*.molden") + if "molden" in self.data: + return self["molden"] - if len(m) != 1: - raise Exception - - return m[0] + return None def _parse_protocol(self): """ Parse out dft protocol """ - functional = [] - basis_set = [] - solvation = [] - tasks = [] - basis = None - func = None - solvent = None - - for line in self.contents: - if "* library" in line: - basis = line.split()[-1] - if " xc " in line: - func = line.split(" xc ")[-1].strip() - if "solvent " in line: - solvent = line.split()[-1] - if "task dft optimize" in line: - tasks.append("optimize") - basis_set.append(basis) - functional.append(func) - solvation.append(solvent) - if "SHIELDING" in line: - tasks.append("shielding") - basis_set.append(basis) - functional.append(func) - solvation.append(solvent) - if "SPINSPIN" in line: - tasks.append("spin") - basis_set.append(basis) - functional.append(func) - solvation.append(solvent) - if "freq " in line: - tasks.append("frequency") - basis_set.append(basis) - functional.append(func) - solvation.append(solvent) - - return { - "functional": functional, - "basis set": basis_set, - "solvation": solvation, - "tasks": tasks, - } + return self.data["nw"] def _parse_connectivity(self): """ Add docstring """ - coor_substr = "internuclear distances" + + # Split lines + lines = self.data["out"].split("\n") # Extracting Atoms & Coordinates - ii = [i for i in range(len(self.contents)) if coor_substr in self.contents[i]] + coor_substr = "internuclear distances" + ii = [i for i in range(len(lines)) if coor_substr in lines[i]] + + # Exit condition + if len(ii) == 0: + return None + + # Sort hits ii.sort() + # Iterate connectivity g = ii[0] + 4 connectivity = [] - while g <= len(self.contents) - 1: - if "-" not in self.contents[g]: - line = self.contents[g].split() + while g <= len(lines) - 1: + if "-" not in lines[g]: + line = lines[g].split() pair = [line[1], line[4], int(line[0]), int(line[3])] connectivity.append(pair) @@ -707,85 +683,49 @@ def _parse_connectivity(self): break g += 1 - return connectivity - - def parse(self): - """ - Extract relevant information from NWChem output. - - Parameters - ---------- - to_parse : list of str - geometry, energy, shielding, spin, frequency, molden, charge, timing - - """ - - # Check that the file is valid first - if len(self.contents) == 0: - raise RuntimeError("No contents to parse: {}".format(self.path)) - if "Total times cpu" not in self.contents[-1]: - raise RuntimeError("Incomplete NWChem run: {}".format(self.path)) - - # Initialize result object to store info - result = {} - - try: - result["protocol"] = self._parse_protocol() - except: - pass - - try: - result["geom"] = self._parse_geometry() - - except: - pass - - try: - result["energy"] = self._parse_energy() - except: - pass + # Check for result + if len(connectivity) > 0: + return connectivity - try: - result["shielding"] = self._parse_shielding() - except: # Must be no shielding info - pass - - try: - result["spin"] = self._parse_spin() - except: - pass + return None - try: - result["frequency"] = self._parse_frequency() - except: - pass + def parse(self): + result = { + "protocol": self._parse_protocol(), + "geometry": self._parse_geometry(), + "total_dft_energy": self._parse_energy(), + # "orbital_energies": self._parse_orbital_energies(), + "shielding": self._parse_shielding(), + "spin": self._parse_spin(), + "frequency": self._parse_frequency(), + "molden": self._parse_molden(), + "charge": self._parse_charge(), + "timing": self._parse_timing(), + "connectivity": self._parse_connectivity(), + } - try: - result["molden"] = self._parse_molden() - except: - pass + # Pop success from timing + if result["timing"] is not None: + result["success"] = result["timing"].pop("success") + else: + result["success"] = False - try: - result["charge"] = self._parse_charge() - except: - pass + # Filter empty fields + result = {k: v for k, v in result.items() if v is not None} - try: - result["timing"] = self._parse_timing() - except: - pass + # Add result info to geometry object + if "geometry" in result: + result["geometry"].add___dict__( + {k: v for k, v in result.items() if k != "geometry"} + ) - try: - result["connectivity"] = self._parse_connectivity() - except: - pass + # Store attribute + self.result = result return result def save(self, path): - with open(path, "wb") as f: - pickle.dump(self, f) - return + isicle.io.save_pickle(path, self.result) class ImpactParser(FileParserInterface): @@ -933,22 +873,18 @@ class XTBParser(FileParserInterface): Add docstring """ - def __init__(self): - """ - Add docstring - """ - self.contents = None - self.result = None - self.path = None + def __init__(self, data=None): + self.data = data - def load(self, path: str): - """ - Load in the data file - """ - with open(path, "r") as f: - self.contents = f.readlines() - self.path = path - # return self.contents + self.result = {} + + if data is not None: + self.lines = self.data["out"].split("\n") + + def load(self, path): + self.data = isicle.io.load_pickle(path) + + self.lines = self.data["out"].split("\n") def _crest_energy(self): """ @@ -959,11 +895,11 @@ def _crest_energy(self): population = [] ready = False - for h in range(len(self.contents), 0, -1): - if "Erel/kcal" in self.contents[h]: + for h in range(len(self.lines) - 1, -1, -1): + if "Erel/kcal" in self.lines[h]: g = h + 1 - for j in range(g, len(self.contents)): - line = self.contents[j].split() + for j in range(g, len(self.lines)): + line = self.lines[j].split() if len(line) == 8: relative_energy.append(float(line[1])) total_energy.append(float(line[2])) @@ -972,7 +908,7 @@ def _crest_energy(self): if "/K" in line[1]: break - if ready == True: + if ready is True: break return { @@ -993,7 +929,7 @@ def grab_time(line): return ":".join(line[1:]).strip("\n") ready = False - for line in self.contents: + for line in self.lines: if "test MD wall time" in line: test_MD = grab_time(line) ready = True @@ -1004,7 +940,7 @@ def grab_time(line): if "multilevel OPT wall time" in line: multilevel_OPT = grab_time(line) - if "MD wall time" in line and ready == True: + if "MD wall time" in line and ready is True: MD = grab_time(line) ready = False @@ -1030,19 +966,19 @@ def _isomer_energy(self): complete = False relative_energies = [] total_energies = [] - for i in range(len(self.contents), 0, -1): - if "structure ΔE(kcal/mol) Etot(Eh)" in self.contents[i]: + for i in range(len(self.lines) - 1, -1, -1): + if "structure ΔE(kcal/mol) Etot(Eh)" in self.lines[i]: h = i + 1 - for j in range(h, len(self.contents)): - if self.contents[j] != " \n": - line = self.contents[j].split() + for j in range(h, len(self.lines)): + if self.lines[j] != " \n": + line = self.lines[j].split() relative_energies.append(float(line[1])) total_energies.append(float(line[2])) else: complete = True break - if complete == True: + if complete is True: break return {"relative energy": relative_energies, "total energy": total_energies} @@ -1058,7 +994,7 @@ def grab_time(line): return ":".join(line[1:]).strip("\n") - for line in self.contents: + for line in self.lines: if "LMO calc. wall time" in line: LMO_time = grab_time(line) @@ -1078,7 +1014,7 @@ def _opt_energy(self): """ Add docstring """ - for line in self.contents: + for line in self.lines: if "TOTAL ENERGY" in line: energy = line.split()[3] + " Hartrees" @@ -1099,7 +1035,7 @@ def grab_time(line): scf = False anc = False - for line in self.contents: + for line in self.lines: if "wall-time" in line and tot is False: total_time = grab_time(line) tot = True @@ -1118,62 +1054,45 @@ def grab_time(line): "ANC optimizer wall time": anc_time, } - def _parse_energy(self): - """ - Add docstring - """ - if self.parse_crest == True: - return self._crest_energy() - if self.parse_opt == True: - return self._opt_energy() - if self.parse_isomer == True: - return self._isomer_energy() - - def _parse_timing(self): - """ - Add docstring - """ - if self.parse_crest == True: - return self._crest_timing() - if self.parse_opt == True: - return self._opt_timing() - if self.parse_isomer == True: - return self._isomer_timing() - def _parse_protocol(self): """ Add docstring """ protocol = None - for line in self.contents: + for line in self.lines: if " > " in line: protocol = line.strip("\n") if "program call" in line: protocol = (line.split(":")[1]).strip("\n") return protocol - def _parse_xyz(self): + def _parse_geometry(self): """ Split .xyz into separate XYZGeometry instances """ - - FILE = self.xyz_path - if len(list(pybel.readfile("xyz", FILE))) > 1: - geom_list = [] - count = 1 - XYZ = FILE.split(".")[0] - for geom in pybel.readfile("xyz", FILE): - geom.write("xyz", "%s_%d.xyz" % (XYZ, count)) - geom_list.append("%s_%d.xyz" % (XYZ, count)) - count += 1 - - x = [isicle.io.load(i) for i in geom_list] - - else: - x = [isicle.io.load(self.xyz_path)] - - return isicle.conformers.ConformationalEnsemble(x) + geometries = {} + # Add geometry info + for key in [ + "conformers", + "rotamers", + "final", + "best", + "protonated", + "deprotonated", + "tautomers", + ]: + if key in self.data: + geometries[key] = self.data[key] + + if len(geometries) > 1: + return geometries + + return geometries.popitem()[1] + + # TODO + def _parse_orbital_energies(self): + pass def parse(self): """ @@ -1181,10 +1100,10 @@ def parse(self): """ # Check that the file is valid first - if len(self.contents) == 0: + if len(self.lines) == 0: raise RuntimeError("No contents to parse: {}".format(self.path)) - last_lines = "".join(self.contents[-10:]) + last_lines = "".join(self.lines[-10:]) if ( ("terminat" not in last_lines) & ("normal" not in last_lines) @@ -1192,70 +1111,29 @@ def parse(self): ): raise RuntimeError("XTB job failed: {}".format(self.path)) - self.parse_crest = False - self.parse_opt = False - self.parse_isomer = False - # Initialize result object to store info - result = {} - result["protocol"] = self._parse_protocol() - - try: - result["timing"] = self._parse_timing() - except: - pass - - try: - result["energy"] = self._parse_energy() - except: - pass - - # Parse geometry from assoc. XYZ file - try: - if self.path.endswith("xyz"): - try: - self.xyz_path = self.path - result["geom"] = self._parse_xyz() - - except: - pass - - if self.path.endswith("out") or self.path.endswith("log"): - # try geometry parsing - try: - XYZ = None - if result["protocol"].split()[0] == "xtb": - self.parse_opt = True - XYZ = "xtbopt.xyz" - if result["protocol"].split()[1] == "crest": - if "-deprotonate" in result["protocol"]: - self.parse_isomer = True - XYZ = "deprotonated.xyz" - elif "-protonate" in result["protocol"]: - self.parse_isomer = True - XYZ = "protonated.xyz" - elif "-tautomer" in result["protocol"]: - self.parse_isomer = True - XYZ = "tautomers.xyz" - else: - self.parse_crest = True - XYZ = "crest_conformers.xyz" - - if XYZ is None: - raise RuntimeError( - "XYZ file associated with XTB job not available,\ - please parse separately." - ) + result = { + "protocol": self._parse_protocol(), + "geometry": self._parse_geometry() + } - else: - temp_dir = os.path.dirname(self.path) - self.xyz_path = os.path.join(temp_dir, XYZ) + if result["protocol"].split()[0] == "xtb": + result["timing"] = self._opt_timing() + result["energy"] = self._opt_energy() + + elif result["protocol"].split()[1] == "crest": + if any( + [ + x in result["protocol"] + for x in ["-deprotonate", "-protonate", "-tautomer"] + ] + ): + result["timing"] = self._isomer_timing() + result["energy"] = self._isomer_energy() + else: + result["timing"] = self._crest_timing() + result["energy"] = self._crest_energy() - result["geom"] = self._parse_xyz() - except: - pass - except: - pass return result def save(self, path): diff --git a/isicle/qm.py b/isicle/qm.py index 1927641..b03f2fd 100644 --- a/isicle/qm.py +++ b/isicle/qm.py @@ -1,12 +1,12 @@ import glob import os import subprocess +from collections import OrderedDict from itertools import cycle from string import Template import isicle from isicle.interfaces import WrapperInterface -from isicle.parse import NWChemParser from isicle.utils import safelist @@ -117,7 +117,6 @@ def set_geometry(self, geom): # Assign geometry self.geom = geom.__copy__() - self.basename = self.geom.basename # Save self._save_geometry() @@ -180,15 +179,10 @@ def _configure_header(self, scratch_dir=None, mem_global=1600, 'echo\n' 'print low\n').format(**d) - def _configure_load(self, charge=0): + def _configure_load(self): """ Generate geometry load block of NWChem configuration. - Parameters - ---------- - charge : int - Nominal charge of the molecule to be optimized. - Returns ------- str @@ -198,7 +192,7 @@ def _configure_load(self, charge=0): d = {'basename': self.geom.basename, 'dirname': self.temp_dir, - 'charge': charge} + 'charge': self.geom.get_charge()} return ('\ncharge {charge}\n' 'geometry noautoz noautosym\n' @@ -625,7 +619,7 @@ def generate_pairs(mol, bonds=bonds): return s def configure(self, tasks='energy', functional='b3lyp', - basis_set='6-31g*', ao_basis='cartesian', charge=0, + basis_set='6-31g*', ao_basis='cartesian', atoms=['C', 'H'], bonds=1, temp=298.15, cosmo=False, solvent='H2O', gas=False, max_iter=150, mem_global=1600, mem_heap=100, mem_stack=600, scratch_dir=None, processes=12): @@ -644,8 +638,6 @@ def configure(self, tasks='energy', functional='b3lyp', ao_basis : str or list of str Angular function selection ("spherical", "cartesian"). Supply globally or per task. - charge : int - Nominal charge of the molecule to be optimized. atoms : list of str Atom types of interest. Only used for `spin` and `shielding` tasks. temp : float @@ -721,7 +713,7 @@ def configure(self, tasks='energy', functional='b3lyp', mem_stack=mem_stack) # Load geometry - config += self._configure_load(charge=charge) + config += self._configure_load() # Configure tasks for task, f, b, a, c, so in zip(tasks, cycle(functional), cycle(basis_set), @@ -842,27 +834,52 @@ def finish(self): """ - # Initialize parser - parser = NWChemParser() + # Get list of outputs + outfiles = glob.glob(os.path.join(self.temp_dir, '*')) - # Open output file - self.output = parser.load(os.path.join(self.temp_dir, - self.basename + '.out')) + # Result container + result = {} - # Parse results - result = parser.parse() + # Split out geometry files + geomfiles = sorted([x for x in outfiles if x.endswith('.xyz')]) + outfiles = sorted([x for x in outfiles if not x.endswith('.xyz')]) - # Update this instance attributes - self.__dict__.update(result) + # Enumerate geometry files + result['xyz'] = OrderedDict() + for geomfile in geomfiles: + geom = isicle.load(geomfile, charge=self.geom.get_charge()) + + if '_geom-' in geomfile: + idx = int(os.path.basename(geomfile).split('-')[-1].split('.')[0]) + result['xyz'][idx] = geom - # Update geom attributes - self.geom.add___dict__( - {k: v for k, v in result.items() if k != 'geom'}) + else: + result['xyz']['input'] = geom + + # Rename final geometry + result['xyz']['final'] = list(result['xyz'].values())[-1] - return self + # Enumerate output files + for outfile in outfiles: + # Split name and extension + basename, ext = os.path.basename(outfile).rsplit('.', 1) + + # Read output content + with open(outfile, 'rb') as f: + contents = f.read() + + # Attempt utf-8 decode + try: + result[ext] = contents.decode('utf-8') + except UnicodeDecodeError: + result[ext] = contents + + # Assign to attribute + self.result = result + return self.result def run(self, geom, template=None, tasks='energy', functional='b3lyp', - basis_set='6-31g*', ao_basis='cartesian', charge=0, + basis_set='6-31g*', ao_basis='cartesian', atoms=['C', 'H'], bonds=1, temp=298.15, cosmo=False, solvent='H2O', gas=False, max_iter=150, mem_global=1600, mem_heap=100, mem_stack=600, scratch_dir=None, processes=12): @@ -886,8 +903,6 @@ def run(self, geom, template=None, tasks='energy', functional='b3lyp', ao_basis : str or list of str Angular function selection ("spherical", "cartesian"). Supply globally or per task. - charge : int - Nominal charge of the molecule to be optimized. atoms : list of str Atom types of interest. Only used for `spin` and `shielding` tasks. temp : float @@ -929,9 +944,8 @@ def run(self, geom, template=None, tasks='energy', functional='b3lyp', else: self.configure(tasks=tasks, functional=functional, basis_set=basis_set, - ao_basis=ao_basis, charge=charge, - atoms=atoms, bonds=bonds, temp=temp, - cosmo=cosmo, solvent=solvent, gas=gas, + ao_basis=ao_basis,atoms=atoms, bonds=bonds, + temp=temp, cosmo=cosmo, solvent=solvent, gas=gas, max_iter=max_iter, mem_global=mem_global, mem_heap=mem_heap, mem_stack=mem_stack, scratch_dir=scratch_dir, processes=processes) @@ -944,6 +958,22 @@ def run(self, geom, template=None, tasks='energy', functional='b3lyp', return result + def save(self, path): + """ + Save result as pickle file. + + Parameters + ---------- + path : str + Path to output file. + + """ + + if hasattr(self, 'result'): + isicle.io.save_pickle(path, self.result) + else: + raise AttributeError("Object must have `result` attribute") + class ORCAWrapper(WrapperInterface): """ @@ -988,8 +1018,7 @@ def set_geometry(self, geom): """ # Assign geometry - self.geom = geom.__copy__() - self.basename = self.geom.basename + self.geom = geom.__copy__() # Save self._save_geometry() @@ -1011,7 +1040,7 @@ def _save_geometry(self): # Store path self.geom.path = geomfile - def configure(self, simple_input=[], block_input={}, processes=1, **kwargs): + def configure(self, simple_input=[], block_input={}, spin_multiplicity=1, processes=1, **kwargs): """ Configure ORCA simulation. @@ -1026,6 +1055,8 @@ def configure(self, simple_input=[], block_input={}, processes=1, **kwargs): directly, include as a complete string. Include key:value pairs as tuples. See `this `__ section of the ORCA docs. + spin_multiplicity : int + Spin multiplicity of the molecule. processes : int Number of parallel processes. kwargs @@ -1049,7 +1080,7 @@ def configure(self, simple_input=[], block_input={}, processes=1, **kwargs): config += '%PAL NPROCS {} END\n'.format(processes) # Add geometry context - config += '* xyzfile 0 1 {}\n'.format(self.geom.path) + config += '* xyzfile {:d} {:d} {}\n'.format(self.geom.get_charge(), spin_multiplicity, self.geom.path) # Expand keyword args for k, v in kwargs.items(): @@ -1144,15 +1175,21 @@ def finish(self): else: var_name = ext - # Read output content - with open(outfile, 'rb') as f: - contents = f.read() + # Load geometry + if var_name == 'xyz': + result[var_name] = isicle.load(outfile, charge=self.geom.get_charge()) - # Attempt utf-8 decode - try: - result[var_name] = contents.decode('utf-8') - except UnicodeDecodeError: - result[var_name] = contents + # Load other files + else: + # Read output content + with open(outfile, 'rb') as f: + contents = f.read() + + # Attempt utf-8 decode + try: + result[var_name] = contents.decode('utf-8') + except UnicodeDecodeError: + result[var_name] = contents # Assign to attribute self.result = result