Skip to content

Commit

Permalink
Merge pull request #48 from molssi-seamm/dev
Browse files Browse the repository at this point in the history
Supporting space group symmetry
  • Loading branch information
seamm authored Aug 30, 2023
2 parents fbd7d14 + 4593bf4 commit 444e31a
Show file tree
Hide file tree
Showing 16 changed files with 2,203 additions and 159 deletions.
2 changes: 2 additions & 0 deletions HISTORY.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
History
=======

2023.8.30 -- Support for spacegroup symmetry

2023.7.28 -- Implemented ranges for reading XYZ and SDF files.

2023.7.27.1 -- Removed debug printing.
Expand Down
255 changes: 174 additions & 81 deletions read_structure_step/formats/cif/cif.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,17 @@
The cif/mmcif reader/writer
"""

import bz2
import gzip
import logging
from pathlib import Path
import time

from ..registries import register_format_checker
from ..registries import register_reader
from ..registries import set_format_metadata
from seamm_util.printing import FormattedText as __
from ...utils import parse_indices

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -56,7 +60,7 @@ def load_cif(
add_hydrogens=False,
system_db=None,
system=None,
indices="1:end",
indices="1-end",
subsequent_as_configurations=False,
system_name="from file",
configuration_name="sequential",
Expand Down Expand Up @@ -90,7 +94,7 @@ def load_cif(
system : System = None
The system to use if adding subsequent structures as configurations.
indices : str = "1:end"
indices : str = "1-end"
The generalized indices (slices, SMARTS, etc.) to select structures
from a file containing multiple structures.
Expand Down Expand Up @@ -123,21 +127,64 @@ def load_cif(
if isinstance(path, str):
path = Path(path)

path.expanduser().resolve()
path = path.expanduser().resolve()

# Get the information for progress output, if requested.
n_records = 0
with (
gzip.open(path, mode="rt")
if path.suffix == ".gz"
else bz2.open(path, mode="rt")
if path.suffix == ".bz2"
else open(path, "r")
) as fd:
for line in fd:
if line[0:5] == "data_":
n_records += 1
if printer is not None:
printer("")
printer(f" The CIF file contains {n_records} data blocks.")
last_percent = 0
t0 = time.time()
last_t = t0

# Get the indices to pick
indices = parse_indices(indices, n_records)
n_structures = len(indices)
if n_structures == 0:
return
stop = indices[-1]

configurations = []
record_no = 0
structure_no = 0
n_errors = 0
lines = []
in_block = False
block_name = ""
with open(path, "r") as fd:
with (
gzip.open(path, mode="rt")
if path.suffix == ".gz"
else bz2.open(path, mode="rt")
if path.suffix == ".bz2"
else open(path, "r")
) as fd:
for line in fd:
if line[0:5] == "data_":
logger.debug(f"Found block {line}")
if not in_block:
in_block = True
block_name = line[5:].strip()
else:
record_no += 1
if record_no > stop:
lines = []
break
if record_no not in indices:
lines = []
lines.append(line)
continue

structure_no += 1
if structure_no > 1:
if subsequent_as_configurations:
Expand All @@ -146,91 +193,137 @@ def load_cif(
system = system_db.create_system()
configuration = system.create_configuration()

text = configuration.from_cif_text("\n".join(lines))
if text != "":
printer("\n")
printer(__(text, indent=4 * " "))

configurations.append(configuration)

logger.debug(f" added system {system_db.n_systems}: {block_name}")

# Set the system name
if system_name is not None and system_name != "":
lower_name = str(system_name).lower()
if "from file" in lower_name:
system.name = block_name
elif "file name" in lower_name:
system.name = path.stem
elif "formula" in lower_name:
system.name = configuration.formula()[0]
elif "empirical formula" in lower_name:
system.name = configuration.formula()[1]
else:
system.name = str(system_name)

# And the configuration name
if configuration_name is not None and configuration_name != "":
lower_name = str(configuration_name).lower()
if "from file" in lower_name:
configuration.name = block_name
elif "file name" in lower_name:
configuration.name = path.stem
elif "formula" in lower_name:
configuration.name = configuration.formula()[0]
elif "empirical formula" in lower_name:
configuration.name = configuration.formula()[1]
else:
configuration.name = str(configuration_name)
logger.debug(f" added system {system_db.n_systems}: {block_name}")
try:
text = configuration.from_cif_text("".join(lines))
if text != "":
printer("\n")
printer(__(text, indent=4 * " "))
except Exception as e:
n_errors += 1
printer("")
printer(f" Error handling entry {record_no} in the CIF file:")
printer(" " + str(e))
printer(" Text of the entry is")
printer(" " + 60 * "-")
for tmp in lines:
printer(" " + tmp.rstrip())
printer(" " + 60 * "-")
printer("")
if n_structures <= 1:
raise
else:
configurations.append(configuration)

logger.debug(
f" added system {system_db.n_systems}: {block_name}"
)

# Set the system name
if system_name is not None and system_name != "":
lower_name = str(system_name).lower()
if "from file" in lower_name:
system.name = block_name
elif "file name" in lower_name:
system.name = path.stem
elif "formula" in lower_name:
system.name = configuration.formula()[0]
elif "empirical formula" in lower_name:
system.name = configuration.formula()[1]
else:
system.name = str(system_name)

# And the configuration name
if configuration_name is not None and configuration_name != "":
lower_name = str(configuration_name).lower()
if "from file" in lower_name:
configuration.name = block_name
elif "file name" in lower_name:
configuration.name = path.stem
elif "formula" in lower_name:
configuration.name = configuration.formula()[0]
elif "empirical formula" in lower_name:
configuration.name = configuration.formula()[1]
else:
configuration.name = str(configuration_name)
logger.debug(
f" added system {system_db.n_systems}: {block_name}"
)

if printer:
percent = int(100 * structure_no / n_structures)
if percent > last_percent:
t1 = time.time()
if t1 - last_t >= 60:
t = int(t1 - t0)
rate = structure_no / (t1 - t0)
t_left = int((n_structures - structure_no) / rate)
printer(
f"\t{structure_no:6} ({percent}%) structures "
f"read in {t} seconds. About {t_left} seconds "
"remaining."
)
last_t = t1
last_percent = percent
block_name = line[5:].strip()
lines = []
lines.append(line)

if len(lines) > 0:
# The last block just ends at the end of the file
structure_no += 1
if structure_no > 1:
if subsequent_as_configurations:
configuration = system.create_configuration()
else:
system = system_db.create_system()
configuration = system.create_configuration()

text = configuration.from_cif_text("\n".join(lines))
logger.debug(f" added system {system_db.n_systems}: {block_name}")
if text != "":
printer("\n")
printer(__(text, indent=4 * " "))
record_no += 1
if record_no in indices:
structure_no += 1
if structure_no > 1:
if subsequent_as_configurations:
configuration = system.create_configuration()
else:
system = system_db.create_system()
configuration = system.create_configuration()

text = configuration.from_cif_text("".join(lines))
logger.debug(f" added system {system_db.n_systems}: {block_name}")
if text != "":
printer("\n")
printer(__(text, indent=4 * " "))

configurations.append(configuration)

# Set the system name
if system_name is not None and system_name != "":
lower_name = str(system_name).lower()
if "from file" in lower_name:
system.name = block_name
elif "file name" in lower_name:
system.name = path.stem
elif "formula" in lower_name:
system.name = configuration.formula()[0]
elif "empirical formula" in lower_name:
system.name = configuration.formula()[1]
else:
system.name = str(system_name)

# And the configuration name
if configuration_name is not None and configuration_name != "":
lower_name = str(configuration_name).lower()
if "from file" in lower_name:
configuration.name = block_name
elif "file name" in lower_name:
configuration.name = path.stem
elif "formula" in lower_name:
configuration.name = configuration.formula()[0]
elif "empirical formula" in lower_name:
configuration.name = configuration.formula()[1]
else:
configuration.name = str(configuration_name)
# Set the system name
if system_name is not None and system_name != "":
lower_name = str(system_name).lower()
if "from file" in lower_name:
system.name = block_name
elif "file name" in lower_name:
system.name = path.stem
elif "formula" in lower_name:
system.name = configuration.formula()[0]
elif "empirical formula" in lower_name:
system.name = configuration.formula()[1]
else:
system.name = str(system_name)

# And the configuration name
if configuration_name is not None and configuration_name != "":
lower_name = str(configuration_name).lower()
if "from file" in lower_name:
configuration.name = block_name
elif "file name" in lower_name:
configuration.name = path.stem
elif "formula" in lower_name:
configuration.name = configuration.formula()[0]
elif "empirical formula" in lower_name:
configuration.name = configuration.formula()[1]
else:
configuration.name = str(configuration_name)

if printer:
t1 = time.time()
rate = structure_no / (t1 - t0)
printer(
f" Read {structure_no - n_errors} structures in {t1 - t0:.1f} "
f"seconds = {rate:.2f} per second"
)
if n_errors > 0:
printer(f" {n_errors} structures could not be read due to errors.")

return configurations
18 changes: 17 additions & 1 deletion read_structure_step/formats/mop/obabel.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from openbabel import openbabel
from read_structure_step.formats.registries import register_reader
import seamm
from seamm_util import Q_
from .find_mopac import find_mopac

if "OpenBabel_version" not in globals():
Expand Down Expand Up @@ -239,6 +240,8 @@ def load_mop(
# Finally, the MOPAC test data usually has three comment lines to start, with a
# single number on the second line, which is the heat of formation calculated by
# MOPAC. If this format is found the HOF is captured.
kcal2kJ = Q_(1, "kcal").m_as("kJ")

run_mopac = False
keywords = []
description_lines = []
Expand Down Expand Up @@ -605,9 +608,10 @@ def load_mop(
key,
"float",
description="The reference energy from MOPAC",
units="kJ/mol",
noerror=True,
)
properties.put(key, energy)
properties.put(key, energy * kcal2kJ)

# Handle properties encoded in the description
if len(description_lines) == 2 and "=" in description_lines[1]:
Expand Down Expand Up @@ -651,7 +655,19 @@ def load_mop(
description=f"stderr for the {keyword}.",
noerror=True,
)
if (
"heat capacity" in keyword
or "enthalpy" in keyword
or "entropy" in keyword
):
stderr = float(stderr) * kcal2kJ
system_properties.put(new_keyword, stderr)
if (
"heat capacity" in keyword
or "enthalpy" in keyword
or "entropy" in keyword
):
value = float(value) * kcal2kJ
system_properties.put(keyword, value)
except Exception as e:
print(f"{e}: {key}")
Expand Down
Loading

0 comments on commit 444e31a

Please sign in to comment.