diff --git a/HISTORY.rst b/HISTORY.rst index e11fd00..9b294a5 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -2,6 +2,8 @@ History ======= +2023.7.28 -- Implemented ranges for reading XYZ and SDF files. + 2023.7.27.1 -- Removed debug printing. 2023.7.27 -- Support for .gz and .bz2 files, and multi-structure .xyz files diff --git a/read_structure_step/formats/sdf/sdf.py b/read_structure_step/formats/sdf/sdf.py index 51c0c0b..416da91 100644 --- a/read_structure_step/formats/sdf/sdf.py +++ b/read_structure_step/formats/sdf/sdf.py @@ -134,29 +134,43 @@ def load_sdf( path.expanduser().resolve() # Get the information for progress output, if requested. + n_structures = 0 + with ( + gzip.open(path, mode="rt") + if path.suffix == ".gz" + else bz2.open(path, mode="rt") + if path.suffix == ".bz2" + else open(path, "r") + ) as fd: + for line in fd: + if line[0:4] == "$$$$": + n_structures += 1 if printer is not None: - n_structures = 0 - with ( - gzip.open(path, mode="rt") - if path.suffix == ".gz" - else bz2.open(path, mode="rt") - if path.suffix == ".bz2" - else open(path, "r") - ) as fd: - for line in fd: - if line[0:4] == "$$$$": - n_structures += 1 printer("") printer(f" The SDF file contains {n_structures} structures.") last_percent = 0 t0 = time.time() last_t = t0 + # Get the indices to pick + tmp = indices.replace("end", str(n_structures + 1)) + tmp = tmp.split(":") + start = int(tmp[0]) + if len(tmp) == 3: + step = int(tmp[2]) + else: + step = 1 + if len(tmp) == 2: + stop = int(tmp[1]) + else: + stop = start + 1 + indices = list(range(start, stop, step)) + obConversion = openbabel.OBConversion() obConversion.SetInAndOutFormats("sdf", "smi") configurations = [] - structure_no = 1 + structure_no = 0 n_errors = 0 obMol = openbabel.OBMol() text = "" @@ -173,6 +187,12 @@ def load_sdf( if line[0:4] != "$$$$": continue + structure_no += 1 + if structure_no >= stop: + break + if structure_no not in indices: + continue + obConversion.ReadString(obMol, text) if add_hydrogens: @@ -185,7 +205,6 @@ def load_sdf( system = system_db.create_system() configuration = system.create_configuration() - structure_no += 1 try: configuration.from_OBMol(obMol) except Exception as e: diff --git a/read_structure_step/formats/xyz/xyz.py b/read_structure_step/formats/xyz/xyz.py index 73adb2d..bd9f9df 100644 --- a/read_structure_step/formats/xyz/xyz.py +++ b/read_structure_step/formats/xyz/xyz.py @@ -240,6 +240,20 @@ def load_xyz( t0 = time.time() last_t = t0 + # Get the indices to pick + tmp = indices.replace("end", str(n_structures + 1)) + tmp = tmp.split(":") + start = int(tmp[0]) + if len(tmp) == 3: + step = int(tmp[2]) + else: + step = 1 + if len(tmp) == 2: + stop = int(tmp[1]) + else: + stop = start + 1 + indices = list(range(start, stop, step)) + obConversion = openbabel.OBConversion() obConversion.SetInFormat("xyz") @@ -264,6 +278,12 @@ def load_xyz( line_no += 1 lines.append(line) if total_lines == last_line or line_no > 3 and line.strip() == "": + structure_no += 1 + if structure_no >= stop: + break + if structure_no not in indices: + continue + # End of block, so examine the first lines and see which format file_type = "unknown" n_lines = len(lines) @@ -362,7 +382,6 @@ def load_xyz( if add_hydrogens: obMol.AddHydrogens() - structure_no += 1 if structure_no > 1: if subsequent_as_configurations: configuration = system.create_configuration() @@ -391,7 +410,7 @@ def load_xyz( elif "|" in title: for tmp in title.split("|"): if "=" in tmp: - key, val = tmp.split(maxsplit=1) + key, val = tmp.split("=", maxsplit=1) key = key.strip() val = val.strip() if key == "q": diff --git a/read_structure_step/read.py b/read_structure_step/read.py index 7e13ce9..11db8c7 100644 --- a/read_structure_step/read.py +++ b/read_structure_step/read.py @@ -14,7 +14,7 @@ def read( add_hydrogens=False, system_db=None, system=None, - indices=None, + indices="1:end", subsequent_as_configurations=False, system_name=None, configuration_name=None,