diff --git a/HISTORY.rst b/HISTORY.rst index 5092484..b5729b5 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -1,6 +1,12 @@ ======= History ======= +2024.7.28 -- Added new names for systems and configurations + * Made the naming of systems and configurations consistent with the standard + parameters for them in the GUI. + * Removed all "from file" options, replacing them with "title", which means the title + from the file, if it exists, or the filename if it doesn't. + 2023.11.16 -- Bugfix: titles in SDF files * Crashed reading some SDF files write by SEAMM due to the system and configuration names encoded in the title having multiple slashes (/). diff --git a/read_structure_step/formats/cif/cif.py b/read_structure_step/formats/cif/cif.py index dc00f76..68fb324 100644 --- a/read_structure_step/formats/cif/cif.py +++ b/read_structure_step/formats/cif/cif.py @@ -135,9 +135,7 @@ def load_cif( with ( gzip.open(path, mode="rt") if path.suffix == ".gz" - else bz2.open(path, mode="rt") - if path.suffix == ".bz2" - else open(path, "r") + else bz2.open(path, mode="rt") if path.suffix == ".bz2" else open(path, "r") ) as fd: for line in fd: if line[0:5] == "data_": @@ -166,9 +164,7 @@ def load_cif( with ( gzip.open(path, mode="rt") if path.suffix == ".gz" - else bz2.open(path, mode="rt") - if path.suffix == ".bz2" - else open(path, "r") + else bz2.open(path, mode="rt") if path.suffix == ".bz2" else open(path, "r") ) as fd: for line in fd: if line[0:5] == "data_": @@ -378,9 +374,7 @@ def write_cif( with ( gzip.open(path, mode="wb") if path.suffix == ".gz" - else bz2.open(path, mode="wb") - if path.suffix == ".bz2" - else open(path, "w") + else bz2.open(path, mode="wb") if path.suffix == ".bz2" else open(path, "w") ) as fd: for configuration in configurations: text = configuration.to_cif_text() diff --git a/read_structure_step/formats/cif/mmcif.py b/read_structure_step/formats/cif/mmcif.py index 75a08b0..fbb8c85 100644 --- a/read_structure_step/formats/cif/mmcif.py +++ b/read_structure_step/formats/cif/mmcif.py @@ -295,9 +295,7 @@ def write_mmcif( with ( gzip.open(path, mode="wb") if path.suffix == ".gz" - else bz2.open(path, mode="wb") - if path.suffix == ".bz2" - else open(path, "w") + else bz2.open(path, mode="wb") if path.suffix == ".bz2" else open(path, "w") ) as fd: for configuration in configurations: text = configuration.to_mmcif_text() diff --git a/read_structure_step/formats/mol2/mol2.py b/read_structure_step/formats/mol2/mol2.py index 22cda19..9b7a2ae 100644 --- a/read_structure_step/formats/mol2/mol2.py +++ b/read_structure_step/formats/mol2/mol2.py @@ -101,7 +101,7 @@ def load_mol2( Normally and subsequent structures are loaded into new systems; however, if this option is True, they will be added as configurations. - system_name : str = "from file" + system_name : str = "title" The name for systems. Can be directives like "SMILES" or "Canonical SMILES". If None, no name is given. @@ -174,24 +174,44 @@ def load_mol2( # Set the system name if system_name is not None and system_name != "": lower_name = system_name.lower() - if "from file" in lower_name: - system.name = obMol.GetTitle() + if lower_name == "title": + tmp = obMol.GetTitle() + if tmp != "": + system.name = tmp + else: + system.name = f"{path.stem} {structure_no}" elif "canonical smiles" in lower_name: system.name = configuration.canonical_smiles elif "smiles" in lower_name: system.name = configuration.smiles + elif "iupac" in lower_name: + system.name = configuration.PC_iupac_name + elif "inchikey" in lower_name: + system.name = configuration.inchikey + elif "inchi" in lower_name: + system.name = configuration.inchi else: system.name = system_name # And the configuration name if configuration_name is not None and configuration_name != "": lower_name = configuration_name.lower() - if "from file" in lower_name: - configuration.name = obMol.GetTitle() + if lower_name == "title": + tmp = obMol.GetTitle() + if tmp != "": + configuration.name = tmp + else: + configuration.name = f"{path.stem} {structure_no}" elif "canonical smiles" in lower_name: configuration.name = configuration.canonical_smiles elif "smiles" in lower_name: configuration.name = configuration.smiles + elif "iupac" in lower_name: + configuration.name = configuration.PC_iupac_name + elif "inchikey" in lower_name: + configuration.name = configuration.inchikey + elif "inchi" in lower_name: + configuration.name = configuration.inchi elif lower_name == "sequential": configuration.name = str(structure_no) else: diff --git a/read_structure_step/formats/mop/obabel.py b/read_structure_step/formats/mop/obabel.py index 4d9d181..ad977ba 100644 --- a/read_structure_step/formats/mop/obabel.py +++ b/read_structure_step/formats/mop/obabel.py @@ -497,9 +497,7 @@ def load_mop( # Set the system name if system_name is not None and system_name != "": lower_name = system_name.lower() - if "from file" in lower_name: - system.name = str(path) - elif lower_name == "title": + if lower_name == "title": if len(description_lines) > 0: system.name = description_lines[0] else: @@ -508,18 +506,30 @@ def load_mop( system.name = configuration.canonical_smiles elif "smiles" in lower_name: system.name = configuration.smiles + elif "iupac" in lower_name: + system.name = configuration.PC_iupac_name + elif "inchikey" in lower_name: + system.name = configuration.inchikey + elif "inchi" in lower_name: + system.name = configuration.inchi else: system.name = system_name # And the configuration name if configuration_name is not None and configuration_name != "": lower_name = configuration_name.lower() - if "from file" in lower_name: + if lower_name == "title": configuration.name = obMol.GetTitle() elif "canonical smiles" in lower_name: configuration.name = configuration.canonical_smiles elif "smiles" in lower_name: configuration.name = configuration.smiles + elif "iupac" in lower_name: + configuration.name = configuration.PC_iupac_name + elif "inchikey" in lower_name: + configuration.name = configuration.inchikey + elif "inchi" in lower_name: + configuration.name = configuration.inchi elif lower_name == "sequential": configuration.name = "1" else: diff --git a/read_structure_step/formats/openbabel_io/obabel.py b/read_structure_step/formats/openbabel_io/obabel.py index c5516b5..de610bc 100644 --- a/read_structure_step/formats/openbabel_io/obabel.py +++ b/read_structure_step/formats/openbabel_io/obabel.py @@ -22,7 +22,7 @@ def load_file( path, configuration, extension=".sdf", - add_hydrogens=True, + add_hydrogens=False, system_db=None, system=None, indices="1:end", @@ -111,24 +111,44 @@ def load_file( # Set the system name if system_name is not None and system_name != "": lower_name = system_name.lower() - if "from file" in lower_name: - system.name = obMol.GetTitle() + if lower_name == "title": + tmp = obMol.GetTitle() + if tmp != "": + system.name = tmp + else: + system.name = path.stem elif "canonical smiles" in lower_name: system.name = configuration.canonical_smiles elif "smiles" in lower_name: system.name = configuration.smiles + elif "iupac" in lower_name: + system.name = configuration.PC_iupac_name + elif "inchikey" in lower_name: + system.name = configuration.inchikey + elif "inchi" in lower_name: + system.name = configuration.inchi else: system.name = system_name # And the configuration name if configuration_name is not None and configuration_name != "": lower_name = configuration_name.lower() - if "from file" in lower_name: - configuration.name = obMol.GetTitle() + if lower_name == "title": + tmp = obMol.GetTitle() + if tmp != "": + configuration.name = tmp + else: + configuration.name = path.stem elif "canonical smiles" in lower_name: configuration.name = configuration.canonical_smiles elif "smiles" in lower_name: configuration.name = configuration.smiles + elif "iupac" in lower_name: + configuration.name = configuration.PC_iupac_name + elif "inchikey" in lower_name: + configuration.name = configuration.inchikey + elif "inchi" in lower_name: + configuration.name = configuration.inchi elif lower_name == "sequential": configuration.name = "1" else: diff --git a/read_structure_step/formats/sdf/sdf.py b/read_structure_step/formats/sdf/sdf.py index c21baba..0b92880 100644 --- a/read_structure_step/formats/sdf/sdf.py +++ b/read_structure_step/formats/sdf/sdf.py @@ -64,7 +64,7 @@ def load_sdf( system=None, indices="1-end", subsequent_as_configurations=False, - system_name="Canonical SMILES", + system_name="title", configuration_name="sequential", printer=None, references=None, @@ -105,7 +105,7 @@ def load_sdf( Normally and subsequent structures are loaded into new systems; however, if this option is True, they will be added as configurations. - system_name : str = "from file" + system_name : str = "title" The name for systems. Can be directives like "SMILES" or "Canonical SMILES". If None, no name is given. @@ -139,9 +139,7 @@ def load_sdf( with ( gzip.open(path, mode="rt") if path.suffix == ".gz" - else bz2.open(path, mode="rt") - if path.suffix == ".bz2" - else open(path, "r") + else bz2.open(path, mode="rt") if path.suffix == ".bz2" else open(path, "r") ) as fd: for line in fd: if line[0:4] == "$$$$": @@ -172,9 +170,7 @@ def load_sdf( with ( gzip.open(path, mode="rt") if path.suffix == ".gz" - else bz2.open(path, mode="rt") - if path.suffix == ".bz2" - else open(path, "r") + else bz2.open(path, mode="rt") if path.suffix == ".bz2" else open(path, "r") ) as fd: for line in fd: text += line @@ -213,7 +209,7 @@ def load_sdf( if subsequent_as_configurations: configuration = system.create_configuration() else: - if have_sysname and "from file" in system_name.lower(): + if have_sysname and "title" in system_name.lower(): # Reuse the system if it exists if system_db.system_exists(sysname): system = system_db.get_system(sysname) @@ -245,24 +241,42 @@ def load_sdf( # Set the system name if system_name is not None and system_name != "": lower_name = system_name.lower() - if "from file" in lower_name: - system.name = sysname + if lower_name == "title": + if sysname != "": + system.name = sysname + else: + system.name = f"{path.stem}_{record_no}" elif "canonical smiles" in lower_name: system.name = configuration.canonical_smiles elif "smiles" in lower_name: system.name = configuration.smiles + elif "iupac" in lower_name: + system.name = configuration.PC_iupac_name + elif "inchikey" in lower_name: + system.name = configuration.inchikey + elif "inchi" in lower_name: + system.name = configuration.inchi else: system.name = system_name # And the configuration name if configuration_name is not None and configuration_name != "": lower_name = configuration_name.lower() - if "from file" in lower_name: - configuration.name = confname + if lower_name == "title": + if confname != "": + configuration.name = confname + else: + configuration.name = f"{path.stem}_{record_no}" elif "canonical smiles" in lower_name: configuration.name = configuration.canonical_smiles elif "smiles" in lower_name: configuration.name = configuration.smiles + elif "iupac" in lower_name: + configuration.name = configuration.PC_iupac_name + elif "inchikey" in lower_name: + configuration.name = configuration.inchikey + elif "inchi" in lower_name: + configuration.name = configuration.inchi elif lower_name == "sequential": configuration.name = str(record_no) else: diff --git a/read_structure_step/formats/smi/smi.py b/read_structure_step/formats/smi/smi.py index 69f48d0..ed8735c 100644 --- a/read_structure_step/formats/smi/smi.py +++ b/read_structure_step/formats/smi/smi.py @@ -188,24 +188,44 @@ def load_mol2( # Set the system name if system_name is not None and system_name != "": lower_name = system_name.lower() - if "from file" in lower_name: - system.name = obMol.GetTitle() + if lower_name == "title": + tmp = obMol.GetTitle() + if tmp != "": + system.name = tmp + else: + system.name = f"{path.stem}_{structure_no}" elif "canonical smiles" in lower_name: system.name = configuration.canonical_smiles elif "smiles" in lower_name: system.name = configuration.smiles + elif "iupac" in lower_name: + system.name = configuration.PC_iupac_name + elif "inchikey" in lower_name: + system.name = configuration.inchikey + elif "inchi" in lower_name: + system.name = configuration.inchi else: system.name = system_name # And the configuration name if configuration_name is not None and configuration_name != "": lower_name = configuration_name.lower() - if "from file" in lower_name: - configuration.name = obMol.GetTitle() + if lower_name == "title": + tmp = obMol.GetTitle() + if tmp != "": + configuration.name = tmp + else: + configuration.name = f"{path.stem}_{structure_no}" elif "canonical smiles" in lower_name: configuration.name = configuration.canonical_smiles elif "smiles" in lower_name: configuration.name = configuration.smiles + elif "iupac" in lower_name: + configuration.name = configuration.PC_iupac_name + elif "inchikey" in lower_name: + configuration.name = configuration.inchikey + elif "inchi" in lower_name: + configuration.name = configuration.inchi elif lower_name == "sequential": configuration.name = str(structure_no) else: diff --git a/read_structure_step/formats/xyz/xyz.py b/read_structure_step/formats/xyz/xyz.py index deb78ea..ab78dcd 100644 --- a/read_structure_step/formats/xyz/xyz.py +++ b/read_structure_step/formats/xyz/xyz.py @@ -223,9 +223,7 @@ def load_xyz( with ( gzip.open(path, mode="rt") if path.suffix == ".gz" - else bz2.open(path, mode="rt") - if path.suffix == ".bz2" - else open(path, "r") + else bz2.open(path, mode="rt") if path.suffix == ".bz2" else open(path, "r") ) as fd: for line in fd: last_line += 1 @@ -261,9 +259,7 @@ def load_xyz( with ( gzip.open(path, mode="rt") if path.suffix == ".gz" - else bz2.open(path, mode="rt") - if path.suffix == ".bz2" - else open(path, "r") + else bz2.open(path, mode="rt") if path.suffix == ".bz2" else open(path, "r") ) as fd: lines = [] line_no = 0 @@ -425,34 +421,42 @@ def load_xyz( # Set the system name if system_name is not None and system_name != "": lower_name = system_name.lower() - if "from file" in lower_name: - system.name = str(path) - elif lower_name == "title": + if lower_name == "title": if len(title) > 0: system.name = title else: - system.name = str(path) + system.name = path.stem elif "canonical smiles" in lower_name: system.name = configuration.canonical_smiles elif "smiles" in lower_name: system.name = configuration.smiles + elif "iupac" in lower_name: + system.name = configuration.PC_iupac_name + elif "inchikey" in lower_name: + system.name = configuration.inchikey + elif "inchi" in lower_name: + system.name = configuration.inchi else: system.name = system_name # And the configuration name if configuration_name is not None and configuration_name != "": lower_name = configuration_name.lower() - if "from file" in lower_name: - configuration.name = obMol.GetTitle() - elif lower_name == "title": + if lower_name == "title": if len(title) > 0: configuration.name = title else: - configuration.name = str(path) + configuration.name = path.stem elif "canonical smiles" in lower_name: configuration.name = configuration.canonical_smiles elif "smiles" in lower_name: configuration.name = configuration.smiles + elif "iupac" in lower_name: + configuration.name = configuration.PC_iupac_name + elif "inchikey" in lower_name: + configuration.name = configuration.inchikey + elif "inchi" in lower_name: + configuration.name = configuration.inchi elif lower_name == "sequential": configuration.name = str(structure_no) else: diff --git a/read_structure_step/read_structure.py b/read_structure_step/read_structure.py index f50250d..d7136b0 100644 --- a/read_structure_step/read_structure.py +++ b/read_structure_step/read_structure.py @@ -13,6 +13,7 @@ import logging from pathlib import PurePath, Path +import pprint # noqa: F401 import tarfile import tempfile import textwrap @@ -180,7 +181,6 @@ def run(self): P = self.parameters.current_values_to_dict( context=seamm.flowchart_variables._data ) - # Check for tar files, potentially compressed if isinstance(P["file"], Path): path = P["file"].expanduser().resolve() diff --git a/read_structure_step/read_structure_parameters.py b/read_structure_step/read_structure_parameters.py index 03fb5b0..71aecc0 100644 --- a/read_structure_step/read_structure_parameters.py +++ b/read_structure_step/read_structure_parameters.py @@ -57,8 +57,8 @@ class ReadStructureParameters(seamm.Parameters): "help_text": ("The type of file, overrides the extension"), }, "add hydrogens": { - "default": "yes", - "kind": "bool", + "default": "no", + "kind": "boolean", "default_units": "", "enumeration": ("yes", "no"), "format_string": "s",