diff --git a/.github/workflows/pytest_codecov.yml b/.github/workflows/pytest_codecov.yml index aeb86b122..bf35616e4 100644 --- a/.github/workflows/pytest_codecov.yml +++ b/.github/workflows/pytest_codecov.yml @@ -26,7 +26,7 @@ jobs: strategy: matrix: os: [ubuntu-latest] - python-version: [3.7] + python-version: [3.8] env: OS: ${{ matrix.os }} PYTHON: ${{ matrix.python-version }} diff --git a/autometa-env.yml b/autometa-env.yml index cb614d817..d1b2b8d1d 100644 --- a/autometa-env.yml +++ b/autometa-env.yml @@ -12,6 +12,7 @@ dependencies: - gdown - hdbscan - hmmer + - joblib==1.1.0 # See https://stackoverflow.com/a/73830525/12671809 - numba>=0.47 - numpy>=1.13 - pandas>=1.1 @@ -23,8 +24,9 @@ dependencies: - rsync - samtools>=1.11 - scikit-bio - - scipy==1.8 #force scipy 1.8 until scikit-bio updates to 1.9, https://github.com/KwanLab/Autometa/issues/285 + - scipy==1.8.1 #force scipy 1.8 until scikit-bio updates to 1.9, https://github.com/KwanLab/Autometa/issues/285 - scikit-learn==0.24 # prevent error from joblib in multiprocessing distance calculations + - seqkit - tqdm - trimap - tsne diff --git a/autometa/binning/large_data_mode.py b/autometa/binning/large_data_mode.py index c84f87c52..d6dc34b3b 100644 --- a/autometa/binning/large_data_mode.py +++ b/autometa/binning/large_data_mode.py @@ -39,7 +39,7 @@ from autometa.common import kmers from autometa.common.exceptions import TableFormatError, BinningError -from autometa.taxonomy.ncbi import NCBI +from autometa.taxonomy.database import TaxonomyDatabase from autometa.binning.recursive_dbscan import get_clusters from autometa.binning.utilities import ( write_results, @@ -306,17 +306,26 @@ def cluster_by_taxon_partitioning( Raises ------- TableFormatError - No marker information is availble for contigs to be binned. + No marker information is available for contigs to be binned. FileNotFoundError Provided `binning_checkpoints_fpath` does not exist """ + if binning_checkpoints_fpath and not os.path.exists(binning_checkpoints_fpath): + raise FileNotFoundError(binning_checkpoints_fpath) + if reverse_ranks: # species, genus, family, order, class, phylum, superkingdom - canonical_ranks = [rank for rank in NCBI.CANONICAL_RANKS if rank != "root"] + canonical_ranks = [ + rank for rank in TaxonomyDatabase.CANONICAL_RANKS if rank != "root" + ] else: # superkingdom, phylum, class, order, family, genus, species canonical_ranks = [ - rank for rank in reversed(NCBI.CANONICAL_RANKS) if rank != "root" + rank + for rank in reversed(TaxonomyDatabase.CANONICAL_RANKS) + if rank != "root" ] # if stage is cached then we can first look to the cache before we begin subsetting main... 
clustered_contigs = set() @@ -326,15 +335,17 @@ def cluster_by_taxon_partitioning( starting_rank_name_txt = None # Retrieve appropriate starting canonical rank and rank_name_txt from cached binning checkpoints if cache was provided if cache: - if binning_checkpoints_fpath and not os.path.exists(binning_checkpoints_fpath): - raise FileNotFoundError(binning_checkpoints_fpath) + if not os.path.exists(cache): + os.makedirs(os.path.realpath(cache), exist_ok=True) + logger.debug(f"Created cache dir: {cache}") + if not os.path.isdir(cache): + raise NotADirectoryError(cache) if not binning_checkpoints_fpath: binning_checkpoints_fpath = os.path.join( cache, "binning_checkpoints.tsv.gz" ) - if os.path.exists(binning_checkpoints_fpath) and os.path.getsize( - binning_checkpoints_fpath - ): + if binning_checkpoints_fpath: + if os.path.exists(binning_checkpoints_fpath) and os.path.getsize(binning_checkpoints_fpath): checkpoint_info = get_checkpoint_info(binning_checkpoints_fpath) binning_checkpoints = checkpoint_info["binning_checkpoints"] starting_rank = checkpoint_info["starting_rank"] @@ -392,9 +403,9 @@ def cluster_by_taxon_partitioning( else None ) # Create canonical rank cache outdir if it does not exist - rank_cache_outdir = os.path.join(cache, canonical_rank) + rank_cache_outdir = os.path.join(cache, canonical_rank) if cache else None if embedding_cache_fpath and not os.path.isdir(rank_cache_outdir): - os.makedirs(rank_cache_outdir) + os.makedirs(rank_cache_outdir, exist_ok=True) rank_embedding = get_kmer_embedding( rank_counts, cache_fpath=embedding_cache_fpath, @@ -546,7 +557,7 @@ def cluster_by_taxon_partitioning( num_clusters += clustered.cluster.nunique() clusters.append(clustered) # Cache binning at rank_name_txt stage (rank-name-txt checkpointing) - if cache: + if binning_checkpoints_fpath: binning_checkpoints = checkpoint( checkpoints_df=binning_checkpoints, clustered=clustered, diff --git a/autometa/binning/recursive_dbscan.py b/autometa/binning/recursive_dbscan.py index beb36c077..713e08673 100644 --- a/autometa/binning/recursive_dbscan.py +++ b/autometa/binning/recursive_dbscan.py @@ -24,7 +24,7 @@ from autometa.common.markers import load as load_markers from autometa.common.exceptions import TableFormatError, BinningError -from autometa.taxonomy.ncbi import NCBI +from autometa.taxonomy.database import TaxonomyDatabase from autometa.binning.utilities import ( write_results, read_annotations, @@ -628,10 +628,14 @@ def taxon_guided_binning( logger.info(f"Using {method} clustering method") if reverse_ranks: # species, genus, family, order, class, phylum, superkingdom - ranks = [rank for rank in NCBI.CANONICAL_RANKS if rank != "root"] + ranks = [rank for rank in TaxonomyDatabase.CANONICAL_RANKS if rank != "root"] else: # superkingdom, phylum, class, order, family, genus, species - ranks = [rank for rank in reversed(NCBI.CANONICAL_RANKS) if rank != "root"] + ranks = [ + rank + for rank in reversed(TaxonomyDatabase.CANONICAL_RANKS) + if rank != "root" + ] starting_rank_index = ranks.index(starting_rank) ranks = ranks[starting_rank_index:] logger.debug(f"Using ranks: {', '.join(ranks)}") diff --git a/autometa/binning/summary.py b/autometa/binning/summary.py index 33c25351e..3dff01d46 100644 --- a/autometa/binning/summary.py +++ b/autometa/binning/summary.py @@ -17,7 +17,9 @@ from Bio import SeqIO +from autometa.taxonomy.database import TaxonomyDatabase from autometa.taxonomy.ncbi import NCBI +from autometa.taxonomy.gtdb import GTDB from autometa.taxonomy import majority_vote from 
autometa.common.markers import load as load_markers @@ -226,7 +228,7 @@ def get_metabin_stats( def get_metabin_taxonomies( - bin_df: pd.DataFrame, ncbi: NCBI, cluster_col: str = "cluster" + bin_df: pd.DataFrame, taxa_db: TaxonomyDatabase, cluster_col: str = "cluster" ) -> pd.DataFrame: """Retrieve taxonomies of all clusters recovered from Autometa binning. @@ -234,8 +236,8 @@ ---------- bin_df : pd.DataFrame Autometa binning table. index=contig, cols=['cluster','length','taxid', *canonical_ranks] - ncbi : autometa.taxonomy.ncbi.NCBI instance - Autometa NCBI class instance + taxa_db : autometa.taxonomy.database.TaxonomyDatabase instance + Autometa NCBI or GTDB class instance cluster_col : str, optional Clustering column by which to group metabins @@ -246,7 +248,9 @@ Returns ------- pd.DataFrame Dataframe consisting of cluster taxonomy with taxid and canonical rank. Indexed by cluster """ logger.info(f"Retrieving metabin taxonomies for {cluster_col}") - canonical_ranks = [rank for rank in NCBI.CANONICAL_RANKS if rank != "root"] + canonical_ranks = [ + rank for rank in TaxonomyDatabase.CANONICAL_RANKS if rank != "root" + ] is_clustered = bin_df[cluster_col].notnull() bin_df = bin_df[is_clustered] outcols = [cluster_col, "length", "taxid", *canonical_ranks] @@ -277,11 +281,13 @@ taxonomies[cluster][canonical_rank].update({taxid: length}) else: taxonomies[cluster][canonical_rank][taxid] += length - cluster_taxonomies = majority_vote.rank_taxids(taxonomies, ncbi) + cluster_taxonomies = majority_vote.rank_taxids(taxonomies, taxa_db=taxa_db) # With our cluster taxonomies, let's place these into a dataframe for easy data accession cluster_taxa_df = pd.Series(data=cluster_taxonomies, name="taxid").to_frame() # With the list of taxids, we'll retrieve their complete canonical-rank information - lineage_df = ncbi.get_lineage_dataframe(cluster_taxa_df.taxid.tolist(), fillna=True) + lineage_df = taxa_db.get_lineage_dataframe( + cluster_taxa_df.taxid.tolist(), fillna=True + ) # Now put it all together cluster_taxa_df = pd.merge( cluster_taxa_df, lineage_df, how="left", left_on="taxid", right_index=True @@ -323,11 +329,18 @@ def main(): required=True, ) parser.add_argument( - "--ncbi", - help="Path to user NCBI databases directory (Required for retrieving metabin taxonomies)", + "--dbdir", + help="Path to user taxonomy database directory (Required for retrieving metabin taxonomies)", metavar="dirpath", required=False, ) + parser.add_argument( + "--dbtype", + help="Taxonomy database type to use (NOTE: must correspond to the same database type used during contig taxon assignment.)", + choices=["ncbi", "gtdb"], + required=False, + default="ncbi", + ) parser.add_argument( "--binning-column", help="Binning column to use for grouping metabins", @@ -377,14 +390,17 @@ def main(): logger.info(f"Wrote metabin stats to {args.output_stats}") # Finally if taxonomy information is available then write out each metabin's taxonomy by modified majority voting method. if "taxid" in bin_df.columns: - if not args.ncbi: + if not args.dbdir: logger.warn( - "taxid found in dataframe. --ncbi argument is required to retrieve metabin taxonomies. Skipping..." + "taxid found in dataframe. --dbdir argument is required to retrieve metabin taxonomies. Skipping..." 
) else: - ncbi = NCBI(dirpath=args.ncbi) + if args.dbtype == "ncbi": + taxa_db = NCBI(dbdir=args.dbdir) + elif args.dbtype == "gtdb": + taxa_db = GTDB(dbdir=args.dbdir) taxa_df = get_metabin_taxonomies( - bin_df=bin_df, ncbi=ncbi, cluster_col=args.binning_column + bin_df=bin_df, taxa_db=taxa_db, cluster_col=args.binning_column ) taxa_df.to_csv(args.output_taxonomy, sep="\t", index=True, header=True) diff --git a/autometa/binning/utilities.py b/autometa/binning/utilities.py index dadf4002d..389eabc5d 100644 --- a/autometa/binning/utilities.py +++ b/autometa/binning/utilities.py @@ -33,7 +33,7 @@ from typing import Iterable, Tuple -from autometa.taxonomy.ncbi import NCBI +from autometa.taxonomy.database import TaxonomyDatabase logger = logging.getLogger(__name__) @@ -98,7 +98,7 @@ def filter_taxonomy(df: pd.DataFrame, rank: str, name: str) -> pd.DataFrame: Provided `name` not found in `rank` column. """ # First clean the assigned taxa by broadcasting lowercase and replacing any whitespace with underscores - for canonical_rank in NCBI.CANONICAL_RANKS: + for canonical_rank in TaxonomyDatabase.CANONICAL_RANKS: if canonical_rank not in df.columns: continue df[canonical_rank] = df[canonical_rank].map( @@ -395,7 +395,11 @@ def write_results( outcols.extend(annotation_cols) # Add in taxonomy columns if taxa are present # superkingdom, phylum, class, order, family, genus, species - taxa_cols = [rank for rank in reversed(NCBI.CANONICAL_RANKS) if rank != "root"] + taxa_cols = [ + rank + for rank in reversed(TaxonomyDatabase.CANONICAL_RANKS) + if rank != "root" + ] taxa_cols.append("taxid") # superkingdom, phylum, class, order, family, genus, species, taxid for taxa_col in taxa_cols: diff --git a/autometa/common/utilities.py b/autometa/common/utilities.py index 4d9e45e0d..546866fc3 100644 --- a/autometa/common/utilities.py +++ b/autometa/common/utilities.py @@ -27,6 +27,25 @@ logger = logging.getLogger(__name__) +def is_gz_file(filepath) -> bool: + """ + Check if the given file is gzipped compressed or not. + + Parameters + ---------- + filepath : str + Filepath to check + + Returns + ------- + bool + True if file is gzipped else False + """ + # https://stackoverflow.com/a/47080739 + with open(filepath, "rb") as test_f: + return test_f.read(2) == b"\x1f\x8b" + + def unpickle(fpath: str) -> Any: """Load a serialized `fpath` from :func:`make_pickle`. 
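The is_gz_file helper added to autometa/common/utilities.py above checks the two-byte gzip magic number (\x1f\x8b) so callers can open a file in text mode whether or not it is compressed. A minimal usage sketch (not part of the patch; the file path is a hypothetical placeholder), mirroring the open pattern used by GTDB.search_genome_accessions further below:

    import gzip

    from autometa.common.utilities import is_gz_file

    fpath = "taxid.map"  # hypothetical path; may or may not be gzip-compressed
    # "rt" keeps the gzip branch in text mode so both branches yield str lines
    fh = gzip.open(fpath, "rt") if is_gz_file(fpath) else open(fpath)
    n_lines = sum(1 for _ in fh)
    fh.close()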
diff --git a/autometa/config/databases.py b/autometa/config/databases.py index 83a8aebdb..881afb920 100644 --- a/autometa/config/databases.py +++ b/autometa/config/databases.py @@ -10,6 +10,7 @@ import logging import os import requests +import sys import subprocess import tempfile @@ -32,6 +33,7 @@ from autometa.config.utilities import DEFAULT_CONFIG from autometa.config.utilities import AUTOMETA_DIR from autometa.config.utilities import put_config, get_config +from autometa.taxonomy.gtdb import create_gtdb_db logger = logging.getLogger(__name__) @@ -401,6 +403,30 @@ def download_ncbi_files(self, options: Iterable) -> None: if "nr" in options: self.format_nr() + def download_gtdb_files(self) -> None: + gtdb_taxdump_url = self.config.get("database_urls", "gtdb_taxdmp") + proteins_aa_reps_url = self.config.get("database_urls", "proteins_aa_reps") + + # User path: + gtdb_taxdump_filepath = self.config.get("gtdb", "gtdb_taxdmp") + proteins_aa_reps_filepath = self.config.get("gtdb", "proteins_aa_reps") + + urls = [gtdb_taxdump_url, proteins_aa_reps_url] + filepaths = [gtdb_taxdump_filepath, proteins_aa_reps_filepath] + + logger.debug(f"starting GTDB databases download") + for url, filepath in zip(urls, filepaths): + cmd = ["wget", url, "-O", filepath] + full_path = os.path.abspath(filepath) + dir_path = os.path.dirname(full_path) + if not os.path.exists(dir_path): + os.makedirs(dir_path) + logger.debug(f"Created missing database directory: {dir_path}") + logger.debug(" ".join(cmd)) + subprocess.run( + cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=True + ) + def press_hmms(self) -> None: """hmmpress markers hmm database files. @@ -524,7 +550,7 @@ def download_missing(self, section: str = None) -> None: ---------- section : str, optional Section to check for missing database files (the default is None). - Choices include 'ncbi' and 'markers'. + Choices include 'ncbi', and 'markers'. Returns ------- @@ -673,7 +699,7 @@ def configure(self, section: str = None, no_checksum: bool = False) -> ConfigPar Raises ------- - ValueError Provided `section` does not match 'ncbi' or 'markers'. + ValueError Provided `section` does not match 'ncbi', or 'markers'. ConnectionError A connection issue occurred when connecting to NCBI or GitHub. @@ -714,7 +740,7 @@ def main(): ) parser.add_argument( "--update-all", - help="Update all out-of-date databases.", + help="Update all out-of-date databases. 
(NOTE: Does not update GTDB)", action="store_true", default=False, ) @@ -730,6 +756,12 @@ def main(): action="store_true", default=False, ) + parser.add_argument( + "--update-gtdb", + help="Download and format the user-configured GTDB release databases", + action="store_true", + default=False, + ) parser.add_argument( "--check-dependencies", help="Check database dependencies are satisfied.", @@ -771,6 +803,23 @@ def main(): section = "markers" elif args.update_ncbi: section = "ncbi" + elif args.update_gtdb: + if not os.path.exists( + dbs.config.get("gtdb", "proteins_aa_reps") + ) and not os.path.exists(dbs.config.get("gtdb", "gtdb_taxdmp")): + logger.info(f"GTDB database downloading: ") + dbs.download_gtdb_files() + # Format GTDB amino acid database + gtdb_combined = create_gtdb_db( + reps_faa=dbs.config.get("gtdb", "proteins_aa_reps"), + dbdir=dbs.config.get("databases", "gtdb"), + ) + diamond.makedatabase( + fasta=gtdb_combined, + database=gtdb_combined.replace(".faa", ".dmnd"), + cpus=args.nproc, + ) + sys.exit(0) else: section = None @@ -779,15 +828,11 @@ def main(): section=section, compare_checksums=compare_checksums ) logger.info(f"Database dependencies satisfied: {dbs_satisfied}") - import sys - sys.exit(0) config = dbs.configure(section=section, no_checksum=args.no_checksum) if not args.out: - import sys - sys.exit(0) put_config(config, args.out) logger.info(f"{args.out} written.") diff --git a/autometa/config/default.config b/autometa/config/default.config index a5db14477..e16327b2a 100644 --- a/autometa/config/default.config +++ b/autometa/config/default.config @@ -50,6 +50,7 @@ bedtools = None [databases] base = ${common:home_dir}/autometa/databases ncbi = ${databases:base}/ncbi +gtdb = ${databases:base}/gtdb markers = ${databases:base}/markers [database_urls] @@ -60,6 +61,9 @@ bacteria_single_copy = https://${markers:host}/KwanLab/Autometa/main/autometa/da bacteria_single_copy_cutoffs = https://${markers:host}/KwanLab/Autometa/main/autometa/databases/markers/bacteria.single_copy.cutoffs archaea_single_copy = https://${markers:host}/KwanLab/Autometa/main/autometa/databases/markers/archaea.single_copy.hmm archaea_single_copy_cutoffs = https://${markers:host}/KwanLab/Autometa/main/autometa/databases/markers/archaea.single_copy.cutoffs +proteins_aa_reps = https://${gtdb:host}/releases/${gtdb:release}/genomic_files_reps/gtdb_proteins_aa_reps.tar.gz +gtdb_taxdmp = https://github.com/shenwei356/gtdb-taxdump/releases/latest/download/gtdb-taxdump.tar.gz + [checksums] taxdump = ftp://${ncbi:host}/pub/taxonomy/taxdump.tar.gz.md5 @@ -79,6 +83,12 @@ merged = ${databases:ncbi}/merged.dmp accession2taxid = ${databases:ncbi}/prot.accession2taxid.gz nr = ${databases:ncbi}/nr.gz +[gtdb] +host = data.gtdb.ecogenomic.org +release = latest +proteins_aa_reps = ${databases:gtdb}/gtdb_proteins_aa_reps.tar.gz +gtdb_taxdmp = ${databases:gtdb}/gtdb-taxdump.tar.gz + [markers] host = raw.githubusercontent.com bacteria_single_copy = ${databases:markers}/bacteria.single_copy.hmm diff --git a/autometa/config/utilities.py b/autometa/config/utilities.py index 57607a0fc..bd876b1e6 100644 --- a/autometa/config/utilities.py +++ b/autometa/config/utilities.py @@ -216,7 +216,7 @@ def main(): update_group.add_argument( "--section", help="config section to update", - choices=["environ", "databases", "ncbi", "markers"], + choices=["environ", "databases", "ncbi", "markers", "gtdb"], required=False, ) update_group.add_argument( diff --git a/autometa/taxonomy/database.py b/autometa/taxonomy/database.py new file mode 
100644 index 000000000..b3d1a1b56 --- /dev/null +++ b/autometa/taxonomy/database.py @@ -0,0 +1,412 @@ +#!/usr/bin/env python + +import logging +import string + +from abc import ABC, abstractmethod +from typing import Dict, Set, Tuple, List, Union, Iterable +from autometa.common.exceptions import DatabaseOutOfSyncError + +import pandas as pd + + +logger = logging.getLogger(__name__) + + +class TaxonomyDatabase(ABC): + + """ + TaxonomyDatabase Abstract Base Class + + Abstract methods + + 1. parse_nodes(self) + 2. parse_names(self) + 3. parse_merged(self) + 4. parse_delnodes(self) + 5. convert_accessions_to_taxids(self) + + e.g. + + class GTDB(TaxonomyDatabase): + def __init__(self, ...): + self.nodes = self.parse_nodes() + self.names = self.parse_names() + self.merged = self.parse_merged() + self.delnodes = self.parse_delnodes() + ... + def parse_nodes(self): + ... + def parse_names(self): + ... + def parse_merged(self): + ... + def parse_delnodes(self): + ... + def convert_accessions_to_taxids(self, accessions): + ... + + Available methods (after aforementioned implementations): + + 1. convert_taxid_dtype + 2. name + 3. rank + 4. parent + 5. lineage + 6. is_common_ancestor + 7. get_lineage_dataframe + + Available attributes: + + CANONICAL_RANKS + """ + + CANONICAL_RANKS = [ + "species", + "genus", + "family", + "order", + "class", + "phylum", + "superkingdom", + "root", + ] + + @abstractmethod + def parse_nodes(self) -> Dict[int, Dict[str, Union[str, int]]]: + """ + Parse the `nodes.dmp` database and set to self.nodes. + + Returns + ------- + dict + {child_taxid:{'parent':parent_taxid,'rank':rank}, ...} + """ + + @abstractmethod + def parse_names(self) -> Dict[int, str]: + """ + Parses through the names.dmp database and loads taxids with their scientific names + + Returns + ------- + dict + {taxid: name, ...} + + """ + + @abstractmethod + def parse_merged(self) -> Dict[int, int]: + """ + Parses merged.dmp such that merged `taxid`s may be updated with their up-to-date `taxid`s + + Returns + ------- + dict + {old_taxid: new_taxid, ...} + """ + + @abstractmethod + def parse_delnodes(self) -> Set[int]: + """ + Parses delnodes.dmp such that deleted `taxid`s may be updated with their up-to-date `taxid`s + + Returns + ------- + set + {taxid, ...} + """ + + @abstractmethod + def convert_accessions_to_taxids( + self, + accessions: Dict[str, Set[str]], + ) -> Tuple[Dict[str, Set[int]], pd.DataFrame]: + """ + Translates subject sequence ids to taxids + + Parameters + ---------- + accessions : dict + {qseqid: {sseqid, ...}, ...} + + Returns + ------- + Tuple[Dict[str, Set[int]], pd.DataFrame] + {qseqid: {taxid, taxid, ...}, ...}, index=range, cols=[qseqid, sseqid, raw_taxid, ..., cleaned_taxid] + + """ + + def convert_taxid_dtype(self, taxid: int) -> int: + """ + 1. Converts the given `taxid` to an integer and checks whether it is positive. + 2. Checks whether `taxid` is present in both nodes.dmp and names.dmp. + 3a. If (2) is false, will check for corresponding `taxid` in merged.dmp and convert to this then redo (2). + 3b. If (2) is true, will return converted taxid. + 4. 
If (3a) is false will look for `taxid` in delnodes.dmp. If present will convert to root (taxid=1) + + Parameters + ---------- + taxid : int + identifier for a taxon in NCBI taxonomy databases - nodes.dmp, names.dmp or merged.dmp + + Returns + ------- + int + `taxid` if the `taxid` is a positive integer and present in either nodes.dmp or names.dmp or + taxid recovered from merged.dmp + + Raises + ------ + ValueError + Provided `taxid` is not a positive integer + DatabaseOutOfSyncError + NCBI databases nodes.dmp, names.dmp and merged.dmp are out of sync with each other + """ + # Step 1 Converts the given `taxid` to an integer and checks whether it is positive. + # Checking taxid instance format + # This checks if an integer has been added as str, eg. "562" + if isinstance(taxid, str): + invalid_chars = set(string.punctuation + string.ascii_letters) + invalid_chars.discard(".") + if set(taxid).intersection(invalid_chars) or taxid.count(".") > 1: + raise ValueError(f"taxid contains invalid character(s)! Given: {taxid}") + taxid = float(taxid) + # a boolean check is needed as they will evaluate silently to 0 or 1 when cast as ints. FALSE=0, TRUE=1 + # float(taxid).is_integer() checks if it is something like 12.0 vs. 12.9 + # is_integer only takes float as input else raises error, thus isinstance( ,float) is used before it to make sure a float is being passed + if isinstance(taxid, bool) or ( + isinstance(taxid, float) and not taxid.is_integer() + ): + raise ValueError(f"taxid must be an integer! Given: {taxid}") + taxid = int(taxid) + if taxid <= 0: + raise ValueError(f"Taxid must be a positive integer! Given: {taxid}") + # Checking databases + # Step 2: Check whether taxid is present in both nodes.dmp and names.dmp. + if taxid not in self.names and taxid not in self.nodes: + # Step 3a. Check for corresponding taxid in merged.dmp + if taxid not in self.merged: + # Step 4: look for taxid in delnodes.dmp. If present will convert to root (taxid=1) + if taxid in self.delnodes: + # Assign deleted taxid to root... + if self.verbose: + logger.debug( + f"Found {taxid} in delnodes.dmp, converting to root (taxid=1)" + ) + taxid = 1 + else: + err_message = f"Databases out of sync. {taxid} not found in nodes.dmp, names.dmp, merged.dmp or delnodes.dmp" + raise DatabaseOutOfSyncError(err_message) + else: + # Step 3b. convert taxid from merged. + if self.verbose: + logger.debug( + f"Converted {taxid} to {self.merged[taxid]} from merged.dmp" + ) + taxid = self.merged[taxid] + if taxid not in self.names and taxid not in self.nodes: + # NOTE: Do not check delnodes.dmp here, as at this point it appears the databases are indeed out of sync. + # i.e. The taxid should either be in merged.dmp or in delnodes.dmp if not in nodes.dmp or names.dmp + err_message = f"Databases out of sync. Merged taxid ({taxid}) not found in nodes.dmp or names.dmp!" + raise DatabaseOutOfSyncError(err_message) + return taxid + + def name(self, taxid: int, rank: str = None) -> str: + """ + Parses through the names.dmp in search of the given `taxid` and returns its name. + + Parameters + ---------- + taxid : int + `taxid` whose name is being returned + rank : str, optional + If provided, will return `taxid` name at `rank`, by default None + Must be a canonical rank, choices: species, genus, family, order, class, phylum, superkingdom + Eg. 
self.name(562, 'genus') would return 'Escherichia', where 562 is the taxid for Escherichia coli + + Returns + ------- + str + Name of provided `taxid` if `taxid` is found in names.dmp else 'unclassified' + + """ + try: + taxid = self.convert_taxid_dtype(taxid) + except DatabaseOutOfSyncError as err: + logger.warning(err) + taxid = 0 + if not rank: + return self.names.get(taxid, "unclassified") + if rank not in set(TaxonomyDatabase.CANONICAL_RANKS): + logger.warning(f"{rank} not in canonical ranks!") + return "unclassified" + ancestor_taxid = taxid + while ancestor_taxid != 1: + ancestor_rank = self.rank(ancestor_taxid) + if ancestor_rank == rank: + return self.names.get(ancestor_taxid, "unclassified") + ancestor_taxid = self.parent(ancestor_taxid) + # At this point we have not encountered a name for the taxid rank + # so we will place this as unclassified. + return "unclassified" + + def rank(self, taxid: int) -> str: + """ + Return the respective rank of provided `taxid`. + + Parameters + ---------- + taxid : int + `taxid` to retrieve rank from nodes + + Returns + ------- + str + rank name if taxid is found in nodes else "unclassified" + + """ + try: + taxid = self.convert_taxid_dtype(taxid) + except DatabaseOutOfSyncError as err: + logger.warning(err) + taxid = 0 + return self.nodes.get(taxid, {"rank": "unclassified"}).get("rank") + + def parent(self, taxid: int) -> int: + """ + Retrieve the parent taxid of provided `taxid`. + + Parameters + ---------- + taxid : int + child taxid to retrieve parent + + Returns + ------- + int + Parent taxid if found in nodes otherwise 1 + + """ + try: + taxid = self.convert_taxid_dtype(taxid) + except DatabaseOutOfSyncError as err: + logger.warning(err) + taxid = 0 + return self.nodes.get(taxid, {"parent": 1}).get("parent") + + def lineage( + self, taxid: int, canonical: bool = True + ) -> List[Dict[str, Union[str, int]]]: + """ + Returns the lineage of `taxids` encountered when traversing to root + + Parameters + ---------- + taxid : int + `taxid` in nodes.dmp, whose lineage is being returned + canonical : bool, optional + Lineage includes both canonical and non-canonical ranks when False, and only the canonical ranks when True + Canonical ranks include : species, genus , family, order, class, phylum, superkingdom, root + + Returns + ------- + ordered list of dicts + [{'taxid':taxid, 'rank':rank,'name':name}, ...] 
+ """ + lineage = [] + while taxid != 1: + if canonical and self.rank(taxid) not in TaxonomyDatabase.CANONICAL_RANKS: + taxid = self.parent(taxid) + continue + lineage.append( + {"taxid": taxid, "name": self.name(taxid), "rank": self.rank(taxid)} + ) + taxid = self.parent(taxid) + return lineage + + def is_common_ancestor(self, taxid_A: int, taxid_B: int) -> bool: + """ + Determines whether the provided taxids have a non-root common ancestor + + Parameters + ---------- + taxid_A : int + taxid in taxonomy database + taxid_B : int + taxid in taxonomy database + + Returns + ------- + boolean + True if taxids share a common ancestor else False + """ + lineage_a_taxids = {ancestor.get("taxid") for ancestor in self.lineage(taxid_A)} + lineage_b_taxids = {ancestor.get("taxid") for ancestor in self.lineage(taxid_B)} + common_ancestor = lineage_b_taxids.intersection(lineage_a_taxids) + common_ancestor.discard(1) # This discards root + return True if common_ancestor else False + + def get_lineage_dataframe( + self, taxids: Iterable, fillna: bool = True + ) -> pd.DataFrame: + """ + Given an iterable of taxids generate a pandas DataFrame of their canonical + lineages + + Parameters + ---------- + taxids : iterable + `taxids` whose lineage dataframe is being returned + fillna : bool, optional + Whether to fill the empty cells with 'unclassified' or not, default True + + Returns + ------- + pd.DataFrame + index = taxid + columns = [superkingdom,phylum,class,order,family,genus,species] + + Example + ------- + + If you would like to merge the returned DataFrame ('this_df') with another + DataFrame ('your_df'). Let's say where you retrieved your taxids: + + .. code-block:: python + + merged_df = pd.merge( + left=your_df, + right=this_df, + how='left', + left_on=, + right_index=True) + """ + canonical_ranks = [ + rank + for rank in reversed(TaxonomyDatabase.CANONICAL_RANKS) + if rank != "root" + ] + taxids = list(set(taxids)) + ranked_taxids = {} + for rank in canonical_ranks: + for taxid in taxids: + name = self.name(taxid, rank=rank) + if taxid not in ranked_taxids: + ranked_taxids.update({taxid: {rank: name}}) + else: + ranked_taxids[taxid].update({rank: name}) + df = pd.DataFrame(ranked_taxids).transpose() + df.index.name = "taxid" + if fillna: + df.fillna(value="unclassified", inplace=True) + return df diff --git a/autometa/taxonomy/gtdb.py b/autometa/taxonomy/gtdb.py new file mode 100644 index 000000000..d81862da8 --- /dev/null +++ b/autometa/taxonomy/gtdb.py @@ -0,0 +1,365 @@ +""" +# License: GNU Affero General Public License v3 or later +# A copy of GNU AGPL v3 should have been included in this software package in LICENSE.txt. + +File containing definition of the GTDB class and containing functions useful for handling GTDB taxonomy databases +""" + +import gzip +import logging +import os +import re +import string +import tarfile +import glob +from pathlib import Path + +from typing import Dict, List, Set, Tuple +from itertools import chain +from tqdm import tqdm +from typing import Dict + +import pandas as pd +import multiprocessing as mp + +from autometa.common.utilities import file_length, is_gz_file +from autometa.common.external import diamond +from autometa.taxonomy.database import TaxonomyDatabase +from autometa.common.exceptions import DatabaseOutOfSyncError + + +logger = logging.getLogger(__name__) + + +def create_gtdb_db(reps_faa: str, dbdir: str) -> str: + """ + Generate a combined faa file to create the GTDB-t database. 
+ + Parameters + ---------- + reps_faa : str + Directory containing faa files of all representative genomes. Can be tarballed. + dbdir : str + Path to output directory. + + Returns + ------- + str + Path to combined faa file. This can be used to make a diamond database. + """ + + if reps_faa.endswith(".tar.gz"): + logger.debug( + f"Extracting tarball containing GTDB ref genome amino acid data sequences to: {dbdir}/protein_faa_reps" + ) + tar = tarfile.open(reps_faa) + tar.extractall(path=dbdir) + tar.close() + logger.debug("Extraction done.") + reps_faa = dbdir + + genome_protein_faa_filepaths = glob.glob( + os.path.join(reps_faa, "**", "*_protein.faa"), recursive=True + ) + + faa_index: Dict[str, str] = {} + for genome_protein_faa_filepath in genome_protein_faa_filepaths: + # Regex to get the genome accession from the file path + genome_acc_search = re.search( + r"GCA_\d+.\d?|GCF_\d+.\d?", genome_protein_faa_filepath + ) + if genome_acc_search: + faa_index[genome_protein_faa_filepath] = genome_acc_search.group() + + # Create dbdir if it doesn't exist + if not os.path.isdir(dbdir): + os.makedirs(dbdir) + + logger.debug(f"Merging {len(faa_index):,} faa files.") + combined_faa = os.path.join(dbdir, "gtdb.faa") + with open(combined_faa, "w") as f_out: + for faa_file, acc in faa_index.items(): + with open(faa_file) as f_in: + for line in f_in: + if line.startswith(">"): + seqheader = line.lstrip(">") + line = f"\n>{acc} {seqheader}" + f_out.write(line) + logger.debug(f"Combined GTDB faa file written to {combined_faa}") + return combined_faa + + +class GTDB(TaxonomyDatabase): + """Taxonomy utilities for GTDB databases.""" + + def __init__(self, dbdir: str, verbose: bool = True): + """ + Instantiates the GTDB class + + """ + self.dbdir = dbdir + self.verbose = verbose + self.disable = not self.verbose + self.dmnd_db = os.path.join(self.dbdir, "gtdb.dmnd") + self.accession2taxid = os.path.join(self.dbdir, "taxid.map") + self.nodes_fpath = os.path.join(self.dbdir, "nodes.dmp") + self.names_fpath = os.path.join(self.dbdir, "names.dmp") + self.merged_fpath = os.path.join(self.dbdir, "merged.dmp") + self.delnodes_fpath = os.path.join(self.dbdir, "delnodes.dmp") + self.verify_databases() + self.names = self.parse_names() + self.nodes = self.parse_nodes() + self.merged = self.parse_merged() + self.delnodes = self.parse_delnodes() + + def verify_databases(self): + """ + Verify if the required databases are present. + + Raises + ------ + FileNotFoundError + One or more of the required databases were not found. + """ + for filepath in [ + self.names_fpath, + self.nodes_fpath, + self.dmnd_db, + self.accession2taxid, + ]: + if not os.path.exists(filepath): + raise FileNotFoundError(filepath) + + def __repr__(self): + """ + Operator overloading to return the string representation of the class object + + Returns + ------- + str + String representation of the class object + """ + return str(self) + + def __str__(self): + """ + Operator overloading to return the directory path of the class object + + Returns + ------- + str + Directory path of the class object + """ + # Perhaps should place summary here of files that do or do not exist? 
+ return self.dbdir + + def search_genome_accessions(self, accessions: set) -> Dict[str, int]: + """ + Search taxid.map file + + Parameters + ---------- + accessions : set + Set of subject sequence ids retrieved from diamond blastp search (sseqids) + + Returns + ------- + Dict[str, int] + Dictionary containing sseqids converted to taxids + """ + sseqids_to_taxids = {} + # "rt" open the database in text mode instead of binary to be handled like a text file + fh = ( + gzip.open(self.accession2taxid, "rt") + if is_gz_file(self.accession2taxid) + else open(self.accession2taxid) + ) + # skip the header line + __ = fh.readline() + n_lines = file_length(self.accession2taxid) + converted_sseqid_count = 0 + logger.debug(f"parsing {self.accession2taxid}...") + for line in fh: + acc_ver, taxid = line.strip().split("\t") + taxid = int(taxid) + if acc_ver in accessions: + sseqids_to_taxids[acc_ver] = taxid + converted_sseqid_count += 1 + fh.close() + logger.debug(f"sseqids converted: {converted_sseqid_count:,}") + return sseqids_to_taxids + + def parse_nodes(self) -> Dict[int, str]: + """ + Parse the `nodes.dmp` database. + Note: This is performed when a new GTDB class instance is constructed + + Returns + ------- + dict + {child_taxid:{'parent':parent_taxid,'rank':rank}, ...} + """ + fh = open(self.nodes_fpath) + __ = fh.readline() # root line + nodes = {1: {"parent": 1, "rank": "root"}} + for line in tqdm(fh, desc="parsing nodes", leave=False): + child, parent, rank = line.split("\t|\t")[:3] + parent, child = [int(node) for node in [parent, child]] + rank = rank.lower() + nodes.update({child: {"parent": parent, "rank": rank}}) + fh.close() + return nodes + + def parse_names(self) -> Dict[int, str]: + """ + Parses through names.dmp database and loads taxids with scientific names + + Returns + ------- + dict + {taxid:name, ...} + """ + names = {} + fh = open(self.names_fpath) + for line in tqdm(fh, desc="parsing names", leave=False): + taxid, name, __, classification = line.strip("\t|\n").split("\t|\t")[:4] + taxid = int(taxid) + name = name.lower() + # Only add scientific name entries + if classification == "scientific name": + names.update({taxid: name}) + fh.close() + return names + + def parse_merged(self) -> Dict[int, int]: + """ + Parse the merged.dmp database + + Returns + ------- + dict + {old_taxid: new_taxid, ...} + """ + if self.verbose: + logger.debug(f"Processing nodes from {self.merged_fpath}") + fh = open(self.merged_fpath) + merged = {} + for line in tqdm(fh, disable=self.disable, desc="parsing merged", leave=False): + old_taxid, new_taxid = [ + int(taxid) for taxid in line.strip("\t|\n").split("\t|\t") + ] + merged.update({old_taxid: new_taxid}) + fh.close() + if self.verbose: + logger.debug("merged loaded") + return merged + + def parse_delnodes(self) -> Set[int]: + """ + Parse the delnodes.dmp database + + Returns + ------- + set + {taxid, ...} + """ + if self.verbose: + logger.debug(f"Processing delnodes from {self.delnodes_fpath}") + fh = open(self.delnodes_fpath) + delnodes = set() + for line in tqdm( + fh, disable=self.disable, desc="parsing delnodes", leave=False + ): + del_taxid = int(line.strip("\t|\n")) + delnodes.add(del_taxid) + fh.close() + if self.verbose: + logger.debug("delnodes loaded") + return delnodes + + def convert_accessions_to_taxids( + self, accessions: Dict[str, Set[str]] + ) -> Tuple[Dict[str, Set[int]], pd.DataFrame]: + """ + Translates subject sequence ids to taxids + + + Parameters + ---------- + accessions : dict + {qseqid: {sseqid, ...}, ...} + + Returns 
+ ------- + Tuple[Dict[str, Set[int]], pd.DataFrame] + {qseqid: {taxid, taxid, ...}, ...}, index=range, cols=[qseqid, sseqid, raw_taxid, ..., cleaned_taxid] + + """ + recovered_accessions = set( + chain.from_iterable( + [qseqid_sseqids for qseqid_sseqids in accessions.values()] + ) + ) + sseqids_to_taxids = self.search_genome_accessions(recovered_accessions) + sseqid_not_found = pd.NA + accession_to_taxid_df = pd.DataFrame( + [ + { + "qseqid": qseqid, + "sseqid": sseqid, + "raw_taxid": sseqids_to_taxids.get(sseqid, sseqid_not_found), + } + for qseqid, qseqid_sseqids in accessions.items() + for sseqid in qseqid_sseqids + ] + ) + + taxids = {} + root_taxid = 1 + for qseqid, qseqid_sseqids in accessions.items(): + qseqid_taxids = { + sseqids_to_taxids.get(sseqid, root_taxid) for sseqid in qseqid_sseqids + } + taxids[qseqid] = qseqid_taxids + return taxids, accession_to_taxid_df + + +def main(): + import argparse + import logging as logger + + logger.basicConfig( + format="[%(asctime)s %(levelname)s] %(name)s: %(message)s", + datefmt="%m/%d/%Y %I:%M:%S %p", + level=logger.DEBUG, + ) + + parser = argparse.ArgumentParser() + + parser.add_argument( + "--reps-faa", + help="Path to directory containing GTDB ref genome amino acid data sequences. Can be tarballed.", + required=True, + ) + parser.add_argument( + "--dbdir", help="Path to output GTDB database directory", required=True + ) + parser.add_argument( + "--cpus", + help="Number of cpus to use for diamond-formatting GTDB database", + default=mp.cpu_count(), + ) + + args = parser.parse_args() + + gtdb_combined = create_gtdb_db(reps_faa=args.reps_faa, dbdir=args.dbdir) + diamond.makedatabase( + fasta=gtdb_combined, + database=gtdb_combined.replace(".faa", ".dmnd"), + cpus=args.cpus, + ) + + +if __name__ == "__main__": + # python -m autometa.taxonomy.gtdb --dbdir + main() diff --git a/autometa/taxonomy/lca.py b/autometa/taxonomy/lca.py index 2614c2972..654c6b3a4 100644 --- a/autometa/taxonomy/lca.py +++ b/autometa/taxonomy/lca.py @@ -16,7 +16,6 @@ import os from typing import Dict, Set, Tuple -from itertools import chain import pandas as pd import numpy as np @@ -26,6 +25,8 @@ from autometa.config.environ import get_versions from autometa.taxonomy.ncbi import NCBI, NCBI_DIR +from autometa.taxonomy.database import TaxonomyDatabase +from autometa.taxonomy.gtdb import GTDB from autometa.common.utilities import make_pickle, unpickle, file_length from autometa.common.external import diamond from autometa.common.external import prodigal @@ -34,7 +35,7 @@ logger = logging.getLogger(__name__) -class LCA(NCBI): +class LCA: """LCA class containing methods to retrieve the Lowest Common Ancestor. LCAs may be computed given taxids, a fasta or BLAST results. 
@@ -74,10 +75,14 @@ class LCA(NCBI): """ - def __init__(self, dbdir: str, verbose: bool = False, cache: str = ""): - super().__init__(dbdir, verbose=verbose) + def __init__( + self, + taxonomy_db: TaxonomyDatabase, + verbose: bool = False, + cache: str = "", + ): + self.taxonomy_db = taxonomy_db self.verbose = verbose - self.dbdir = dbdir self.cache = cache self.disable = False if verbose else True if self.cache == None: @@ -137,7 +142,7 @@ def prepare_tree(self): taxids = {} parents = {} children = {} - for taxid, info in self.nodes.items(): + for taxid, info in self.taxonomy_db.nodes.items(): if taxid == 1: # Skip root continue @@ -337,102 +342,6 @@ def lca(self, node1, node2): # (parent, child) return lca_node[1] - def convert_sseqids_to_taxids( - self, - sseqids: Dict[str, Set[str]], - ) -> Tuple[Dict[str, Set[int]], pd.DataFrame]: - """ - Translates subject sequence ids to taxids from prot.accession2taxid.gz and dead_prot.accession2taxid.gz. - - .. note:: - - If an accession number is no longer available in prot.accesssion2taxid.gz - (either due to being suppressed, deprecated or removed by NCBI), - then root taxid (1) is returned as the taxid for the corresponding sseqid. - - Parameters - ---------- - sseqids : dict - {qseqid: {sseqid, ...}, ...} - - Returns - ------- - Tuple[Dict[str, Set[int]], pd.DataFrame] - {qseqid: {taxid, taxid, ...}, ...}, index=range, cols=[qseqid, sseqid, raw_taxid, merged_taxid, cleaned_taxid] - - Raises - ------- - FileNotFoundError - prot.accession2taxid.gz database is required for sseqid to taxid conversion. - - """ - # We first get all unique sseqids that were retrieved from the blast output - recovered_sseqids = set( - chain.from_iterable([qseqid_sseqids for qseqid_sseqids in sseqids.values()]) - ) - - # Check for sseqid in dead_prot.accession2taxid.gz in case an old db was used. - # Any accessions not found in live prot.accession2taxid db *may* be here. - # This *attempts* to prevent sseqids from being assigned root (root taxid=1) - try: - sseqids_to_taxids = self.search_prot_accessions( - accessions=recovered_sseqids, - db="dead", - sseqids_to_taxids=None, - ) - dead_sseqids_found = set(sseqids_to_taxids.keys()) - except FileNotFoundError as db_fpath: - logger.warn(f"Skipping taxid conversion from {db_fpath}") - sseqids_to_taxids = None - dead_sseqids_found = set() - - # Now build the mapping from sseqid to taxid from the full/live accession2taxid dbs - # Possibly overwriting any merged accessions to live accessions - sseqids_to_taxids = self.search_prot_accessions( - accessions=recovered_sseqids, - db="full", - sseqids_to_taxids=sseqids_to_taxids, - ) - - # Remove sseqids: Ignore any sseqids already found - live_sseqids_found = set(sseqids_to_taxids.keys()) - live_sseqids_found -= dead_sseqids_found - recovered_sseqids -= live_sseqids_found - if recovered_sseqids: - logger.warn( - f"sseqids without corresponding taxid: {len(recovered_sseqids):,}" - ) - root_taxid = 1 - taxids = {} - sseqid_not_found = pd.NA - sseqid_to_taxid_df = pd.DataFrame( - [ - { - "qseqid": qseqid, - "sseqid": sseqid, - "raw_taxid": sseqids_to_taxids.get(sseqid, sseqid_not_found), - } - for qseqid, qseqid_sseqids in sseqids.items() - for sseqid in qseqid_sseqids - ] - ) - # Update taxids if they are old. 
- sseqid_to_taxid_df["merged_taxid"] = sseqid_to_taxid_df.raw_taxid.map( - lambda tid: self.merged.get(tid, tid) - ) - # If we still have missing taxids, we will set the sseqid value to the root taxid - # fill missing taxids with root_taxid - sseqid_to_taxid_df["cleaned_taxid"] = sseqid_to_taxid_df.merged_taxid.fillna( - root_taxid - ) - for qseqid, qseqid_sseqids in sseqids.items(): - # NOTE: we only want to retrieve the set of unique taxids (not a list) for LCA query - qseqid_taxids = { - sseqids_to_taxids.get(sseqid, root_taxid) for sseqid in qseqid_sseqids - } - taxids[qseqid] = qseqid_taxids - return taxids, sseqid_to_taxid_df - def reduce_taxids_to_lcas( self, taxids: Dict[str, Set[int]] ) -> Tuple[Dict[str, int], pd.DataFrame]: @@ -456,7 +365,6 @@ def reduce_taxids_to_lcas( for qseqid, qseqid_taxids in taxids.items(): # This will convert any deprecated taxids to their current taxids before reduction to LCA. # NOTE: we only want to retrieve the set of unique taxids (not a list) for LCA query - qseqid_taxids = {self.merged.get(taxid, taxid) for taxid in qseqid_taxids} lca_taxid = False num_taxids = len(qseqid_taxids) while not lca_taxid: @@ -560,8 +468,10 @@ def blast2lca( raise FileNotFoundError(blast) elif os.path.exists(blast) and os.path.getsize(blast): logger.debug(f"Retrieving taxids from {blast}") - sseqids = diamond.parse(results=blast, verbose=self.verbose) - taxids, sseqid_to_taxid_df = self.convert_sseqids_to_taxids(sseqids) + accessions = diamond.parse(results=blast, verbose=self.verbose) + taxids, sseqid_to_taxid_df = self.taxonomy_db.convert_accessions_to_taxids( + accessions=accessions + ) if sseqid_to_taxid_output: sseqid_to_taxid_df.to_csv( sseqid_to_taxid_output, sep="\t", index=False, header=True @@ -609,7 +519,7 @@ def write_lcas(self, lcas: Dict[str, int], out: str) -> str: fh.write(lines) lines = "" nlines = 0 - lines += f"{qseqid}\t{self.name(taxid)}\t{self.rank(taxid)}\t{taxid}\n" + lines += f"{qseqid}\t{self.taxonomy_db.name(taxid)}\t{self.taxonomy_db.rank(taxid)}\t{taxid}\n" nlines += 1 fh.write(lines) fh.close() @@ -617,7 +527,9 @@ def write_lcas(self, lcas: Dict[str, int], out: str) -> str: return out def parse( - self, lca_fpath: str, orfs_fpath: str = None + self, + lca_fpath: str, + orfs_fpath: str = None, ) -> Dict[str, Dict[str, Dict[int, int]]]: """Retrieve and construct contig dictionary from provided `lca_fpath`. 
@@ -686,9 +598,9 @@ def parse( taxid = int(taxid) contig = contigs_from_orfs.get(orf_id) if taxid != 1: - while rank not in set(NCBI.CANONICAL_RANKS): - taxid = self.parent(taxid) - rank = self.rank(taxid) + while rank not in set(TaxonomyDatabase.CANONICAL_RANKS): + taxid = self.taxonomy_db.parent(taxid) + rank = self.taxonomy_db.rank(taxid) if contig not in lca_hits: lca_hits.update({contig: {rank: {taxid: 1}}}) elif rank not in lca_hits[contig]: @@ -723,10 +635,16 @@ def main(): ) parser.add_argument( "--dbdir", - help="Path to NCBI databases directory.", + help="Path to taxonomy databases directory.", metavar="dirpath", default=NCBI_DIR, ) + parser.add_argument( + "--dbtype", + help="Taxonomy database to use", + choices=["ncbi", "gtdb"], + default="ncbi", + ) parser.add_argument( "--lca-output", help="Path to write LCA results.", @@ -774,7 +692,12 @@ def main(): ) args = parser.parse_args() - lca = LCA(dbdir=args.dbdir, verbose=args.verbose, cache=args.cache) + if args.dbtype == "ncbi": + taxonomy_db = NCBI(args.dbdir) + elif args.dbtype == "gtdb": + taxonomy_db = GTDB(args.dbdir) + + lca = LCA(taxonomy_db=taxonomy_db, verbose=args.verbose, cache=args.cache) # The pkl objects will be recomputed if the respective process cache file does not exist if args.force_cache_overwrite: diff --git a/autometa/taxonomy/majority_vote.py b/autometa/taxonomy/majority_vote.py index e9bed47aa..052c9b89d 100644 --- a/autometa/taxonomy/majority_vote.py +++ b/autometa/taxonomy/majority_vote.py @@ -16,12 +16,14 @@ from autometa.taxonomy.lca import LCA from autometa.taxonomy.ncbi import NCBI, NCBI_DIR +from autometa.taxonomy.gtdb import GTDB +from autometa.taxonomy.database import TaxonomyDatabase logger = logging.getLogger(__name__) def is_consistent_with_other_orfs( - taxid: int, rank: str, rank_counts: Dict[str, Dict], ncbi: NCBI + taxid: int, rank: str, rank_counts: Dict[str, Dict], taxa_db: TaxonomyDatabase ) -> bool: """Determines whether the majority of proteins in a contig, with rank equal to or above the given rank, are common ancestors of the taxid. @@ -48,8 +50,8 @@ def is_consistent_with_other_orfs( return True, otherwise return False. """ - rank_index = NCBI.CANONICAL_RANKS.index(rank) - ranks_to_consider = NCBI.CANONICAL_RANKS[rank_index:] + rank_index = TaxonomyDatabase.CANONICAL_RANKS.index(rank) + ranks_to_consider = TaxonomyDatabase.CANONICAL_RANKS[rank_index:] # Now we total up the consistent and inconsistent ORFs consistent = 0 inconsistent = 0 @@ -57,7 +59,7 @@ def is_consistent_with_other_orfs( if rank_name not in rank_counts: continue for rank_taxid, count in rank_counts[rank_name].items(): - if ncbi.is_common_ancestor(rank_taxid, taxid): + if taxa_db.is_common_ancestor(rank_taxid, taxid): consistent += count else: inconsistent += count @@ -70,7 +72,7 @@ def is_consistent_with_other_orfs( return False -def lowest_majority(rank_counts: Dict[str, Dict], ncbi: NCBI) -> int: +def lowest_majority(rank_counts: Dict[str, Dict], taxa_db: TaxonomyDatabase) -> int: """Determine the lowest majority given `rank_counts` by first attempting to get a taxid that leads in counts with the highest specificity in terms of canonical rank. 
@@ -79,8 +81,8 @@ def lowest_majority(rank_counts: Dict[str, Dict], ncbi: NCBI) -> int: ---------- rank_counts : dict {canonical_rank:{taxid:num_hits, ...}, rank2: {...}, ...} - ncbi : NCBI instance - NCBI object from autometa.taxonomy.ncbi + taxa_db : TaxonomyDatabase instance + NCBI or GTDB subclass object of autometa.taxonomy.database.TaxonomyDatabase Returns ------- @@ -89,11 +91,11 @@ def lowest_majority(rank_counts: Dict[str, Dict], ncbi: NCBI) -> int: """ taxid_totals = {} - for rank in NCBI.CANONICAL_RANKS: + for rank in TaxonomyDatabase.CANONICAL_RANKS: if rank not in rank_counts: continue - rank_index = NCBI.CANONICAL_RANKS.index(rank) - ranks_to_consider = NCBI.CANONICAL_RANKS[rank_index:] + rank_index = TaxonomyDatabase.CANONICAL_RANKS.index(rank) + ranks_to_consider = TaxonomyDatabase.CANONICAL_RANKS[rank_index:] for taxid in rank_counts[rank]: # Make a dictionary to total the number of canonical ranks hit # while traversing the path so that we can add 'unclassified' to @@ -106,22 +108,22 @@ def lowest_majority(rank_counts: Dict[str, Dict], ncbi: NCBI) -> int: current_taxid = taxid current_rank = rank while current_taxid != 1: - if current_rank not in set(NCBI.CANONICAL_RANKS): - current_taxid = ncbi.parent(current_taxid) - current_rank = ncbi.rank(current_taxid) + if current_rank not in set(TaxonomyDatabase.CANONICAL_RANKS): + current_taxid = taxa_db.parent(current_taxid) + current_rank = taxa_db.rank(current_taxid) continue ranks_in_path[current_rank] += 1 if current_rank not in taxid_totals: taxid_totals.update({current_rank: {current_taxid: 1}}) - current_taxid = ncbi.parent(current_taxid) - current_rank = ncbi.rank(current_taxid) + current_taxid = taxa_db.parent(current_taxid) + current_rank = taxa_db.rank(current_taxid) continue if current_taxid in taxid_totals[current_rank]: taxid_totals[current_rank][current_taxid] += 1 else: taxid_totals[current_rank][current_taxid] = 1 - current_taxid = ncbi.parent(current_taxid) - current_rank = ncbi.rank(current_taxid) + current_taxid = taxa_db.parent(current_taxid) + current_rank = taxa_db.rank(current_taxid) # Now go through ranks_in_path. Where total = 0, add 'unclassified' for rank_to_consider in ranks_to_consider: if ranks_in_path[rank_to_consider] == 0: @@ -135,7 +137,7 @@ def lowest_majority(rank_counts: Dict[str, Dict], ncbi: NCBI) -> int: # we need to add 'unclassified' to the relevant canonical taxonomic rank. # However, we must never allow 'unclassified' to win! (That just won't really tell us anything) # Now we need to determine which is the first level to have a majority - for rank in NCBI.CANONICAL_RANKS: + for rank in TaxonomyDatabase.CANONICAL_RANKS: total_votes = 0 taxid_leader = None taxid_leader_votes = 0 @@ -155,7 +157,9 @@ def lowest_majority(rank_counts: Dict[str, Dict], ncbi: NCBI) -> int: def rank_taxids( - ctg_lcas: dict, ncbi: Union[NCBI, LCA], verbose: bool = False + ctg_lcas: Dict[str, Dict[str, Dict[int, int]]], + taxa_db: TaxonomyDatabase, + verbose: bool = False, ) -> Dict[str, int]: """Votes for taxids based on modified majority vote system where if a majority does not exist, the lowest majority is voted. @@ -164,8 +168,8 @@ def rank_taxids( ---------- ctg_lcas : dict {ctg1:{canonical_rank:{taxid:num_hits,...},...}, ctg2:{...},...} - ncbi : ncbi.NCBI or lca.LCA object - instance of NCBI subclass or NCBI containing NCBI methods. 
+ taxa_db : TaxonomyDatabase + instance of NCBI or GTDB subclass of autometa.taxonomy.database.TaxonomyDatabase verbose : bool Description of parameter `verbose` (the default is False). @@ -184,7 +188,7 @@ def rank_taxids( ctg_lcas, disable=disable, total=n_contigs, desc=desc, leave=False ): acceptedTaxid = None - for rank in NCBI.CANONICAL_RANKS: + for rank in TaxonomyDatabase.CANONICAL_RANKS: if acceptedTaxid is not None: break # Order in descending order of votes @@ -196,7 +200,7 @@ def rank_taxids( ) for taxid in ordered_taxids: if is_consistent_with_other_orfs( - taxid, rank, ctg_lcas[contig], ncbi + taxid, rank, ctg_lcas[contig], taxa_db ): acceptedTaxid = taxid break @@ -204,7 +208,7 @@ def rank_taxids( # draw, so we need to find the lowest taxonomic level where there is a # majority if acceptedTaxid is None: - acceptedTaxid = lowest_majority(ctg_lcas[contig], ncbi) + acceptedTaxid = lowest_majority(ctg_lcas[contig], taxa_db) top_taxids[contig] = acceptedTaxid return top_taxids @@ -247,7 +251,7 @@ def write_votes(results: Dict[str, int], out: str) -> str: def majority_vote( lca_fpath: str, out: str, - ncbi_dir: str, + taxa_db: TaxonomyDatabase, verbose: bool = False, orfs: str = None, ) -> str: @@ -259,8 +263,8 @@ def majority_vote( Path to lowest common ancestor assignments table. out : str Path to write assigned taxids. - ncbi_dir : str - Path to NCBI databases directory. + taxa_db : TaxonomyDatabase + An instance of TaxonomyDatabase verbose : bool, optional Increase verbosity of logging stream orfs: str, optional @@ -275,12 +279,14 @@ def majority_vote( """ outdir = os.path.dirname(os.path.realpath(out)) - lca = LCA(dbdir=ncbi_dir, verbose=verbose, cache=outdir) + lca = LCA(taxonomy_db=taxa_db, verbose=verbose, cache=outdir) # retrieve lca taxids for each contig classifications = lca.parse(lca_fpath=lca_fpath, orfs_fpath=orfs) # Vote for majority lca taxid from contig lca taxids # We can pass in lca as the ncbi object here, because it is a subclass of NCBI. 
- voted_taxids = rank_taxids(ctg_lcas=classifications, ncbi=lca, verbose=verbose) + voted_taxids = rank_taxids( + ctg_lcas=classifications, taxa_db=taxa_db, verbose=verbose + ) return write_votes(results=voted_taxids, out=out) @@ -297,7 +303,13 @@ def main(): "--output", help="Path to write voted taxid results table.", required=True ) parser.add_argument( - "--dbdir", help="Path to NCBI databases directory.", default=NCBI_DIR + "--dbdir", help="Path to taxonomy database directory.", default=NCBI_DIR + ) + parser.add_argument( + "--dbtype", + help="Taxonomy database to use", + choices=["ncbi", "gtdb"], + default="ncbi", ) parser.add_argument( "--orfs", @@ -312,10 +324,15 @@ def main(): ) args = parser.parse_args() + if args.dbtype == "ncbi": + taxa_db = NCBI(args.dbdir, verbose=args.verbose) + elif args.dbtype == "gtdb": + taxa_db = GTDB(args.dbdir, verbose=args.verbose) + majority_vote( lca_fpath=args.lca, out=args.output, - ncbi_dir=args.dbdir, + taxa_db=taxa_db, verbose=args.verbose, orfs=args.orfs, ) diff --git a/autometa/taxonomy/ncbi.py b/autometa/taxonomy/ncbi.py index d78d3d3c9..06757179e 100644 --- a/autometa/taxonomy/ncbi.py +++ b/autometa/taxonomy/ncbi.py @@ -12,7 +12,9 @@ import os import string import sys -from typing import Dict, Iterable, List, Set + +from typing import Dict, Set, Tuple +from itertools import chain import pandas as pd @@ -21,6 +23,7 @@ from autometa.common.utilities import file_length from autometa.common.exceptions import DatabaseOutOfSyncError from autometa.config.utilities import DEFAULT_CONFIG +from autometa.taxonomy.database import TaxonomyDatabase logger = logging.getLogger(__name__) @@ -30,43 +33,32 @@ NCBI_DIR = NCBI_DIR if not "None" in NCBI_DIR else NCBI_DIR.replace("None", ".") -class NCBI: +class NCBI(TaxonomyDatabase): """Taxonomy utilities for NCBI databases.""" - CANONICAL_RANKS = [ - "species", - "genus", - "family", - "order", - "class", - "phylum", - "superkingdom", - "root", - ] - - def __init__(self, dirpath, verbose=False): + def __init__(self, dbdir, verbose=False): """ Instantiates the NCBI class Parameters ---------- - dirpath : str + dbdir : str Path to the database directory verbose : bool, optional log progress to terminal, by default False """ - self.dirpath = dirpath + self.dbdir = dbdir self.verbose = verbose self.disable = not self.verbose - self.names_fpath = os.path.join(self.dirpath, "names.dmp") - self.nodes_fpath = os.path.join(self.dirpath, "nodes.dmp") - self.merged_fpath = os.path.join(self.dirpath, "merged.dmp") - self.delnodes_fpath = os.path.join(self.dirpath, "delnodes.dmp") + self.names_fpath = os.path.join(self.dbdir, "names.dmp") + self.nodes_fpath = os.path.join(self.dbdir, "nodes.dmp") + self.merged_fpath = os.path.join(self.dbdir, "merged.dmp") + self.delnodes_fpath = os.path.join(self.dbdir, "delnodes.dmp") # Set prot.accession2taxid filepath - self.accession2taxid_fpath = os.path.join(self.dirpath, "prot.accession2taxid") + self.accession2taxid_fpath = os.path.join(self.dbdir, "prot.accession2taxid") acc2taxid_gz = ".".join([self.accession2taxid_fpath, "gz"]) if not os.path.exists(self.accession2taxid_fpath) and os.path.exists( acc2taxid_gz @@ -74,7 +66,7 @@ def __init__(self, dirpath, verbose=False): self.accession2taxid_fpath = acc2taxid_gz # Set prot.accession2taxid.FULL.gz filepath self.accession2taxidfull_fpath = os.path.join( - self.dirpath, "prot.accession2taxid.FULL" + self.dbdir, "prot.accession2taxid.FULL" ) acc2taxid_gz = ".".join([self.accession2taxidfull_fpath, "gz"]) if not 
os.path.exists(self.accession2taxidfull_fpath) and os.path.exists( @@ -83,7 +75,7 @@ def __init__(self, dirpath, verbose=False): self.accession2taxidfull_fpath = acc2taxid_gz # Set dead_prot.accession2taxid filepath self.dead_accession2taxid_fpath = os.path.join( - self.dirpath, "dead_prot.accession2taxid" + self.dbdir, "dead_prot.accession2taxid" ) acc2taxid_gz = ".".join([self.dead_accession2taxid_fpath, "gz"]) if not os.path.exists(self.dead_accession2taxid_fpath) and os.path.exists( @@ -91,10 +83,10 @@ def __init__(self, dirpath, verbose=False): ): self.dead_accession2taxid_fpath = acc2taxid_gz # Check formatting for nr database - self.nr_fpath = os.path.join(self.dirpath, "nr.gz") + self.nr_fpath = os.path.join(self.dbdir, "nr.gz") nr_bname = os.path.splitext(os.path.basename(self.nr_fpath))[0] nr_dmnd_fname = ".".join([nr_bname, "dmnd"]) - nr_dmnd_fpath = os.path.join(self.dirpath, nr_dmnd_fname) + nr_dmnd_fpath = os.path.join(self.dbdir, nr_dmnd_fname) if os.path.exists(nr_dmnd_fpath): self.nr_fpath = nr_dmnd_fpath if "dmnd" not in os.path.basename(self.nr_fpath): @@ -129,186 +121,7 @@ def __str__(self): Directory path of the class object """ # Perhaps should place summary here of files that do or do not exist? - return self.dirpath - - def name(self, taxid: int, rank: str = None) -> str: - """ - Parses through the names.dmp in search of the given `taxid` and returns its name. If the `taxid` is - deprecated, suppressed, withdrawn from NCBI (basically old) the updated name will be retrieved - - Parameters - ---------- - taxid : int - `taxid` whose name is being returned - rank : str, optional - If provided, will return `taxid` name at `rank`, by default None - Must be a canonical rank, choices: species, genus, family, order, class, phylum, superkingdom - Eg. self.name(562, 'genus') would return 'Escherichia', where 562 is the taxid for Escherichia coli - - Returns - ------- - str - Name of provided `taxid` if `taxid` is found in names.dmp else 'unclassified' - - Raises - ------ - DatabaseOutOfSyncError - NCBI databases nodes.dmp, names.dmp and merged.dmp are out of sync with each other - """ - try: - taxid = self.convert_taxid_dtype(taxid) - except DatabaseOutOfSyncError as err: - logger.warning(err) - taxid = 0 - if not rank: - return self.names.get(taxid, "unclassified") - if rank not in set(NCBI.CANONICAL_RANKS): - logger.warning(f"{rank} not in canonical ranks!") - return "unclassified" - ancestor_taxid = taxid - while ancestor_taxid != 1: - ancestor_rank = self.rank(ancestor_taxid) - if ancestor_rank == rank: - return self.names.get(ancestor_taxid, "unclassified") - ancestor_taxid = self.parent(ancestor_taxid) - # At this point we have not encountered a name for the taxid rank - # so we will place this as unclassified. - return "unclassified" - - def lineage(self, taxid: int, canonical: bool = True) -> List[Dict]: - """ - Returns the lineage of `taxids` encountered when traversing to root - - Parameters - ---------- - taxid : int - `taxid` in nodes.dmp, whose lineage is being returned - canonical : bool, optional - Lineage includes both canonical and non-canonical ranks when False, and only the canonical ranks when True - Canonical ranks include : species, genus , family, order, class, phylum, superkingdom, root - - Returns - ------- - ordered list of dicts - [{'taxid':taxid, 'rank':rank,'name':name}, ...] 
- """ - lineage = [] - while taxid != 1: - if canonical and self.rank(taxid) not in NCBI.CANONICAL_RANKS: - taxid = self.parent(taxid) - continue - lineage.append( - {"taxid": taxid, "name": self.name(taxid), "rank": self.rank(taxid)} - ) - taxid = self.parent(taxid) - return lineage - - def get_lineage_dataframe( - self, taxids: Iterable, fillna: bool = True - ) -> pd.DataFrame: - """ - Given an iterable of taxids generate a pandas DataFrame of their canonical - lineages - - Parameters - ---------- - taxids : iterable - `taxids` whose lineage dataframe is being returned - fillna : bool, optional - Whether to fill the empty cells with 'unclassified' or not, default True - - Returns - ------- - pd.DataFrame - index = taxid - columns = [superkingdom,phylum,class,order,family,genus,species] - - Example - ------- - - If you would like to merge the returned DataFrame ('this_df') with another - DataFrame ('your_df'). Let's say where you retrieved your taxids: - - .. code-block:: python - - merged_df = pd.merge( - left=your_df, - right=this_df, - how='left', - left_on=, - right_index=True) - """ - canonical_ranks = [ - rank for rank in reversed(NCBI.CANONICAL_RANKS) if rank != "root" - ] - taxids = list(set(taxids)) - ranked_taxids = {} - for rank in canonical_ranks: - for taxid in taxids: - name = self.name(taxid, rank=rank) - if taxid not in ranked_taxids: - ranked_taxids.update({taxid: {rank: name}}) - else: - ranked_taxids[taxid].update({rank: name}) - df = pd.DataFrame(ranked_taxids).transpose() - df.index.name = "taxid" - if fillna: - df.fillna(value="unclassified", inplace=True) - return df - - def rank(self, taxid: int) -> str: - """ - Return the respective rank of provided `taxid`. If the `taxid` is deprecated, suppressed, - withdrawn from NCBI (basically old) the updated rank will be retrieved - - Parameters - ---------- - taxid : int - `taxid` to retrieve rank from nodes.dmp - - Returns - ------- - str - rank name if taxid is found in nodes.dmp else "unclassified" - - Raises - ------ - DatabaseOutOfSyncError - NCBI databases nodes.dmp, names.dmp and merged.dmp are out of sync with each other - """ - try: - taxid = self.convert_taxid_dtype(taxid) - except DatabaseOutOfSyncError as err: - logger.warning(err) - taxid = 0 - return self.nodes.get(taxid, {"rank": "unclassified"}).get("rank") - - def parent(self, taxid: int) -> int: - """ - Retrieve the parent taxid of provided `taxid`. 
If the `taxid` is deprecated, suppressed, - withdrawn from NCBI (basically old) the updated parent will be retrieved - - Parameters - ---------- - taxid : int - child taxid to retrieve parent - - Returns - ------- - int - Parent taxid if found in nodes.dmp otherwise 1 - - Raises - ------ - DatabaseOutOfSyncError - NCBI databases nodes.dmp, names.dmp and merged.dmp are out of sync with each other - """ - try: - taxid = self.convert_taxid_dtype(taxid) - except DatabaseOutOfSyncError as err: - logger.warning(err) - taxid = 0 - return self.nodes.get(taxid, {"parent": 1}).get("parent") + return self.dbdir def parse_names(self) -> Dict[int, str]: """ @@ -408,28 +221,6 @@ def parse_delnodes(self) -> Set[int]: logger.debug("delnodes loaded") return delnodes - def is_common_ancestor(self, taxid_A: int, taxid_B: int) -> bool: - """ - Determines whether the provided taxids have a non-root common ancestor - - Parameters - ---------- - taxid_A : int - taxid in NCBI taxonomy databases - nodes.dmp, names.dmp or merged.dmp - taxid_B : int - taxid in NCBI taxonomy databases - nodes.dmp, names.dmp or merged.dmp - - Returns - ------- - boolean - True if taxids share a common ancestor else False - """ - lineage_a_taxids = {ancestor.get("taxid") for ancestor in self.lineage(taxid_A)} - lineage_b_taxids = {ancestor.get("taxid") for ancestor in self.lineage(taxid_B)} - common_ancestor = lineage_b_taxids.intersection(lineage_a_taxids) - common_ancestor.discard(1) # This discards root - return True if common_ancestor else False - def convert_taxid_dtype(self, taxid: int) -> int: """ 1. Converts the given `taxid` to an integer and checks whether it is positive. @@ -601,6 +392,78 @@ def search_prot_accessions( logger.debug(f"sseqids converted from {filename}: {converted_sseqid_count:,}") return sseqids_to_taxids + def convert_accessions_to_taxids( + self, accessions: set + ) -> Tuple[Dict[str, Set[int]], pd.DataFrame]: + # We first get all unique accessions that were retrieved from the blast output + recovered_accessions = set( + chain.from_iterable( + [qseqid_sseqids for qseqid_sseqids in accessions.values()] + ) + ) + # Check for sseqid in dead_prot.accession2taxid.gz in case an old db was used. + # Any accessions not found in live prot.accession2taxid db *may* be here. 
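As an aside on the new ``convert_accessions_to_taxids`` helper added below: despite the ``accessions: set`` annotation, the implementation iterates ``accessions.values()`` and ``accessions.items()``, i.e. it expects a mapping of query ORF IDs to their subject accessions. A minimal sketch, assuming a prepared NCBI database directory and hypothetical accession values:

.. code-block:: python

    from autometa.taxonomy.ncbi import NCBI

    ncbi = NCBI("path/to/ncbi")  # placeholder database directory

    # qseqid -> set of sseqid accessions recovered from a diamond blastp (outfmt 6) search
    accessions = {
        "contig_1_orf_1": {"WP_000000001.1", "WP_000000002.1"},  # hypothetical accessions
        "contig_1_orf_2": {"WP_000000003.1"},
    }

    # Returns (1) qseqid -> set of taxids and (2) a sseqid-to-taxid DataFrame
    taxids, sseqid_to_taxid_df = ncbi.convert_accessions_to_taxids(accessions)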
+ # This *attempts* to prevent accessions from being assigned root (root taxid=1) + try: + sseqids_to_taxids = self.search_prot_accessions( + accessions=recovered_accessions, + db="dead", + sseqids_to_taxids=None, + ) + dead_sseqids_found = set(sseqids_to_taxids.keys()) + except FileNotFoundError as db_fpath: + logger.warn(f"Skipping taxid conversion from {db_fpath}") + sseqids_to_taxids = None + dead_sseqids_found = set() + + # Now build the mapping from sseqid to taxid from the full/live accession2taxid dbs + # Possibly overwriting any merged accessions to live accessions + sseqids_to_taxids = self.search_prot_accessions( + accessions=recovered_accessions, + db="full", + sseqids_to_taxids=sseqids_to_taxids, + ) + + # Remove accessions: Ignore any accessions already found + live_sseqids_found = set(sseqids_to_taxids.keys()) + live_sseqids_found -= dead_sseqids_found + recovered_accessions -= live_sseqids_found + if recovered_accessions: + logger.warn( + f"accessions without corresponding taxid: {len(recovered_accessions):,}" + ) + root_taxid = 1 + taxids = {} + sseqid_not_found = pd.NA + sseqid_to_taxid_df = pd.DataFrame( + [ + { + "qseqid": qseqid, + "sseqid": sseqid, + "raw_taxid": sseqids_to_taxids.get(sseqid, sseqid_not_found), + } + for qseqid, qseqid_sseqids in accessions.items() + for sseqid in qseqid_sseqids + ] + ) + # Update taxids if they are old. + sseqid_to_taxid_df["merged_taxid"] = sseqid_to_taxid_df.raw_taxid.map( + lambda tid: self.merged.get(tid, tid) + ) + # If we still have missing taxids, we will set the sseqid value to the root taxid + # fill missing taxids with root_taxid + sseqid_to_taxid_df["cleaned_taxid"] = sseqid_to_taxid_df.merged_taxid.fillna( + root_taxid + ) + root_taxid = 1 + for qseqid, qseqid_sseqids in accessions.items(): + # NOTE: we only want to retrieve the set of unique taxids (not a list) for LCA query + qseqid_taxids = { + sseqids_to_taxids.get(sseqid, root_taxid) for sseqid in qseqid_sseqids + } + taxids[qseqid] = qseqid_taxids + return taxids, sseqid_to_taxid_df + if __name__ == "__main__": print("file containing Autometa NCBI utilities class") diff --git a/autometa/taxonomy/vote.py b/autometa/taxonomy/vote.py index 9167563d3..aa6211f19 100644 --- a/autometa/taxonomy/vote.py +++ b/autometa/taxonomy/vote.py @@ -15,13 +15,15 @@ import pandas as pd from Bio import SeqIO -from typing import Union, List +from typing import Union, List, Literal from autometa.common.external import prodigal from autometa.taxonomy import majority_vote from autometa.taxonomy.lca import LCA from autometa.taxonomy.ncbi import NCBI, NCBI_DIR +from autometa.taxonomy.gtdb import GTDB +from autometa.taxonomy.database import TaxonomyDatabase from autometa.common.exceptions import TableFormatError logger = logging.getLogger(__name__) @@ -35,7 +37,8 @@ def assign( nucl_orfs: str = None, blast: str = None, lca_fpath: str = None, - ncbi_dir: str = NCBI_DIR, + dbdir: str = NCBI_DIR, + dbtype: Literal["ncbi", "gtdb"] = "ncbi", force: bool = False, verbose: bool = False, parallel: bool = True, @@ -60,8 +63,10 @@ def assign( Path to blastp table, by default None lca_fpath : str, optional Path to output of LCA analysis, by default None - ncbi_dir : str, optional + dbdir : str, optional Path to NCBI databases directory, by default NCBI_DIR + dbtype : str, optional + Type of Taxonomy database to use, by default ncbi force : bool, optional Overwrite existing annotations, by default False verbose : bool, optional @@ -95,6 +100,8 @@ def assign( prot_orfs = prot_orfs if prot_orfs else 
os.path.join(outdir, "orfs.faa") nucl_orfs = nucl_orfs if nucl_orfs else os.path.join(outdir, "orfs.fna") + taxa_db = NCBI(dbdir=dbdir) if dbtype == "ncbi" else GTDB(dbdir=dbdir) + def call_orfs(): prodigal.run( assembly=assembly, @@ -107,16 +114,20 @@ def call_orfs(): def blast2lca(): if "lca" not in locals(): - lca = LCA(dbdir=ncbi_dir, verbose=verbose, cache=outdir) + lca = LCA(taxonomy_db=taxa_db, verbose=verbose, cache=outdir) lca.blast2lca( - orfs=prot_orfs, out=lca_fpath, blast=blast, force=force, cpus=cpus + blast=blast, + out=lca_fpath, + force=force, ) def majority_vote_lca(out=out): if "lca" not in locals(): - lca = LCA(dbdir=ncbi_dir, verbose=verbose, cache=outdir) + lca = LCA(taxonomy_db=taxa_db, verbose=verbose, cache=outdir) ctg_lcas = lca.parse(lca_fpath=lca_fpath, orfs_fpath=prot_orfs) - votes = majority_vote.rank_taxids(ctg_lcas=ctg_lcas, ncbi=lca, verbose=verbose) + votes = majority_vote.rank_taxids( + ctg_lcas=ctg_lcas, taxa_db=taxa_db, verbose=verbose + ) out = majority_vote.write_votes(results=votes, out=out) return pd.read_csv(out, sep="\t", index_col="contig") @@ -146,30 +157,29 @@ def majority_vote_lca(out=out): calculation() -def add_ranks(df: pd.DataFrame, ncbi: Union[NCBI, str]) -> pd.DataFrame: +def add_ranks(df: pd.DataFrame, taxa_db: TaxonomyDatabase) -> pd.DataFrame: """Add canonical ranks to `df` and write to `out` Parameters ---------- df : pd.DataFrame index="contig", column="taxid" - ncbi : str or NCBI - Path to NCBI databases directory, or autometa NCBI instance. + taxa_db : TaxonomyDatabase + NCBI or GTDB TaxonomyDatabase instance. Returns ------- pd.DataFrame index="contig", columns=["taxid", *canonical_ranks] """ - ncbi = ncbi if isinstance(ncbi, NCBI) else NCBI(ncbi) - dff = ncbi.get_lineage_dataframe(df["taxid"].unique().tolist()) + dff = taxa_db.get_lineage_dataframe(df["taxid"].unique().tolist()) return pd.merge(left=df, right=dff, how="left", left_on="taxid", right_index=True) def get( filepath_or_dataframe: Union[str, pd.DataFrame], kingdom: str, - ncbi: Union[NCBI, str] = NCBI_DIR, + taxa_db: TaxonomyDatabase, ) -> pd.DataFrame: """Retrieve specific `kingdom` voted taxa for `assembly` from `filepath` @@ -212,7 +222,7 @@ def get( if df.shape[1] <= 2: # Voting method will write out contig and its voted taxid (2 cols). # So here we add the canonical ranks using voted taxids. 
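For reference, a minimal sketch of the updated ``add_ranks`` call, assuming an instantiated taxonomy database and a small votes table (the taxids and paths are illustrative only):

.. code-block:: python

    import pandas as pd

    from autometa.taxonomy.ncbi import NCBI
    from autometa.taxonomy import vote

    taxa_db = NCBI("path/to/ncbi")

    # Votes table: index="contig", column="taxid"
    votes = pd.DataFrame(
        {"taxid": [562, 2]},
        index=pd.Index(["contig_1", "contig_2"], name="contig"),
    )

    # Canonical ranks are looked up via taxa_db.get_lineage_dataframe() and merged onto the votes
    ranked = vote.add_ranks(df=votes, taxa_db=taxa_db)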
- df = add_ranks(df=df, ncbi=ncbi) + df = add_ranks(df=df, taxa_db=taxa_db) if "superkingdom" not in df.columns: raise TableFormatError(f"superkingdom is not in taxonomy columns {df.columns}") @@ -252,11 +262,9 @@ def write_ranks( Raises ------ - ValueError - `rank` not in canonical ranks + KeyError + `rank` not in taxonomy columns """ - if rank not in NCBI.CANONICAL_RANKS: - raise ValueError(f"rank: {rank} not in {NCBI.CANONICAL_RANKS}") if rank not in taxonomy.columns: raise KeyError(f"{rank} not in taxonomy columns: {taxonomy.columns}") if not os.path.exists(assembly) or not os.path.getsize(assembly): @@ -314,7 +322,7 @@ def main(): ) parser.add_argument( "--output", - help="Directory to output fasta files of split canonical ranks and taxonomy.tsv.", + help="Output directory to write specified canonical ranks fasta files and taxon-binning results table", type=str, metavar="dirpath", required=True, @@ -339,13 +347,24 @@ def main(): ], ) parser.add_argument( - "--ncbi", - help="Path to NCBI databases directory.", + "--dbdir", + help="Path to taxonomy database directory.", metavar="dirpath", default=NCBI_DIR, ) + parser.add_argument( + "--dbtype", + help="Taxonomy database to use", + choices=["ncbi", "gtdb"], + default="ncbi", + ) args = parser.parse_args() + if args.dbtype == "ncbi": + taxa_db = NCBI(args.dbdir) + elif args.dbtype == "gtdb": + taxa_db = GTDB(args.dbdir) + filename = f"{args.prefix}.taxonomy.tsv" if args.prefix else "taxonomy.tsv" out = os.path.join(args.output, filename) taxa_df = pd.read_csv(args.votes, sep="\t", index_col="contig") @@ -354,12 +373,16 @@ def main(): os.makedirs(args.output) if taxa_df.shape[1] <= 2: - taxa_df = add_ranks(taxa_df, ncbi=args.ncbi) + taxa_df = add_ranks(taxa_df, taxa_db=taxa_db) taxa_df.to_csv(out, sep="\t", index=True, header=True) logger.debug( f"Wrote {taxa_df.shape[0]:,} contigs canonical rank names to {out}" ) if args.split_rank_and_write: + if args.split_rank_and_write not in TaxonomyDatabase.CANONICAL_RANKS: + raise ValueError( + f"rank: {args.split_rank_and_write} not in {TaxonomyDatabase.CANONICAL_RANKS}" + ) written_ranks = write_ranks( taxonomy=taxa_df, assembly=args.assembly, diff --git a/docs/source/bash-workflow.rst b/docs/source/bash-workflow.rst index c8e8a55e2..80b8a7d1d 100644 --- a/docs/source/bash-workflow.rst +++ b/docs/source/bash-workflow.rst @@ -133,23 +133,23 @@ Alignments Preparation .. 
code-block:: bash # First index metagenome assembly - bwa index \\ - -b 550000000000 \\ # block size for the bwtsw algorithm (effective with -a bwtsw) [default=10000000] + bwa index \ + -b 550000000000 \ # block size for the bwtsw algorithm (effective with -a bwtsw) [default=10000000] metagenome.fna # Path to input metagenome # Now perform alignments (we are using kart, but you can use another alignment tool if you'd like) - kart \\ - -i metagenome.fna \\ # Path to input metagenome - -t 20 \\ # Number of cpus to use - -f /path/to/forward_reads.fastq.gz \\ # Path to forward paired-end reads - -f2 /path/to/reverse_reads.fastq.gz \\ # Path to reverse paired-end reads + kart \ + -i metagenome.fna \ # Path to input metagenome + -t 20 \ # Number of cpus to use + -f /path/to/forward_reads.fastq.gz \ # Path to forward paired-end reads + -f2 /path/to/reverse_reads.fastq.gz \ # Path to reverse paired-end reads -o alignments.sam # Path to alignments output # Now sort alignments and convert to bam format - samtools sort \\ - -@ 40 \\ # Number of cpus to use - -m 10G \\ # Amount of memory to use - alignments.sam \\ # Input alignments file path + samtools sort \ + -@ 40 \ # Number of cpus to use + -m 10G \ # Amount of memory to use + alignments.sam \ # Input alignments file path -o alignments.bam # Output alignments file path .. _orfs-preparation: @@ -162,11 +162,11 @@ ORFs .. code-block:: bash - prodigal -i metagenome.fna \\ - -f "gbk" \\ - -d "metagenome.orfs.fna" \\ - -o "metagenome.orfs.gbk" \\ - -a "metagenome.orfs.faa" \\ # This generated file is required as input to the bash workflow + prodigal -i metagenome.fna \ + -f "gbk" \ + -d "metagenome.orfs.fna" \ + -o "metagenome.orfs.gbk" \ + -a "metagenome.orfs.faa" \ # This generated file is required as input to the bash workflow -s "metagenome.all_orfs.txt" .. _blastp-preparation: @@ -179,10 +179,10 @@ Diamond blastp Preparation .. code-block:: bash - diamond blastp \\ - --query "metagenome.orfs.faa" \\ # See prodigal output from above - --db /path/to/nr.dmnd \\ # See NCBI section - --threads \\ + diamond blastp \ + --query "metagenome.orfs.faa" \ # See prodigal output from above + --db /path/to/nr.dmnd \ # See NCBI section + --threads \ --out blastp.tsv # This generated file is required as input to the bash workflow .. _ncbi-preparation: @@ -195,9 +195,9 @@ If you are running Autometa for the first time you'll have to download the NCBI .. code-block:: bash # First configure where you want to download the NCBI databases - autometa-config \\ - --section databases \\ - --option ncbi \\ + autometa-config \ + --section databases \ + --option ncbi \ --value # Now download and format the NCBI databases diff --git a/docs/source/databases.rst b/docs/source/databases.rst index 7fa47e914..593a55dc7 100644 --- a/docs/source/databases.rst +++ b/docs/source/databases.rst @@ -22,14 +22,15 @@ scripts for this, you will first need to download Autometa (See :ref:`Installati .. code-block:: bash # First configure where you want to download the NCBI databases - autometa-config \\ - --section databases --option ncbi \\ + autometa-config \ + --section databases --option ncbi \ --value # Now download and format the NCBI databases autometa-update-databases --update-ncbi .. note:: + You can check the default config paths using ``autometa-config --print``. See ``autometa-update-databases -h`` and ``autometa-config -h`` for full list of options. 
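Once the NCBI files are in place, the same directory can also be loaded programmatically through the refactored ``NCBI`` class; a minimal sketch (the directory path is a placeholder):

.. code-block:: python

    from autometa.taxonomy.ncbi import NCBI

    # Expects names.dmp, nodes.dmp, merged.dmp, delnodes.dmp and
    # prot.accession2taxid(.gz) under the configured database directory
    ncbi = NCBI("/path/to/databases/ncbi", verbose=True)
    print(ncbi)  # __str__ now reports the database directory (dbdir)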
@@ -45,3 +46,57 @@ The previous command will download the following NCBI databases: After these files are downloaded, the ``taxdump.tar.gz`` tarball's files are extracted and the non-redundant protein database (``nr.gz``) is formatted as a diamond database (i.e. ``nr.dmnd``). This will significantly speed-up the ``diamond blastp`` searches. + +Genome Taxonomy Database (GTDB) +############################### + +If you would like to incorporate the benefits of using the Genome Taxonomy Database (GTDB), you may either prepare the databases manually or use a few Autometa helper scripts. +If you would like to use Autometa's scripts for this, you will first need to install Autometa (See :ref:`Installation`). + +You can either run the following script or manually download the respective databases. + +.. code-block:: bash + + # First configure where you want to download the GTDB databases + autometa-config \ + --section databases --option gtdb \ + --value + + # To use a specific GTDB release + autometa-config \ + --section gtdb --option release \ + --value latest + # Or --value r207 or --value r202, etc. + + # Download and format the configured GTDB databases release + autometa-update-databases --update-gtdb + + +.. note:: + + You can check the default config paths using ``autometa-config --print``. + + See ``autometa-update-databases -h`` and ``autometa-config -h`` for a full list of options. + +The previous command will download the following GTDB databases and format `gtdb_proteins_aa_reps.tar.gz` to generate the `gtdb.dmnd` database used by Autometa: + +- Amino acid sequences of representative genomes + - `gtdb_proteins_aa_reps.tar.gz `_ +- gtdb-taxdump.tar.gz from `shenwei356/gtdb-taxdump `_ + - `gtdb-taxdump.tar.gz `_ + + +Once extracted, `gtdb-taxdump.tar.gz` contains the taxdump files for each GTDB release. Make sure that the release you use matches the release version of `gtdb_proteins_aa_reps.tar.gz`; using the latest release of both is recommended. + +All the taxonomy files for a given taxonomy database must be in a single directory. Copy the taxdump files of the desired release into the same directory as `gtdb.dmnd`. + +Alternatively, if you have manually downloaded `gtdb_proteins_aa_reps.tar.gz` and `gtdb-taxdump.tar.gz`, you can run the following command to format `gtdb_proteins_aa_reps.tar.gz` into `gtdb.dmnd` and make it ready for Autometa. + +.. code-block:: bash + + python -m autometa.taxonomy.gtdb --reps-faa --dbdir --cpus 20 + +.. note:: + + Again, make sure that the formatted `gtdb_proteins_aa_reps.tar.gz` database (`gtdb.dmnd`) and the GTDB taxdump files are in the same directory.
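Note that this patch also registers an ``autometa-setup-gtdb`` console script (see the ``setup.py`` hunk below), which wraps the same ``autometa.taxonomy.gtdb`` formatting entry point as the ``python -m`` invocation above. Once formatted, the GTDB directory can be passed wherever the new ``--dbdir``/``--dbtype gtdb`` options appear, or loaded programmatically; a minimal sketch assuming the directory contains ``gtdb.dmnd`` and the matching taxdump files:

.. code-block:: python

    from autometa.taxonomy.gtdb import GTDB
    from autometa.taxonomy.database import TaxonomyDatabase

    gtdb = GTDB("/path/to/databases/gtdb", verbose=True)

    # NCBI and GTDB share the canonical ranks defined on the TaxonomyDatabase base class
    print(TaxonomyDatabase.CANONICAL_RANKS)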
\ No newline at end of file diff --git a/modules/local/binning_summary.nf b/modules/local/binning_summary.nf index 18c1e1d27..ad5554e6d 100644 --- a/modules/local/binning_summary.nf +++ b/modules/local/binning_summary.nf @@ -32,7 +32,8 @@ process BINNING_SUMMARY { def software = getSoftwareName(task.process) """ autometa-binning-summary \\ - --ncbi $ncbi \\ + --dbdir $ncbi \\ + --dbtype ncbi \\ --binning-main $binning_main \\ --markers $markers \\ --metagenome $metagenome \\ diff --git a/modules/local/majority_vote.nf b/modules/local/majority_vote.nf index 0d85eb4f2..846773a1b 100644 --- a/modules/local/majority_vote.nf +++ b/modules/local/majority_vote.nf @@ -28,7 +28,11 @@ process MAJORITY_VOTE { script: def software = getSoftwareName(task.process) """ - autometa-taxonomy-majority-vote --lca ${lca} --output votes.tsv --dbdir "${ncbi_tax_dir}" + autometa-taxonomy-majority-vote \\ + --lca ${lca} \\ + --output votes.tsv \\ + --dbdir "${ncbi_tax_dir}" \\ + --dbtype ncbi autometa --version | sed -e "s/autometa: //g" > ${software}.version.txt """ diff --git a/modules/local/prepare_lca.nf b/modules/local/prepare_lca.nf index d8e4481d9..e11015153 100644 --- a/modules/local/prepare_lca.nf +++ b/modules/local/prepare_lca.nf @@ -32,6 +32,7 @@ process PREPARE_LCA { --blast . \\ --lca-output . \\ --dbdir ${blastdb_dir} \\ + --dbtype ncbi \\ --cache cache \\ --only-prepare-cache autometa --version | sed -e "s/autometa: //g" > ${software}.version.txt diff --git a/modules/local/reduce_lca.nf b/modules/local/reduce_lca.nf index 7d9a46930..3424d2255 100644 --- a/modules/local/reduce_lca.nf +++ b/modules/local/reduce_lca.nf @@ -35,6 +35,7 @@ process REDUCE_LCA { autometa-taxonomy-lca \\ --blast ${blast} \\ --dbdir ${blastdb_dir} \\ + --dbtype ncbi \\ --cache ${lca_cache} \\ --lca-error-taxids lca_error_taxids.tsv \\ --sseqid2taxid-output sseqid2taxid.tsv \\ diff --git a/modules/local/split_kingdoms.nf b/modules/local/split_kingdoms.nf index 393d9504c..f93c12fd8 100644 --- a/modules/local/split_kingdoms.nf +++ b/modules/local/split_kingdoms.nf @@ -36,7 +36,8 @@ process SPLIT_KINGDOMS { --output . 
\\ --split-rank-and-write superkingdom \\ --assembly "${assembly}" \\ - --ncbi "${ncbi_tax_dir}" + --dbdir "${ncbi_tax_dir}" \\ + --dbtype ncbi autometa --version | sed -e "s/autometa: //g" > ${software}.version.txt """ diff --git a/setup.py b/setup.py index c35809090..bd79d9947 100644 --- a/setup.py +++ b/setup.py @@ -34,6 +34,7 @@ def read(fname): "autometa-taxonomy = autometa.taxonomy.vote:main", "autometa-taxonomy-lca = autometa.taxonomy.lca:main", "autometa-taxonomy-majority-vote = autometa.taxonomy.majority_vote:main", + "autometa-setup-gtdb = autometa.taxonomy.gtdb:main", "autometa-binning = autometa.binning.recursive_dbscan:main", "autometa-binning-summary = autometa.binning.summary:main", "autometa-binning-ldm = autometa.binning.large_data_mode:main", diff --git a/tests/conftest.py b/tests/conftest.py index f49888589..9454188a4 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -5,7 +5,7 @@ @pytest.fixture(name="ncbi", scope="session", autouse=True) def fixture_ncbi(ncbi_dir): - return NCBI(dirpath=ncbi_dir, verbose=False) + return NCBI(dbdir=ncbi_dir, verbose=False) @pytest.fixture(name="dbdir", scope="session", autouse=True) diff --git a/tests/environment.yml b/tests/environment.yml index 1f1ab3eda..f140f6d70 100644 --- a/tests/environment.yml +++ b/tests/environment.yml @@ -13,6 +13,7 @@ dependencies: - gdown - hdbscan - hmmer + - joblib==1.1.0 # See https://stackoverflow.com/a/73830525/12671809 - numba>=0.47 - numpy>=1.13 - pandas>=1.1 @@ -29,6 +30,7 @@ dependencies: - rsync - samtools>=1.11 - scikit-bio + - scipy==1.8.1 #force scipy 1.8 until scikit-bio updates to 1.9, https://github.com/KwanLab/Autometa/issues/285 - scikit-learn==0.24 # prevent error from joblib in multiprocessing distance calculations - sphinx - sphinx_rtd_theme diff --git a/tests/unit_tests/test_vote.py b/tests/unit_tests/test_vote.py index 61026cf9e..077e5c3ae 100644 --- a/tests/unit_tests/test_vote.py +++ b/tests/unit_tests/test_vote.py @@ -88,7 +88,7 @@ def fixture_votes_fpath(votes, tmp_path_factory): def test_add_ranks(ncbi, votes, tmp_path): out = tmp_path / "taxonomy.ranks_added.tsv" - df = vote.add_ranks(df=votes, ncbi=ncbi) + df = vote.add_ranks(df=votes, taxa_db=ncbi) assert df.shape == (2, 8) assert df.index.name == "contig" canonical_ranks = {rank for rank in ncbi.CANONICAL_RANKS if rank != "root"} @@ -101,10 +101,7 @@ def test_add_ranks(ncbi, votes, tmp_path): def test_vote_assign(blastp, ncbi_dir, prot_orfs, tmp_path): out = tmp_path / "votes.tsv" votes = vote.assign( - out=out, - prot_orfs=prot_orfs, - blast=blastp, - ncbi_dir=ncbi_dir, + out=out, prot_orfs=prot_orfs, blast=blastp, dbdir=ncbi_dir, dbtype="ncbi" ) assert isinstance(votes, pd.DataFrame) assert votes.index.name == "contig" @@ -115,7 +112,7 @@ def test_get(ncbi, votes_fpath): df = vote.get( filepath_or_dataframe=votes_fpath, kingdom="bacteria", - ncbi=ncbi, + taxa_db=ncbi, ) # canonical ranks should have been added to table if they were not already in place. 
assert df.shape == (2, 8) @@ -126,7 +123,7 @@ def test_get_none_recovered(ncbi, votes_fpath): vote.get( filepath_or_dataframe=votes_fpath, kingdom="archaea", - ncbi=ncbi, + taxa_db=ncbi, ) @@ -136,7 +133,7 @@ def test_get_empty_votes(ncbi_dir, tmp_path): vote.get( filepath_or_dataframe=fpath, kingdom="archaea", - ncbi=ncbi_dir, + taxa_db=ncbi_dir, ) @@ -151,7 +148,7 @@ def return_df(*args, **kwargs): vote.get( filepath_or_dataframe=fpath, kingdom="archaea", - ncbi=ncbi, + taxa_db=ncbi, ) @@ -160,7 +157,7 @@ def fixture_ranks_added_votes(votes_fpath, ncbi): return vote.get( filepath_or_dataframe=votes_fpath, kingdom="bacteria", - ncbi=ncbi, + taxa_db=ncbi, ) @@ -173,7 +170,7 @@ def test_write_ranks(monkeypatch, tmp_path, ranks_added_votes, rank, num_expecte assembly = dirpath / "assembly.fna" assembly.write_text("records") if rank == "noncanonical_rank": - with pytest.raises(ValueError): + with pytest.raises(KeyError): vote.write_ranks( taxonomy=ranks_added_votes, assembly=assembly, outdir=dirpath, rank=rank ) @@ -226,13 +223,12 @@ def test_write_ranks_no_taxonomy_columns(tmp_path, votes): ) -@pytest.mark.slow -@pytest.mark.skip @pytest.mark.entrypoint -def test_vote_main(monkeypatch, ncbi_dir, tmp_path): +def test_vote_main(monkeypatch, ncbi_dir, votes_fpath, tmp_path): outdir = tmp_path / "outdir" outdir.mkdir() - taxonomy = outdir / "taxonomy.tsv" + prefix = "test" + taxonomy = outdir / f"{prefix}.taxonomy.tsv" assembly = outdir / "assembly.fna" assembly.write_text("records") lca_fpath = outdir / "lca.tsv" @@ -240,11 +236,13 @@ def test_vote_main(monkeypatch, ncbi_dir, tmp_path): class MockArgs: def __init__(self): - self.votes = taxonomy + self.votes = votes_fpath self.output = outdir self.assembly = assembly self.split_rank_and_write = "superkingdom" - self.ncbi = ncbi_dir + self.dbdir = ncbi_dir + self.dbtype = "ncbi" + self.prefix = prefix class MockParser: def add_argument(self, *args, **kwargs): diff --git a/workflows/autometa-large-data-mode.sh b/workflows/autometa-large-data-mode.sh index b6acdc9f3..a4d65d36a 100644 --- a/workflows/autometa-large-data-mode.sh +++ b/workflows/autometa-large-data-mode.sh @@ -1,8 +1,10 @@ #!/usr/bin/env bash -#SBATCH -p partition +#SBATCH --partition=queue #SBATCH -t 48:00:00 #SBATCH --nodes=1 #SBATCH --ntasks-per-node=16 +#SBATCH --error=autometa.%J.err +#SBATCH --output=autometa.%J.out ## First create environment to run Autometa, (and optionally GTDB-Tk and CheckM) # git clone git@github.com:KwanLab/Autometa @@ -28,30 +30,42 @@ assembly="Path to metagenome assembly fasta file" bam="Path to metagenome read alignments.bam" orfs="Path to orfs used as input to diamond blastp" blast="Path to diamond blastp results (outfmt 6)" -ncbi="Path to NCBI databases directory" # should contain prot.accession2taxid.gz, names.dmp, nodes.dmp and merged.dmp +ncbi="Path to NCBI databases directory" # For more info see: https://autometa.readthedocs.io/en/latest/databases.html#ncbi +gtdb="Path to GTDB databases directory" # (Optional) For more info see: https://autometa.readthedocs.io/en/latest/databases.html#genome-taxonomy-database-gtdb markers_dbdir="Path to autometa markers databases" # should contain marker hmms and cutoffs files (can be downloaded from Autometa repo under `Autometa/autometa/databases/markers`) # Autometa Parameters length_cutoff=3000 # in bp -# kmer counting, normalization transform and embedding method +# Taxon Assignment Parameters +taxa_routine="ncbi" # Choices are "ncbi" or "ncbi_gtdb" +# NOTE: When using the "ncbi_gtdb" option, blastP will be 
performed against the GTDB database +# using the kingdom-specific ORFs retrieved from the NCBI taxon-assignment sub-workflow. +# K-mer Counting, Normalization and Embedding Parameters kmer_size=5 norm_method="am_clr" # choices: am_clr, clr, ilr pca_dimensions=50 # NOTE: must be greater than $embed_dimensions embed_dimensions=2 # NOTE: must be less than $pca_dimensions -embed_method="umap" # choices: bhsne, sksne, umap, densne, trimap -# Binning clustering method -clustering_method="hdbscan" # choices: hdbscan, dbscan +embed_method="umap" # choices: bhsne, sksne, umap, densmap, trimap +# Binning Parameters (clustering methods and MAG quality thresholds) +cluster_method="hdbscan" # choices: hdbscan, dbscan # Binning metrics cutoffs -completeness=20.0 -purity=95.0 -cov_stddev_limit=25.0 -gc_stddev_limit=5.0 +completeness=20.0 # Accept MAGs greater than this value +purity=95.0 # Accept MAGs greater than this value +cov_stddev_limit=25.0 # Accept MAGs less than this value +gc_stddev_limit=5.0 # Accept MAGs less than this value max_partition_size=10000 +# Runtime Parameters cpus=16 seed=42 +if [[ $taxa_routine != "ncbi" ]] && [[ $taxa_routine != "ncbi_gtdb" ]] +then + echo "ERROR: Invalid Taxonomic routine value. Please choose between ncbi or ncbi_gtdb. Current selection: ${taxa_routine}" + exit 1 +fi + # Step 0: Do some path handling with the provided `assembly` filepath -simpleName="TemplateAssemblyName" +simple_name="TemplateAssemblyName" outdir="AutometaOutdir" if [ ! -d $outdir ] then mkdir -p $outdir @@ -60,7 +74,9 @@ fi ######### BEGIN ######### # Step 00: Report autometa version +set -x autometa --version +{ set +x; } 2>/dev/null # Step 1: filter assembly by length and retrieve contig lengths as well as GC content @@ -69,15 +85,17 @@ autometa --version # $length_cutoff --> User input # output: -filtered_assembly="${outdir}/${simpleName}.filtered.fna" -gc_content="${outdir}/${simpleName}.gc_content.tsv" +filtered_assembly="${outdir}/${simple_name}.filtered.fna" +gc_content="${outdir}/${simple_name}.gc_content.tsv" # script: +set -x autometa-length-filter \ --assembly $assembly \ --cutoff $length_cutoff \ --output-fasta $filtered_assembly \ --output-gc-content $gc_content +{ set +x; } 2>/dev/null # Step 2: Determine coverages from assembly read alignments @@ -85,11 +103,13 @@ autometa-length-filter \ # NOTE: $bam is defined at top and the rest of the inputs are generated by autometa # output: -bed="${outdir}/${simpleName}.coverages.bed.tsv" -coverages="${outdir}/${simpleName}.coverages.tsv" +bed="${outdir}/${simple_name}.coverages.bed.tsv" +coverages="${outdir}/${simple_name}.coverages.tsv" # script: +set -x autometa-bedtools-genomecov --ibam $bam --bed $bed --output $coverages +{ set +x; } 2>/dev/null # Step 3: Annotate and filter markers # input: @@ -101,10 +121,11 @@ kingdoms=(bacteria archaea) # NOTE: We iterate through both sets of markers for binning both bacterial and archeal kingdoms for kingdom in ${kingdoms[@]};do # kingdom-specific output: - hmmscan="${outdir}/${simpleName}.${kingdom}.hmmscan.tsv" - markers="${outdir}/${simpleName}.${kingdom}.markers.tsv" + hmmscan="${outdir}/${simple_name}.${kingdom}.hmmscan.tsv" + markers="${outdir}/${simple_name}.${kingdom}.markers.tsv" # script: + set -x autometa-markers \ --orfs $orfs \ --hmmscan $hmmscan \ @@ -114,6 +135,7 @@ for kingdom in ${kingdoms[@]};do --parallel \ --cpus 4 \ --seed $seed + { set +x; } 2>/dev/null done # Step 4.1: Determine ORF lowest common ancestor (LCA) amongst top hits @@ -121,19 +143,25 @@ done # input: # $blast 
--> User Input # $ncbi --> User Input +# $dbtype --> Updated according to $taxa_routine +dbtype="ncbi" +prefix="${simple_name}.${dbtype}" # output: -lca="${outdir}/${simpleName}.orfs.lca.tsv" -sseqid2taxid="${outdir}/${simpleName}.orfs.sseqid2taxid.tsv" -errorTaxids="${outdir}/${simpleName}.orfs.errortaxids.tsv" +lca="${outdir}/${prefix}.orfs.lca.tsv" +sseqid_to_taxid="${outdir}/${prefix}.orfs.sseqid2taxid.tsv" +error_taxids="${outdir}/${prefix}.orfs.errortaxids.tsv" # script: +set -x autometa-taxonomy-lca \ --blast $blast \ --dbdir $ncbi \ + --dbtype $dbtype \ --lca-output $lca \ - --sseqid2taxid-output $sseqid2taxid \ - --lca-error-taxids $errorTaxids + --sseqid2taxid-output $sseqid_to_taxid \ + --lca-error-taxids $error_taxids +{ set +x; } 2>/dev/null # Step 4.2: Perform Modified Majority vote of ORF LCAs for all contigs that returned hits in blast search @@ -142,11 +170,12 @@ autometa-taxonomy-lca \ # $ncbi --> User Input # output: -votes="${outdir}/${simpleName}.taxids.tsv" +votes="${outdir}/${prefix}.taxids.tsv" # script: -autometa-taxonomy-majority-vote --lca $lca --output $votes --dbdir $ncbi - +set -x +autometa-taxonomy-majority-vote --lca $lca --output $votes --dbdir $ncbi --dbtype $dbtype +{ set +x; } 2>/dev/null # Step 4.3: Split assigned taxonomies into kingdoms # input: @@ -157,71 +186,220 @@ autometa-taxonomy-majority-vote --lca $lca --output $votes --dbdir $ncbi # output: # Will write recovered superkingdoms to ${outdir} -# e.g. ${outdir}/${simpleName}.bacteria.fna -# e.g. ${outdir}/${simpleName}.archaea.fna -# e.g. ${outdir}/${simpleName}.taxonomy.tsv +# e.g. ${outdir}/${prefix}.bacteria.fna +# e.g. ${outdir}/${prefix}.archaea.fna +# e.g. ${outdir}/${prefix}.taxonomy.tsv # script: +set -x autometa-taxonomy \ --votes $votes \ --output $outdir \ - --prefix $simpleName \ + --prefix $prefix \ --split-rank-and-write superkingdom \ --assembly $assembly \ - --ncbi $ncbi + --dbdir $ncbi \ + --dbtype $dbtype +{ set +x; } 2>/dev/null + +# Step 5: Taxon-assignment using the GTDB database +# NOTE: only performed if `taxa_routine` is 'ncbi_gtdb' + +# Step 5.1: Extract bacterial ORFs and run GTDB +# input: +# $kingdom_fasta --> Generated by step 4.3 +# $orfs --> User Input + +# output: +# orf_prefixes --> text file containing metagenome contig IDs classified within NCBI bacteria and archaea +# orf_ids --> text file containing contig ORF IDs classified within NCBI bacteria and archaea +# kingdom_orfs --> fasta file containing metagenome ORFs classified within NCBI bacteria or archaea +# gtdb_input_orfs --> metagenome orfs classified within NCBI bacteria *and* archaea + +if [[ "$taxa_routine" == "ncbi_gtdb" ]] +then + echo "Running GTDB taxon assignment step." + + # output + gtdb_input_orfs="${outdir}/${prefix}.orfs.faa" + + for kingdom in ${kingdoms[@]};do + + kingdom_fasta="${outdir}/${prefix}.${kingdom}.fna" + + orf_prefixes="${outdir}/${prefix}.${kingdom}.contigIDs.txt" + orf_ids="${outdir}/${prefix}.${kingdom}.orfIDs.txt" + kingdom_orfs="${outdir}/${prefix}.${kingdom}.orfs.faa" + + if [ ! -f $kingdom_fasta ] + then + echo "${kingdom_fasta} does not exist, skipping..." 
+ continue + fi + + # Retrieve contig IDs from kingdom fasta file + set -x + grep ">" $kingdom_fasta | \ + sed 's/^>//' | \ + sed 's/$/_/' | \ + cut -f1 -d" " > $orf_prefixes + # Retrieve ORF IDs from contig IDs + grep -f $orf_prefixes $orfs | sed 's/^>//' | cut -f1 -d" " > $orf_ids + # Retrieve ORF seqs from ORF IDs + seqkit grep \ + --pattern-file $orf_ids \ + --out-file $kingdom_orfs \ + $orfs + # Concatenate kingdom ORFs to single file for GTDB blastp + cat $kingdom_orfs >> $gtdb_input_orfs + { set +x; } 2>/dev/null + done + dbtype="gtdb" + prefix="${simple_name}.${dbtype}" + + # Step 5.2: Run blastp + # input: + # $gtdb_input_orfs --> Generated from step 5.1 + gtdb_dmnd_db=$(find $gtdb -name "gtdb.dmnd") # generated using autometa-setup-gtdb (Must be performed prior to using this script) + # output + blast="${outdir}/${prefix}.blastp.tsv" + + # script + set -x + diamond blastp \ + --query $gtdb_input_orfs \ + --db $gtdb_dmnd_db \ + --evalue 1e-5 \ + --max-target-seqs 200 \ + --threads $cpus \ + --outfmt 6 \ + --out $blast + { set +x; } 2>/dev/null + + # Step 5.3: Determine LCA + # input: + # $blast --> Generated from step 5.2 + # $gtdb --> User Input + # $dbtype --> Updated according to $taxa_routine + + # output: + lca="${outdir}/${prefix}.orfs.lca.tsv" + sseqid_to_taxid="${outdir}/${prefix}.orfs.sseqid2taxid.tsv" + error_taxids="${outdir}/${prefix}.orfs.errortaxids.tsv" + + # script: + set -x + autometa-taxonomy-lca \ + --blast $blast \ + --dbdir $gtdb \ + --dbtype $dbtype \ + --lca-output $lca \ + --sseqid2taxid-output $sseqid_to_taxid \ + --lca-error-taxids $error_taxids + { set +x; } 2>/dev/null + + # Step 5.4: Perform Modified Majority vote of ORF LCAs for all contigs that returned hits in blast search + # input: + # $lca --> Generated from step 5.3 + # $gtdb --> User Input + + # output: + votes="${outdir}/${prefix}.taxids.tsv" + + # script: + set -x + autometa-taxonomy-majority-vote \ + --lca $lca \ + --output $votes \ + --dbdir $gtdb \ + --dbtype $dbtype + { set +x; } 2>/dev/null + + # Step 5.5: Split assigned taxonomies into kingdoms + # input: + # $votes --> Generated from step 5.4 + # $outdir --> Generated from step 0 + # $prefix --> Generated as input from steps 5.2 to 6 + # $filtered_assembly --> Generated from step 1 + # $gtdb --> User Input + + # output: + # Will write recovered superkingdoms to $outdir + # e.g. ${outdir}/${prefix}.bacteria.fna + # e.g. ${outdir}/${prefix}.archaea.fna + # e.g. ${outdir}/${prefix}.taxonomy.tsv + + # script: + set -x + autometa-taxonomy \ + --votes $votes \ + --output $outdir \ + --prefix $prefix \ + --split-rank-and-write superkingdom \ + --assembly $filtered_assembly \ + --dbdir $gtdb \ + --dbtype $dbtype + { set +x; } 2>/dev/null +fi -# Step 5: Perform k-mer counting on respective kingdoms +# Step 6: Perform k-mer counting on respective kingdoms # input: # $kmer_size --> User input -# $norm_method --> User input -# $embed_method --> User input -# $embed_dimensions --> User input # $cpus --> User input -# $seed --> User input kingdoms=(bacteria archaea) for kingdom in ${kingdoms[@]};do # kingdom-specific input: # NOTE: $fasta --> Generated by step 4.3 - fasta="${outdir}/${simpleName}.${kingdom}.fna" + fasta="${outdir}/${prefix}.${kingdom}.fna" # NOTE: $prefix is updated according to taxa_routine above + # kingdom-specific output: + counts="${outdir}/${prefix}.${kingdom}.${kmer_size}mers.tsv" + if [ ! -f $fasta ] then echo "${fasta} does not exist, skipping..." 
continue fi - - # kingdom-specific output: - counts="${outdir}/${simpleName}.${kingdom}.${kmer_size}mers.tsv" - # script: + set -x autometa-kmers \ --fasta $fasta \ --kmers $counts \ --size $kmer_size \ --cpus $cpus + { set +x; } 2>/dev/null done -# Step 6: Perform binning on each set of bacterial and archaeal contigs +# Step 7: Perform binning on each set of bacterial and archaeal contigs # input: -# $cpus --> User input -# $seed --> User input -taxonomy="${outdir}/${simpleName}.taxonomy.tsv" # Generated by step 4.3 +# $coverages --> Generated by step 2 # $gc_content --> Generated by step 1 +taxonomy="${outdir}/${prefix}.taxonomy.tsv" # NOTE: $prefix is updated according to taxa_routine above +# $taxonomy is generated by either steps 4.3 or 5.5 depending on whether taxa_routine is 'ncbi' or 'ncbi_gtdb', respectively +# $cluster_method --> User input +# $completeness --> User input +# $purity --> User input +# $cov_stddev_limit --> User input +# $gc_stddev_limit --> User input +# $norm_method --> User input +# $pca_dimensions --> User input +# $embed_method --> User input +# $max_partition_size --> User input kingdoms=(bacteria archaea) for kingdom in ${kingdoms[@]};do # kingdom-specific input: - kmers="${outdir}/${simpleName}.${kingdom}.${kmer_size}mers.tsv" # Generated by step 5 (counts) - markers="${outdir}/${simpleName}.${kingdom}.markers.tsv" # Generated by step 3 + kmers="${outdir}/${prefix}.${kingdom}.${kmer_size}mers.tsv" # Generated by step 6 (counts) + markers="${outdir}/${simple_name}.${kingdom}.markers.tsv" # Generated by step 3 (before taxon-assignment sub-workflows) # kingdom-specific output: - cache="${outdir}/${simpleName}_${kingdom}_cache" - output_binning="${outdir}/${simpleName}.${kingdom}.${clustering_method}.tsv" - output_main="${outdir}/${simpleName}.${kingdom}.${clustering_method}.main.tsv" + cache="${outdir}/${prefix}_${kingdom}_cache" + output_binning="${outdir}/${prefix}.${kingdom}.${cluster_method}.tsv" + output_main="${outdir}/${prefix}.${kingdom}.${cluster_method}.main.tsv" if [ ! 
-f $kmers ] then @@ -235,6 +413,7 @@ for kingdom in ${kingdoms[@]};do fi # script: + set -x autometa-binning-ldm \ --kmers $kmers \ --coverages $coverages \ @@ -243,7 +422,7 @@ for kingdom in ${kingdoms[@]};do --taxonomy $taxonomy \ --output-binning $output_binning \ --output-main $output_main \ - --clustering-method $clustering_method \ + --clustering-method $cluster_method \ --completeness $completeness \ --purity $purity \ --cov-stddev-limit $cov_stddev_limit \ @@ -257,27 +436,29 @@ for kingdom in ${kingdoms[@]};do --cache $cache \ --rank-filter superkingdom \ --rank-name-filter $kingdom - + { set +x; } 2>/dev/null done -# Step 7: Create binning summary files +# Step 8: Create binning summary files # input: # $ncbi -> User input +# $gtdb -> User input # $assembly -> User input +# $dbtype -> # NOTE: $prefix is updated according to taxa_routine above kingdoms=(bacteria archaea) for kingdom in ${kingdoms[@]};do # kingdom-specific input: - binning_main="${outdir}/${simpleName}.${kingdom}.${clustering_method}.main.tsv" # Generated by step 6 - markers="${outdir}/${simpleName}.${kingdom}.markers.tsv" # Generated by step 3 + binning_main="${outdir}/${prefix}.${kingdom}.${cluster_method}.main.tsv" # Generated by step 7 + markers="${outdir}/${simple_name}.${kingdom}.markers.tsv" # Generated by step 3 # kingdom-specific output: - output_stats="${outdir}/${simpleName}_${kingdom}_metabin_stats.tsv" - output_taxonomy="${outdir}/${simpleName}_${kingdom}_metabin_taxonomy.tsv" - output_metabins="${outdir}/${simpleName}_${kingdom}_metabins" + output_stats="${outdir}/${prefix}_${kingdom}_metabin_stats.tsv" + output_taxonomy="${outdir}/${prefix}_${kingdom}_metabin_taxonomy.tsv" + output_metabins="${outdir}/${prefix}_${kingdom}_metabins" if [ ! -f $binning_main ] then @@ -285,33 +466,41 @@ for kingdom in ${kingdoms[@]};do continue fi - # script: + if [[ "$taxa_routine" == "ncbi_gtdb" ]] + then + dbdir=$gtdb + else + dbdir=$ncbi + fi + set -x autometa-binning-summary \ --binning-main $binning_main \ --markers $markers \ --metagenome $assembly \ - --ncbi $ncbi \ + --dbdir $dbdir \ + --dbtype $dbtype \ --output-stats $output_stats \ --output-taxonomy $output_taxonomy \ --output-metabins $output_metabins + { set +x; } 2>/dev/null done -# Step 8: (OPTIONAL) Annotate Autometa bins' taxonomies and completeness/contamination metrics using GTDB-Tk & CheckM +# Step 9: (OPTIONAL) Annotate Autometa bins' taxonomies and completeness/contamination metrics using GTDB-Tk & CheckM # input: -# $output_metabins --> Generated by step 7 +# $output_metabins --> Generated by step 8 # kingdoms=(bacteria archaea) # for kingdom in ${kingdoms[@]};do # # kingdom-specific input -# output_metabins="${outdir}/${simpleName}_${kingdom}_metabins" +# # output_metabins="${outdir}/${simple_name}_${dbtype}_${kingdom}_metabins" # # kingdom-specific outputs -# gtdbtk_outdir="${outdir}/${simpleName}_${kingdom}_gtdbtk_classify_wf" -# checkm_outdir="${outdir}/${simpleName}_${kingdom}_checkm_lineage_wf" -# checkm_stats="${outdir}/${simpleName}_${kingdom}_checkm_stats.tsv" +# gtdbtk_outdir="${outdir}/${simple_name}_${dbtype}_${kingdom}_gtdbtk_classify_wf" +# checkm_outdir="${outdir}/${simple_name}_${dbtype}_${kingdom}_checkm_lineage_wf" +# checkm_stats="${outdir}/${simple_name}_${dbtype}_${kingdom}_checkm_stats.tsv" # if [ ! -d $output_metabins ] # then @@ -320,7 +509,8 @@ done # fi # # NOTE: autometa-binning-summary writes out all bins with the .fna extension -# # Unclustered sequences are written to one file with the .fasta extension. e.g. 
outdir/unclustered.fasta +# # Unclustered sequences are written to one file with the .fasta extension. +# # e.g. ${outdir}/${prefix}_${kingdom}_metabins/unclustered.fasta # # First run `gtdbtk check_install` to ensure the following command will run # gtdbtk classify_wf \ diff --git a/workflows/autometa.sh b/workflows/autometa.sh index ae742b42c..fda8862b1 100644 --- a/workflows/autometa.sh +++ b/workflows/autometa.sh @@ -1,9 +1,10 @@ #!/usr/bin/env bash -#SBATCH -p partition +#SBATCH --partition=queue #SBATCH -t 48:00:00 #SBATCH --nodes=1 #SBATCH --ntasks-per-node=16 - +#SBATCH --error=autometa.%J.err +#SBATCH --output=autometa.%J.out # NOTE: To create the conda environment for autometa you can supply the Makefile command: # make create_environment @@ -18,29 +19,41 @@ assembly="Path to metagenome assembly fasta file" bam="Path to metagenome read alignments.bam" orfs="Path to orfs used as input to diamond blast" -blast="Path to diamond output file (outfmt 6)" -ncbi="Path to NCBI databases directory" +blast="Path to diamond output file (outfmt 6)." # BlastP should be done against the NCBI `nr` database. +ncbi="Path to NCBI databases directory" # For more info see: https://autometa.readthedocs.io/en/latest/databases.html#ncbi +gtdb="Path to GTDB databases directory" # (Optional) For more info see: https://autometa.readthedocs.io/en/latest/databases.html#genome-taxonomy-database-gtdb # Autometa Parameters length_cutoff=3000 # in bp -# kmer counting, normalization transform and embedding method +# Taxon Assignment Parameters +taxa_routine="ncbi" # Choices are "ncbi" or "ncbi_gtdb" +# NOTE: When using the "ncbi_gtdb" option, blastP will be performed against the GTDB database +# using the kingdom-specific ORFs retrieved from the NCBI taxon-assignment sub-workflow. +# K-mer Counting, Normalization and Embedding Parameters kmer_size=5 norm_method="am_clr" # choices: am_clr, clr, ilr pca_dimensions=50 # NOTE: must be greater than $embed_dimensions embed_dimensions=2 # NOTE: must be less than $pca_dimensions -embed_method="bhsne" # choices: bhsne, sksne, umap, densne, trimap -# Binning clustering method +embed_method="bhsne" # choices: bhsne, sksne, umap, densmap, trimap +# Binning Parameters (clustering methods and MAG quality thresholds) cluster_method="hdbscan" # choices: hdbscan, dbscan # Binning metrics cutoffs -completeness=20.0 -purity=95.0 -cov_stddev_limit=25.0 -gc_stddev_limit=5.0 +completeness=20.0 # Accept MAGs greater than this value +purity=95.0 # Accept MAGs greater than this value +cov_stddev_limit=25.0 # Accept MAGs less than this value +gc_stddev_limit=5.0 # Accept MAGs less than this value +# Runtime Parameters cpus=16 seed=42 +if [[ $taxa_routine != "ncbi" ]] && [[ $taxa_routine != "ncbi_gtdb" ]] +then + echo "ERROR: Invalid Taxonomic routine value. Please choose between ncbi or ncbi_gtdb. Current selection: ${taxa_routine}" + exit 1 +fi + # Step 0: Do some Path handling with the provided `assembly` filepath -simpleName="TemplateAssemblyName" +simple_name="TemplateAssemblyName" outdir="AutometaOutdir" if [ ! 
-d $outdir ] then mkdir -p $outdir @@ -48,8 +61,11 @@ fi ######### BEGIN ######### + # Step 00: Report autometa version +set -x autometa --version +{ set +x; } 2>/dev/null # Step 1: filter assembly by length and retrieve contig lengths as well as GC content @@ -58,15 +74,17 @@ autometa --version # $length_cutoff --> User input # output: -filtered_assembly="${outdir}/${simpleName}.filtered.fna" -gc_content="${outdir}/${simpleName}.gc_content.tsv" +filtered_assembly="${outdir}/${simple_name}.filtered.fna" +gc_content="${outdir}/${simple_name}.gc_content.tsv" # script: +set -x autometa-length-filter \ --assembly $assembly \ --cutoff $length_cutoff \ --output-fasta $filtered_assembly \ --output-gc-content $gc_content +{ set +x; } 2>/dev/null # Step 2: Determine coverages from assembly read alignments @@ -74,11 +92,13 @@ autometa-length-filter \ # NOTE: $bam is defined at top and the rest of the inputs are generated by autometa # output: -bed="${outdir}/${simpleName}.coverages.bed.tsv" -coverages="${outdir}/${simpleName}.coverages.tsv" +bed="${outdir}/${simple_name}.coverages.bed.tsv" +coverages="${outdir}/${simple_name}.coverages.tsv" # script: +set -x autometa-bedtools-genomecov --ibam $bam --bed $bed --output $coverages +{ set +x; } 2>/dev/null # Step 3: Annotate and filter markers # input: @@ -90,10 +110,11 @@ kingdoms=(bacteria archaea) # NOTE: We iterate through both sets of markers for binning both bacterial and archeal kingdoms for kingdom in ${kingdoms[@]};do # kingdom-specific output: - hmmscan="${outdir}/${simpleName}.${kingdom}.hmmscan.tsv" - markers="${outdir}/${simpleName}.${kingdom}.markers.tsv" + hmmscan="${outdir}/${simple_name}.${kingdom}.hmmscan.tsv" + markers="${outdir}/${simple_name}.${kingdom}.markers.tsv" # script: + set -x autometa-markers \ --orfs $orfs \ --hmmscan $hmmscan \ @@ -102,6 +123,7 @@ for kingdom in ${kingdoms[@]};do --parallel \ --cpus 4 \ --seed $seed + { set +x; } 2>/dev/null done # Step 4.1: Determine ORF lowest common ancestor (LCA) amongst top hits @@ -109,31 +131,40 @@ done # input: # $blast --> User Input # $ncbi --> User Input +# $dbtype --> Updated according to $taxa_routine +dbtype="ncbi" +prefix="${simple_name}.${dbtype}" # output: -lca="${outdir}/${simpleName}.orfs.lca.tsv" -sseqid2taxid="${outdir}/${simpleName}.orfs.sseqid2taxid.tsv" -errorTaxids="${outdir}/${simpleName}.orfs.errortaxids.tsv" +lca="${outdir}/${prefix}.orfs.lca.tsv" +sseqid_to_taxid="${outdir}/${prefix}.orfs.sseqid2taxid.tsv" +error_taxids="${outdir}/${prefix}.orfs.errortaxids.tsv" # script: +set -x autometa-taxonomy-lca \ --blast $blast \ --dbdir $ncbi \ + --dbtype $dbtype \ --lca-output $lca \ - --sseqid2taxid-output $sseqid2taxid \ - --lca-error-taxids $errorTaxids + --sseqid2taxid-output $sseqid_to_taxid \ + --lca-error-taxids $error_taxids +{ set +x; } 2>/dev/null # Step 4.2: Perform Modified Majority vote of ORF LCAs for all contigs that returned hits in blast search # input: # $lca --> Generated by step 4.1 # $ncbi --> User Input +# $dbtype --> Updated according to $taxa_routine # output: -votes="${outdir}/${simpleName}.taxids.tsv" +votes="${outdir}/${prefix}.taxids.tsv" # script: -autometa-taxonomy-majority-vote --lca $lca --output $votes --dbdir $ncbi +set -x +autometa-taxonomy-majority-vote --lca $lca --output $votes --dbdir $ncbi --dbtype $dbtype +{ set +x; } 2>/dev/null # Step 4.3: Split assigned taxonomies into kingdoms @@ -145,20 +176,162 @@ autometa-taxonomy-majority-vote --lca $lca --output $votes --dbdir $ncbi # output: # Will write recovered superkingdoms to 
${outdir} -# e.g. ${outdir}/${simpleName}.bacteria.fna -# e.g. ${outdir}/${simpleName}.archaea.fna -# e.g. ${outdir}/${simpleName}.taxonomy.tsv +# e.g. ${outdir}/${prefix}.bacteria.fna +# e.g. ${outdir}/${prefix}.archaea.fna +# e.g. ${outdir}/${prefix}.taxonomy.tsv # script: +set -x autometa-taxonomy \ --votes $votes \ --output $outdir \ - --prefix $simpleName \ + --prefix $prefix \ --split-rank-and-write superkingdom \ --assembly $assembly \ - --ncbi $ncbi + --dbdir $ncbi \ + --dbtype $dbtype +{ set +x; } 2>/dev/null -# Step 5: Perform k-mer counting on respective kingdoms +# Step 5: Taxon-assignment using the GTDB database +# NOTE: only performed if `taxa_routine` is 'ncbi_gtdb' + +# Step 5.1: Extract bacterial ORFs and run GTDB +# input: +# $kingdom_fasta --> Generated by step 4.3 +# $orfs --> User Input + +# output: +# orf_prefixes --> text file containing metagenome contig IDs classified within NCBI bacteria and archaea +# orf_ids --> text file containing contig ORF IDs classified within NCBI bacteria and archaea +# kingdom_orfs --> fasta file containing metagenome ORFs classified within NCBI bacteria or archaea +# gtdb_input_orfs --> metagenome orfs classified within NCBI bacteria *and* archaea + +if [[ "$taxa_routine" == "ncbi_gtdb" ]] +then + echo "Running GTDB taxon assignment step." + # output + gtdb_input_orfs="${outdir}/${prefix}.orfs.faa" + + + for kingdom in ${kingdoms[@]};do + + kingdom_fasta="${outdir}/${prefix}.${kingdom}.fna" + + orf_prefixes="${outdir}/${prefix}.${kingdom}.contigIDs.txt" + orf_ids="${outdir}/${prefix}.${kingdom}.orfIDs.txt" + kingdom_orfs="${outdir}/${prefix}.${kingdom}.orfs.faa" + + if [ ! -f $kingdom_fasta ] + then + echo "${kingdom_fasta} does not exist, skipping..." + continue + fi + + # Retrieve contig IDs from kingdom fasta file + set -x + grep ">" $kingdom_fasta | \ + sed 's/^>//' | \ + sed 's/$/_/' | \ + cut -f1 -d" " > $orf_prefixes + # Retrieve ORF IDs from contig IDs + grep -f $orf_prefixes $orfs | sed 's/^>//' | cut -f1 -d" " > $orf_ids + # Retrieve ORF seqs from ORF IDs + seqkit grep \ + --pattern-file $orf_ids \ + --out-file $kingdom_orfs \ + $orfs + # Concatenate kingdom ORFs to single file for GTDB blastp + cat $kingdom_orfs >> $gtdb_input_orfs + { set +x; } 2>/dev/null + done + dbtype="gtdb" + prefix="${simple_name}.${dbtype}" + + # Step 5.2: Run blastp + # input: + # $gtdb_input_orfs --> Generated from step 5.1 + gtdb_dmnd_db=$(find $gtdb -name "gtdb.dmnd") # generated using autometa-setup-gtdb (Must be performed prior to using this script) + # output + blast="${outdir}/${prefix}.blastp.tsv" + + # script + set -x + diamond blastp \ + --query $gtdb_input_orfs \ + --db $gtdb_dmnd_db \ + --evalue 1e-5 \ + --max-target-seqs 200 \ + --threads $cpus \ + --outfmt 6 \ + --out $blast + { set +x; } 2>/dev/null + + #Step 5.3: Determine LCA + # input: + # $blast --> Generated from step 5.2 + # $gtdb --> User Input + + # output: + lca="${outdir}/${prefix}.orfs.lca.tsv" + sseqid_to_taxid="${outdir}/${prefix}.orfs.sseqid2taxid.tsv" + error_taxids="${outdir}/${prefix}.orfs.errortaxids.tsv" + + # script: + set -x + autometa-taxonomy-lca \ + --blast $blast \ + --dbdir $gtdb \ + --dbtype $dbtype \ + --lca-output $lca \ + --sseqid2taxid-output $sseqid_to_taxid \ + --lca-error-taxids $error_taxids + { set +x; } 2>/dev/null + + # Step 5.4: Perform Modified Majority vote of ORF LCAs for all contigs that returned hits in blast search + # input: + # $lca --> Generated from step 5.3 + # $gtdb --> User Input + + # output: + votes="${outdir}/${prefix}.taxids.tsv" 
+
+    # script:
+    set -x
+    autometa-taxonomy-majority-vote \
+        --lca $lca \
+        --output $votes \
+        --dbdir $gtdb \
+        --dbtype gtdb
+    { set +x; } 2>/dev/null
+
+    # Step 5.5: Split assigned taxonomies into kingdoms
+    # input:
+    # $votes --> Generated from step 5.4
+    # $outdir --> Generated from step 0
+    # $prefix --> Updated to "${simple_name}.${dbtype}" in step 5.1
+    # $filtered_assembly --> Generated from step 1
+    # $gtdb --> User Input
+
+    # output:
+    # Will write recovered superkingdoms to $outdir
+    # e.g. ${outdir}/${prefix}.bacteria.fna
+    # e.g. ${outdir}/${prefix}.archaea.fna
+    # e.g. ${outdir}/${prefix}.taxonomy.tsv
+
+    # script:
+    set -x
+    autometa-taxonomy \
+        --votes $votes \
+        --output $outdir \
+        --prefix $prefix \
+        --split-rank-and-write superkingdom \
+        --assembly $filtered_assembly \
+        --dbdir $gtdb \
+        --dbtype gtdb
+    { set +x; } 2>/dev/null
+fi
+
+# Step 6: Perform k-mer counting on respective kingdoms
 
 # input:
 # $kmer_size --> User input
@@ -172,13 +345,10 @@ kingdoms=(bacteria archaea)
 for kingdom in ${kingdoms[@]};do
 
     # kingdom-specific input:
-    # NOTE: $fasta --> Generated by step 4.3
-    fasta="${outdir}/${simpleName}.${kingdom}.fna"
-
-    # kingdom-specific output:
-    counts="${outdir}/${simpleName}.${kingdom}.${kmer_size}mers.tsv"
-    normalized="${outdir}/${simpleName}.${kingdom}.${kmer_size}mers.${norm_method}.tsv"
-    embedded="${outdir}/${simpleName}.${kingdom}.${kmer_size}mers.${norm_method}.${embed_method}.tsv"
+    fasta="${outdir}/${prefix}.${kingdom}.fna" # NOTE: $prefix is updated according to taxa_routine above
+    counts="${outdir}/${prefix}.${kingdom}.${kmer_size}mers.tsv"
+    normalized="${outdir}/${prefix}.${kingdom}.${kmer_size}mers.${norm_method}.tsv"
+    embedded="${outdir}/${prefix}.${kingdom}.${kmer_size}mers.${norm_method}.${embed_method}.tsv"
 
     if [ ! -f $fasta ]
     then
@@ -186,6 +356,7 @@ for kingdom in ${kingdoms[@]};do
         continue
     fi
     # script:
+    set -x
     autometa-kmers \
         --fasta $fasta \
         --kmers $counts \
@@ -198,32 +369,41 @@ for kingdom in ${kingdoms[@]};do
         --embedding-dimensions $embed_dimensions \
         --cpus $cpus \
         --seed $seed
+    { set +x; } 2>/dev/null
 done
 
-# Step 6: Perform binning on each set of bacterial and archaeal contigs
+# Step 7: Perform binning on each set of bacterial and archaeal contigs
 
 # input:
 # $cpus --> User input
 # $seed --> User input
-taxonomy="${outdir}/${simpleName}.taxonomy.tsv" # Generated by step 4.3
 # $gc_content --> Generated by step 1
+taxonomy="${outdir}/${prefix}.taxonomy.tsv" # NOTE: $prefix is updated according to taxa_routine above
+# $taxonomy is generated by either step 4.3 or step 5.5, depending on whether taxa_routine is 'ncbi' or 'ncbi_gtdb', respectively
 
 kingdoms=(bacteria archaea)
 
 for kingdom in ${kingdoms[@]};do
 
     # kingdom-specific input:
-    kmers="${outdir}/${simpleName}.${kingdom}.${kmer_size}mers.${norm_method}.${embed_method}.tsv" # Generated by step 5
-    markers="${outdir}/${simpleName}.${kingdom}.markers.tsv" # Generated by step 3
+    kmers="${outdir}/${prefix}.${kingdom}.${kmer_size}mers.${norm_method}.${embed_method}.tsv" # Generated by step 6
+    markers="${outdir}/${simple_name}.${kingdom}.markers.tsv" # Generated by step 3 (before taxon-assignment sub-workflows)
 
     # kingdom-specific output:
-    output_binning="${outdir}/${simpleName}.${kingdom}.${cluster_method}.tsv"
-    output_main="${outdir}/${simpleName}.${kingdom}.${cluster_method}.main.tsv"
+    output_binning="${outdir}/${prefix}.${kingdom}.${cluster_method}.tsv"
+    output_main="${outdir}/${prefix}.${kingdom}.${cluster_method}.main.tsv"
 
     if [ -f $output_main ] && [ -s $output_main ];then
         echo "$(basename $output_main) already exists. continuing..."
         continue
     fi
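+
+    # NOTE: the ${kingdom}-specific kmer embedding from step 6 only exists when contigs
+    # were classified to that kingdom, so skip binning for kingdoms without one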
+    if [ ! -f $kmers ]
+    then
+        echo "${kingdom} file not found (kmers: ${kmers}), skipping."
+        continue
+    fi
+
     # script:
+    set -x
     autometa-binning \
         --kmers $kmers \
         --coverages $coverages \
@@ -240,26 +420,29 @@ for kingdom in ${kingdoms[@]};do
         --starting-rank superkingdom \
         --rank-filter superkingdom \
         --rank-name-filter $kingdom
+    { set +x; } 2>/dev/null
 done
 
-# Step 7: Create binning summary files
+# Step 8: Create binning summary files
 
 # input:
 # $ncbi -> User input
+# $gtdb -> User input
 # $assembly -> User input
+# $dbtype -> Set according to taxa_routine above
+# NOTE: $prefix is updated according to taxa_routine above
 
 kingdoms=(bacteria archaea)
 
 for kingdom in ${kingdoms[@]};do
 
     # kingdom-specific input:
-    binning_main="${outdir}/${simpleName}.${kingdom}.${cluster_method}.main.tsv" # Generated by step 6
-    markers="${outdir}/${simpleName}.${kingdom}.markers.tsv" # Generated by step 3
+    binning_main="${outdir}/${prefix}.${kingdom}.${cluster_method}.main.tsv" # Generated by step 7
+    markers="${outdir}/${simple_name}.${kingdom}.markers.tsv" # Generated by step 3
 
     # kingdom-specific output:
-    output_stats="${outdir}/${simpleName}_${kingdom}_metabin_stats.tsv"
-    output_taxonomy="${outdir}/${simpleName}_${kingdom}_metabin_taxonomy.tsv"
-    output_metabins="${outdir}/${simpleName}_${kingdom}_metabins"
+    output_stats="${outdir}/${prefix}_${kingdom}_metabin_stats.tsv"
+    output_taxonomy="${outdir}/${prefix}_${kingdom}_metabin_taxonomy.tsv"
+    output_metabins="${outdir}/${prefix}_${kingdom}_metabins"
 
     if [ ! -f $binning_main ]
     then
@@ -267,15 +450,24 @@ for kingdom in ${kingdoms[@]};do
         continue
     fi
 
-    # script:
+    if [[ "$taxa_routine" == "ncbi_gtdb" ]]
+    then
+        dbdir=$gtdb
+    else
+        dbdir=$ncbi
+    fi
+    set -x
     autometa-binning-summary \
         --binning-main $binning_main \
         --markers $markers \
         --metagenome $assembly \
-        --ncbi $ncbi \
+        --dbdir $dbdir \
+        --dbtype $dbtype \
        --output-stats $output_stats \
         --output-taxonomy $output_taxonomy \
         --output-metabins $output_metabins
+    { set +x; } 2>/dev/null
+
 done
 
 ######### END #########
diff --git a/workflows/autometa_flagged.sh b/workflows/autometa_flagged.sh
index b9ffa89aa..41ef05241 100644
--- a/workflows/autometa_flagged.sh
+++ b/workflows/autometa_flagged.sh
@@ -326,6 +326,7 @@ set -x
 autometa-taxonomy-lca \
     --blast $blast \
     --dbdir $ncbi \
+    --dbtype ncbi \
     --lca-output $lca \
     --sseqid2taxid-output $sseqid2taxid \
     --lca-error-taxids $error_taxids
@@ -338,7 +339,8 @@ set -x
 autometa-taxonomy-majority-vote \
     --lca $lca \
     --output $votes \
-    --dbdir $ncbi
+    --dbdir $ncbi \
+    --dbtype ncbi
 
 # Step 4.3: Split assigned taxonomies into kingdoms
 autometa-taxonomy \
@@ -347,7 +349,8 @@ autometa-taxonomy \
     --prefix $simple_name \
     --split-rank-and-write superkingdom \
     --assembly $filtered_assembly \
-    --ncbi $ncbi
+    --dbdir $ncbi \
+    --dbtype ncbi
 { set +x; } 2>/dev/null
 
 # Step 5: Perform k-mer counting on respective kingdoms
@@ -451,7 +454,8 @@ for kingdom in ${kingdoms[@]};do
         --binning-main $binning_main \
         --markers $markers \
         --metagenome $filtered_assembly \
-        --ncbi $ncbi \
+        --dbdir $ncbi \
+        --dbtype ncbi \
         --output-stats $output_stats \
         --output-taxonomy $output_taxonomy \
         --output-metabins $output_metabins