diff --git a/metadata-ingestion/src/datahub/ingestion/run/pipeline.py b/metadata-ingestion/src/datahub/ingestion/run/pipeline.py index 75dabc4a7e02a..dc7521f8f8c55 100644 --- a/metadata-ingestion/src/datahub/ingestion/run/pipeline.py +++ b/metadata-ingestion/src/datahub/ingestion/run/pipeline.py @@ -49,6 +49,7 @@ from datahub.utilities.lossy_collections import LossyDict, LossyList logger = logging.getLogger(__name__) +_REPORT_PRINT_INTERVAL_SECONDS = 60 class LoggingCallback(WriteCallback): @@ -403,7 +404,7 @@ def create( def _time_to_print(self) -> bool: self.num_intermediate_workunits += 1 current_time = int(time.time()) - if current_time - self.last_time_printed > 10: + if current_time - self.last_time_printed > _REPORT_PRINT_INTERVAL_SECONDS: # we print self.num_intermediate_workunits = 0 self.last_time_printed = current_time diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_config.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_config.py index 4beb268448569..588187e8e11c2 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_config.py @@ -95,6 +95,15 @@ class SnowflakeV2Config( description="If enabled, populates the snowflake technical schema and descriptions.", ) + include_primary_keys: bool = Field( + default=True, + description="If enabled, populates the snowflake primary keys.", + ) + include_foreign_keys: bool = Field( + default=True, + description="If enabled, populates the snowflake foreign keys.", + ) + include_column_lineage: bool = Field( default=True, description="Populates table->table and view->table column lineage. Requires appropriate grants given to the role and the Snowflake Enterprise Edition or above.", @@ -105,6 +114,12 @@ class SnowflakeV2Config( description="Populates view->view and table->view column lineage using DataHub's sql parser.", ) + lazy_schema_resolver: bool = Field( + default=False, + description="If enabled, uses lazy schema resolver to resolve schemas for tables and views. " + "This is useful if you have a large number of schemas and want to avoid bulk fetching the schema for each table/view.", + ) + _check_role_grants_removed = pydantic_removed_field("check_role_grants") _provision_role_removed = pydantic_removed_field("provision_role") diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_query.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_query.py index b3eb23b25e0a3..c4b6f597bbb7e 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_query.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_query.py @@ -4,6 +4,7 @@ from datahub.configuration.time_window_config import BucketDuration from datahub.ingestion.source.snowflake.constants import SnowflakeObjectDomain from datahub.ingestion.source.snowflake.snowflake_config import DEFAULT_TABLES_DENY_LIST +from datahub.utilities.prefix_batch_builder import PrefixGroup SHOW_VIEWS_MAX_PAGE_SIZE = 10000 @@ -228,50 +229,50 @@ def show_views_for_database( """ @staticmethod - def columns_for_schema(schema_name: str, db_name: Optional[str]) -> str: - db_clause = f'"{db_name}".' 
if db_name is not None else "" - return f""" - select - table_catalog AS "TABLE_CATALOG", - table_schema AS "TABLE_SCHEMA", - table_name AS "TABLE_NAME", - column_name AS "COLUMN_NAME", - ordinal_position AS "ORDINAL_POSITION", - is_nullable AS "IS_NULLABLE", - data_type AS "DATA_TYPE", - comment AS "COMMENT", - character_maximum_length AS "CHARACTER_MAXIMUM_LENGTH", - numeric_precision AS "NUMERIC_PRECISION", - numeric_scale AS "NUMERIC_SCALE", - column_default AS "COLUMN_DEFAULT", - is_identity AS "IS_IDENTITY" - from {db_clause}information_schema.columns - WHERE table_schema='{schema_name}' - ORDER BY ordinal_position""" - - @staticmethod - def columns_for_table( - table_name: str, schema_name: str, db_name: Optional[str] + def columns_for_schema( + schema_name: str, + db_name: str, + prefix_groups: Optional[List[PrefixGroup]] = None, ) -> str: - db_clause = f'"{db_name}".' if db_name is not None else "" - return f""" - select - table_catalog AS "TABLE_CATALOG", - table_schema AS "TABLE_SCHEMA", - table_name AS "TABLE_NAME", - column_name AS "COLUMN_NAME", - ordinal_position AS "ORDINAL_POSITION", - is_nullable AS "IS_NULLABLE", - data_type AS "DATA_TYPE", - comment AS "COMMENT", - character_maximum_length AS "CHARACTER_MAXIMUM_LENGTH", - numeric_precision AS "NUMERIC_PRECISION", - numeric_scale AS "NUMERIC_SCALE", - column_default AS "COLUMN_DEFAULT", - is_identity AS "IS_IDENTITY" - from {db_clause}information_schema.columns - WHERE table_schema='{schema_name}' and table_name='{table_name}' - ORDER BY ordinal_position""" + columns_template = """\ +SELECT + table_catalog AS "TABLE_CATALOG", + table_schema AS "TABLE_SCHEMA", + table_name AS "TABLE_NAME", + column_name AS "COLUMN_NAME", + ordinal_position AS "ORDINAL_POSITION", + is_nullable AS "IS_NULLABLE", + data_type AS "DATA_TYPE", + comment AS "COMMENT", + character_maximum_length AS "CHARACTER_MAXIMUM_LENGTH", + numeric_precision AS "NUMERIC_PRECISION", + numeric_scale AS "NUMERIC_SCALE", + column_default AS "COLUMN_DEFAULT", + is_identity AS "IS_IDENTITY" +FROM "{db_name}".information_schema.columns +WHERE table_schema='{schema_name}' AND {extra_clause}""" + + selects = [] + if prefix_groups is None: + prefix_groups = [PrefixGroup(prefix="", names=[])] + for prefix_group in prefix_groups: + if prefix_group.prefix == "": + extra_clause = "TRUE" + elif prefix_group.exact_match: + extra_clause = f"table_name = '{prefix_group.prefix}'" + else: + extra_clause = f"table_name LIKE '{prefix_group.prefix}%'" + + selects.append( + columns_template.format( + db_name=db_name, schema_name=schema_name, extra_clause=extra_clause + ) + ) + + return ( + "\nUNION ALL\n".join(selects) + + """\nORDER BY table_name, ordinal_position""" + ) @staticmethod def show_primary_keys_for_schema(schema_name: str, db_name: str) -> str: diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_report.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_report.py index d84580a94ab4e..4924546383aa4 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_report.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_report.py @@ -117,7 +117,6 @@ class SnowflakeV2Report( # "Information schema query returned too much data. 
Please repeat query with more selective predicates."" # This will result in overall increase in time complexity num_get_tables_for_schema_queries: int = 0 - num_get_columns_for_table_queries: int = 0 # these will be non-zero if the user choses to enable the extract_tags = "with_lineage" option, which requires # individual queries per object (database, schema, table) and an extra query per table to get the tags on the columns. diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema.py index 3254224e437a6..4bc684a22514c 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema.py @@ -3,7 +3,7 @@ from collections import defaultdict from dataclasses import dataclass, field from datetime import datetime -from typing import Callable, Dict, List, Optional +from typing import Callable, Dict, Iterable, List, MutableMapping, Optional from snowflake.connector import SnowflakeConnection @@ -15,6 +15,8 @@ ) from datahub.ingestion.source.snowflake.snowflake_utils import SnowflakeQueryMixin from datahub.ingestion.source.sql.sql_generic import BaseColumn, BaseTable, BaseView +from datahub.utilities.file_backed_collections import FileBackedDict +from datahub.utilities.prefix_batch_builder import build_prefix_batches from datahub.utilities.serialized_lru_cache import serialized_lru_cache logger: logging.Logger = logging.getLogger(__name__) @@ -379,58 +381,48 @@ def get_views_for_database(self, db_name: str) -> Dict[str, List[SnowflakeView]] @serialized_lru_cache(maxsize=SCHEMA_PARALLELISM) def get_columns_for_schema( - self, schema_name: str, db_name: str - ) -> Optional[Dict[str, List[SnowflakeColumn]]]: - columns: Dict[str, List[SnowflakeColumn]] = {} - try: - cur = self.query(SnowflakeQuery.columns_for_schema(schema_name, db_name)) - except Exception as e: - logger.debug( - f"Failed to get all columns for schema - {schema_name}", exc_info=e - ) - # Error - Information schema query returned too much data. - # Please repeat query with more selective predicates. - return None - - for column in cur: - if column["TABLE_NAME"] not in columns: - columns[column["TABLE_NAME"]] = [] - columns[column["TABLE_NAME"]].append( - SnowflakeColumn( - name=column["COLUMN_NAME"], - ordinal_position=column["ORDINAL_POSITION"], - is_nullable=column["IS_NULLABLE"] == "YES", - data_type=column["DATA_TYPE"], - comment=column["COMMENT"], - character_maximum_length=column["CHARACTER_MAXIMUM_LENGTH"], - numeric_precision=column["NUMERIC_PRECISION"], - numeric_scale=column["NUMERIC_SCALE"], + self, + schema_name: str, + db_name: str, + # HACK: This key is excluded from the cache key. + cache_exclude_all_objects: Iterable[str], + ) -> MutableMapping[str, List[SnowflakeColumn]]: + all_objects = list(cache_exclude_all_objects) + + columns: MutableMapping[str, List[SnowflakeColumn]] = {} + if len(all_objects) > 10000: + # For massive schemas, use a FileBackedDict to avoid memory issues. 
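`FileBackedDict` from `datahub.utilities.file_backed_collections` exposes an ordinary mutable-mapping interface while keeping its entries in a temporary SQLite file rather than in process memory, which is what makes it a drop-in replacement for the plain dict once a schema has more than 10,000 tables and views. A minimal sketch of that selection step, assuming only the no-argument constructor this hunk uses (the helper name and threshold constant below are illustrative):

```python
from typing import List, MutableMapping

from datahub.utilities.file_backed_collections import FileBackedDict

# Mirrors the hard-coded 10,000 cutoff used in the hunk above.
_LARGE_SCHEMA_THRESHOLD = 10_000


def make_columns_store(num_objects: int) -> MutableMapping[str, List[dict]]:
    """Pick the table-name -> columns store, spilling to disk for huge schemas."""
    if num_objects > _LARGE_SCHEMA_THRESHOLD:
        # Same dict-like API, but values are serialized into a temporary
        # SQLite database instead of being held in memory.
        return FileBackedDict()
    return {}
```

The rest of the column-fetching loop only relies on that shared mapping interface, so it does not need to know which store it received.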
+ columns = FileBackedDict() + + object_batches = build_prefix_batches( + all_objects, max_batch_size=10000, max_groups_in_batch=5 + ) + for batch_index, object_batch in enumerate(object_batches): + if batch_index > 0: + logger.info( + f"Still fetching columns for {db_name}.{schema_name} - batch {batch_index + 1} of {len(object_batches)}" ) + query = SnowflakeQuery.columns_for_schema( + schema_name, db_name, object_batch ) - return columns - - def get_columns_for_table( - self, table_name: str, schema_name: str, db_name: str - ) -> List[SnowflakeColumn]: - columns: List[SnowflakeColumn] = [] - cur = self.query( - SnowflakeQuery.columns_for_table(table_name, schema_name, db_name), - ) - - for column in cur: - columns.append( - SnowflakeColumn( - name=column["COLUMN_NAME"], - ordinal_position=column["ORDINAL_POSITION"], - is_nullable=column["IS_NULLABLE"] == "YES", - data_type=column["DATA_TYPE"], - comment=column["COMMENT"], - character_maximum_length=column["CHARACTER_MAXIMUM_LENGTH"], - numeric_precision=column["NUMERIC_PRECISION"], - numeric_scale=column["NUMERIC_SCALE"], + cur = self.query(query) + + for column in cur: + if column["TABLE_NAME"] not in columns: + columns[column["TABLE_NAME"]] = [] + columns[column["TABLE_NAME"]].append( + SnowflakeColumn( + name=column["COLUMN_NAME"], + ordinal_position=column["ORDINAL_POSITION"], + is_nullable=column["IS_NULLABLE"] == "YES", + data_type=column["DATA_TYPE"], + comment=column["COMMENT"], + character_maximum_length=column["CHARACTER_MAXIMUM_LENGTH"], + numeric_precision=column["NUMERIC_PRECISION"], + numeric_scale=column["NUMERIC_SCALE"], + ) ) - ) return columns @serialized_lru_cache(maxsize=SCHEMA_PARALLELISM) diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema_gen.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema_gen.py index 920cf741770c3..b6f16cd671b8d 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema_gen.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_schema_gen.py @@ -1,4 +1,5 @@ import concurrent.futures +import itertools import logging import queue from typing import Dict, Iterable, List, Optional, Union @@ -321,7 +322,7 @@ def _process_schema_worker(snowflake_schema: SnowflakeSchema) -> None: # Read from the queue and yield the work units until all futures are done. while True: - if q.empty(): + if not q.empty(): while not q.empty(): yield q.get_nowait() else: @@ -394,18 +395,24 @@ def _process_schema( if self.config.include_technical_schema: yield from self.gen_schema_containers(snowflake_schema, db_name) + # We need to do this first so that we can use it when fetching columns. 
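The `_process_schema_worker` fix above (checking `if not q.empty():` instead of `if q.empty():`) is what lets work units stream out of the queue while schema workers are still running. A self-contained sketch of that producer/consumer shape, with a hypothetical `process_item` callable standing in for the per-schema worker:

```python
import concurrent.futures
import queue
from typing import Callable, Iterable, List, TypeVar

T = TypeVar("T")
R = TypeVar("R")


def stream_results(
    items: List[T], process_item: Callable[[T], Iterable[R]], max_workers: int = 4
) -> Iterable[R]:
    q: "queue.Queue[R]" = queue.Queue()

    def worker(item: T) -> None:
        for result in process_item(item):
            q.put(result)

    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = [executor.submit(worker, item) for item in items]

        # Yield results as they arrive rather than waiting for every worker.
        while True:
            if not q.empty():
                while not q.empty():
                    yield q.get_nowait()
            elif all(f.done() for f in futures):
                break
            else:
                # Nothing queued yet; wait briefly for any future to finish.
                concurrent.futures.wait(futures, timeout=0.1)

        # Drain anything enqueued between the last check and the break.
        while not q.empty():
            yield q.get_nowait()

        for f in futures:
            f.result()  # re-raise worker exceptions, if any
```

In the source, the worker role is played by `_process_schema_worker` and the streamed results are `MetadataWorkUnit`s.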
if self.config.include_tables: tables = self.fetch_tables_for_schema( snowflake_schema, db_name, schema_name ) + if self.config.include_views: + views = self.fetch_views_for_schema(snowflake_schema, db_name, schema_name) + + if self.config.include_tables: db_tables[schema_name] = tables if self.config.include_technical_schema: data_reader = self.make_data_reader() for table in tables: table_wu_generator = self._process_table( - table, schema_name, db_name + table, snowflake_schema, db_name ) + yield from classification_workunit_processor( table_wu_generator, self.classification_handler, @@ -414,7 +421,6 @@ def _process_schema( ) if self.config.include_views: - views = self.fetch_views_for_schema(snowflake_schema, db_name, schema_name) if ( self.aggregator and self.config.include_view_lineage @@ -434,7 +440,7 @@ def _process_schema( if self.config.include_technical_schema: for view in views: - yield from self._process_view(view, schema_name, db_name) + yield from self._process_view(view, snowflake_schema, db_name) if self.config.include_technical_schema and snowflake_schema.tags: for tag in snowflake_schema.tags: @@ -522,16 +528,27 @@ def make_data_reader(self) -> Optional[SnowflakeDataReader]: def _process_table( self, table: SnowflakeTable, - schema_name: str, + snowflake_schema: SnowflakeSchema, db_name: str, ) -> Iterable[MetadataWorkUnit]: + schema_name = snowflake_schema.name table_identifier = self.get_dataset_identifier(table.name, schema_name, db_name) - self.fetch_columns_for_table(table, schema_name, db_name, table_identifier) - - self.fetch_pk_for_table(table, schema_name, db_name, table_identifier) - - self.fetch_foreign_keys_for_table(table, schema_name, db_name, table_identifier) + try: + table.columns = self.get_columns_for_table( + table.name, snowflake_schema, db_name + ) + table.column_count = len(table.columns) + if self.config.extract_tags != TagOption.skip: + table.column_tags = self.tag_extractor.get_column_tags_for_table( + table.name, schema_name, db_name + ) + except Exception as e: + logger.debug( + f"Failed to get columns for table {table_identifier} due to error {e}", + exc_info=e, + ) + self.report_warning("Failed to get columns for table", table_identifier) if self.config.extract_tags != TagOption.skip: table.tags = self.tag_extractor.get_tags_on_object( @@ -542,12 +559,13 @@ def _process_table( ) if self.config.include_technical_schema: - if table.tags: - for tag in table.tags: - yield from self._process_tag(tag) - for column_name in table.column_tags: - for tag in table.column_tags[column_name]: - yield from self._process_tag(tag) + if self.config.include_primary_keys: + self.fetch_pk_for_table(table, schema_name, db_name, table_identifier) + + if self.config.include_foreign_keys: + self.fetch_foreign_keys_for_table( + table, schema_name, db_name, table_identifier + ) yield from self.gen_dataset_workunits(table, schema_name, db_name) @@ -587,37 +605,19 @@ def fetch_pk_for_table( ) self.report_warning("Failed to get primary key for table", table_identifier) - def fetch_columns_for_table( - self, - table: SnowflakeTable, - schema_name: str, - db_name: str, - table_identifier: str, - ) -> None: - try: - table.columns = self.get_columns_for_table(table.name, schema_name, db_name) - table.column_count = len(table.columns) - if self.config.extract_tags != TagOption.skip: - table.column_tags = self.tag_extractor.get_column_tags_for_table( - table.name, schema_name, db_name - ) - except Exception as e: - logger.debug( - f"Failed to get columns for table 
{table_identifier} due to error {e}", - exc_info=e, - ) - self.report_warning("Failed to get columns for table", table_identifier) - def _process_view( self, view: SnowflakeView, - schema_name: str, + snowflake_schema: SnowflakeSchema, db_name: str, ) -> Iterable[MetadataWorkUnit]: + schema_name = snowflake_schema.name view_name = self.get_dataset_identifier(view.name, schema_name, db_name) try: - view.columns = self.get_columns_for_table(view.name, schema_name, db_name) + view.columns = self.get_columns_for_table( + view.name, snowflake_schema, db_name + ) if self.config.extract_tags != TagOption.skip: view.column_tags = self.tag_extractor.get_column_tags_for_table( view.name, schema_name, db_name @@ -638,13 +638,6 @@ def _process_view( ) if self.config.include_technical_schema: - if view.tags: - for tag in view.tags: - yield from self._process_tag(tag) - for column_name in view.column_tags: - for tag in view.column_tags[column_name]: - yield from self._process_tag(tag) - yield from self.gen_dataset_workunits(view, schema_name, db_name) def _process_tag(self, tag: SnowflakeTag) -> Iterable[MetadataWorkUnit]: @@ -663,6 +656,13 @@ def gen_dataset_workunits( schema_name: str, db_name: str, ) -> Iterable[MetadataWorkUnit]: + if table.tags: + for tag in table.tags: + yield from self._process_tag(tag) + for column_name in table.column_tags: + for tag in table.column_tags[column_name]: + yield from self._process_tag(tag) + dataset_name = self.get_dataset_identifier(table.name, schema_name, db_name) dataset_urn = self.gen_dataset_urn(dataset_name) @@ -1015,17 +1015,16 @@ def get_views_for_schema( return views.get(schema_name, []) def get_columns_for_table( - self, table_name: str, schema_name: str, db_name: str + self, table_name: str, snowflake_schema: SnowflakeSchema, db_name: str ) -> List[SnowflakeColumn]: - columns = self.data_dictionary.get_columns_for_schema(schema_name, db_name) - - # get all columns for schema failed, - # falling back to get columns for table - if columns is None: - self.report.num_get_columns_for_table_queries += 1 - return self.data_dictionary.get_columns_for_table( - table_name, schema_name, db_name - ) + schema_name = snowflake_schema.name + columns = self.data_dictionary.get_columns_for_schema( + schema_name, + db_name, + cache_exclude_all_objects=itertools.chain( + snowflake_schema.tables, snowflake_schema.views + ), + ) # Access to table but none of its columns - is this possible ? 
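The `cache_exclude_all_objects` keyword in the call above relies on a small convention added to `serialized_lru_cache` later in this diff: any keyword argument whose name contains `cache_exclude` is still passed to the wrapped function but dropped from the cache key, so every table in a schema reuses the one cached `(schema_name, db_name)` result even though the (unhashable, lazily chained) object list differs per call. A simplified, lock-free sketch of just that keying behaviour; the real decorator also adds LRU eviction and per-key locking:

```python
import functools
from typing import Any, Callable, Dict, Tuple


def cache_ignoring_excluded_kwargs(func: Callable[..., Any]) -> Callable[..., Any]:
    """Memoize on args/kwargs, except kwargs whose name contains
    'cache_exclude' - those reach the function but never the cache key."""
    results: Dict[Tuple[Any, ...], Any] = {}

    @functools.wraps(func)
    def wrapper(*args: Any, **kwargs: Any) -> Any:
        key_kwargs = {k: v for k, v in kwargs.items() if "cache_exclude" not in k}
        key = (args, tuple(sorted(key_kwargs.items())))
        if key not in results:
            results[key] = func(*args, **kwargs)
        return results[key]

    return wrapper
```

With this in place, two calls that differ only in `cache_exclude_all_objects` share a single cached result.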
return columns.get(table_name, []) diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py index 06d7042e02456..f39620b79cfd4 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py @@ -158,6 +158,7 @@ def __init__(self, ctx: PipelineContext, config: SnowflakeV2Config): and self.config.include_tables and self.config.include_views ) + and not self.config.lazy_schema_resolver else None ), generate_usage_statistics=False, diff --git a/metadata-ingestion/src/datahub/utilities/prefix_batch_builder.py b/metadata-ingestion/src/datahub/utilities/prefix_batch_builder.py new file mode 100644 index 0000000000000..b6da7a1fbd152 --- /dev/null +++ b/metadata-ingestion/src/datahub/utilities/prefix_batch_builder.py @@ -0,0 +1,81 @@ +import dataclasses +from collections import defaultdict +from typing import List + + +@dataclasses.dataclass +class PrefixGroup: + prefix: str + names: List[str] # every name in the list has the same prefix + exact_match: bool = False + + +def build_prefix_batches( + names: List[str], max_batch_size: int, max_groups_in_batch: int +) -> List[List[PrefixGroup]]: + """Split the names into a list of batches, where each batch is a list of groups and each group is a list of names with a common prefix.""" + + groups = _build_prefix_groups(names, max_batch_size=max_batch_size) + batches = _batch_prefix_groups( + groups, max_batch_size=max_batch_size, max_groups_in_batch=max_groups_in_batch + ) + return batches + + +def _build_prefix_groups(names: List[str], max_batch_size: int) -> List[PrefixGroup]: + """Given a list of names, group them by shared prefixes such that no group is larger than `max_batch_size`.""" + + def split_group(group: PrefixGroup) -> List[PrefixGroup]: + if len(group.names) <= max_batch_size: + return [group] + + result = [] + + # Split into subgroups by the next character. + prefix_length = len(group.prefix) + 1 + subgroups = defaultdict(list) + for name in group.names: + if len(name) <= prefix_length: + # Handle cases where a single name is also the prefix for a large number of names. + # For example, if NAME and NAME_{1..10000} are both in the list. + result.append(PrefixGroup(prefix=name, names=[name], exact_match=True)) + continue + + prefix = name[:prefix_length] + subgroups[prefix].append(name) + + for prefix, names in subgroups.items(): + result.extend(split_group(PrefixGroup(prefix=prefix, names=names))) + + return result + + return split_group(PrefixGroup(prefix="", names=sorted(names))) + + +def _batch_prefix_groups( + groups: List[PrefixGroup], max_batch_size: int, max_groups_in_batch: int +) -> List[List[PrefixGroup]]: + """Batch the groups together, so that no batch's total is larger than `max_batch_size` + and no group in a batch is larger than `max_group_size`.""" + + # A batch is a set of groups. + + # This is a variant of the 1D bin packing problem, which is actually NP-hard. + # However, we'll just use a greedy algorithm for simplicity. 
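Combined with the `columns_for_schema` rewrite earlier in this diff, this batching turns one potentially oversized `information_schema.columns` scan into several bounded UNION ALL statements. A sketch of that end-to-end flow; the schema, database, and object names are made up for illustration:

```python
from datahub.ingestion.source.snowflake.snowflake_query import SnowflakeQuery
from datahub.utilities.prefix_batch_builder import build_prefix_batches

# Hypothetical object names; in the source these come from the schema's
# already-fetched tables and views.
names = [f"ORDERS_{i}" for i in range(20_000)] + ["CUSTOMERS", "PAYMENTS"]

batches = build_prefix_batches(names, max_batch_size=10_000, max_groups_in_batch=5)
for batch in batches:
    # One statement per batch: a UNION ALL of one SELECT per prefix group,
    # each constrained by table_name LIKE '<prefix>%' (or '=' for exact matches).
    sql = SnowflakeQuery.columns_for_schema("PUBLIC", "ANALYTICS", batch)
    print(f"{len(batch)} prefix groups -> {len(sql)} chars of SQL")
```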
+ + batches = [] + current_batch_size = 0 + batch: List[PrefixGroup] = [] + for group in groups: + if ( + current_batch_size + len(group.names) > max_batch_size + or len(batch) > max_groups_in_batch + ): + batches.append(batch) + batch = [] + current_batch_size = 0 + batch.append(group) + current_batch_size += len(group.names) + if batch: + batches.append(batch) + return batches diff --git a/metadata-ingestion/src/datahub/utilities/serialized_lru_cache.py b/metadata-ingestion/src/datahub/utilities/serialized_lru_cache.py index 23523501ee0b4..b5f490720340c 100644 --- a/metadata-ingestion/src/datahub/utilities/serialized_lru_cache.py +++ b/metadata-ingestion/src/datahub/utilities/serialized_lru_cache.py @@ -41,7 +41,7 @@ def decorator(func: Callable[_F, _T]) -> Callable[_F, _T]: def wrapper(*args: _F.args, **kwargs: _F.kwargs) -> _T: # We need a type ignore here because there's no way for us to require that # the args and kwargs are hashable while using ParamSpec. - key: _Key = cachetools.keys.hashkey(*args, **kwargs) # type: ignore + key: _Key = cachetools.keys.hashkey(*args, **{k: v for k, v in kwargs.items() if "cache_exclude" not in k}) # type: ignore with cache_lock: if key in cache: diff --git a/metadata-ingestion/tests/integration/snowflake/common.py b/metadata-ingestion/tests/integration/snowflake/common.py index ea08a94267480..881fac96f82e8 100644 --- a/metadata-ingestion/tests/integration/snowflake/common.py +++ b/metadata-ingestion/tests/integration/snowflake/common.py @@ -252,26 +252,11 @@ def default_query_results( # noqa: C901 for view_idx in range(1, num_views + 1) ] elif query == SnowflakeQuery.columns_for_schema("TEST_SCHEMA", "TEST_DB"): - raise Exception("Information schema query returned too much data") - elif query in [ - *[ - SnowflakeQuery.columns_for_table( - f"TABLE_{tbl_idx}", "TEST_SCHEMA", "TEST_DB" - ) - for tbl_idx in range(1, num_tables + 1) - ], - *[ - SnowflakeQuery.columns_for_table( - f"VIEW_{view_idx}", "TEST_SCHEMA", "TEST_DB" - ) - for view_idx in range(1, num_views + 1) - ], - ]: return [ { - # "TABLE_CATALOG": "TEST_DB", - # "TABLE_SCHEMA": "TEST_SCHEMA", - # "TABLE_NAME": "TABLE_{}".format(tbl_idx), + "TABLE_CATALOG": "TEST_DB", + "TABLE_SCHEMA": "TEST_SCHEMA", + "TABLE_NAME": table_name, "COLUMN_NAME": f"COL_{col_idx}", "ORDINAL_POSITION": col_idx, "IS_NULLABLE": "NO", @@ -281,6 +266,10 @@ def default_query_results( # noqa: C901 "NUMERIC_PRECISION": None if col_idx > 1 else 38, "NUMERIC_SCALE": None if col_idx > 1 else 0, } + for table_name in ( + [f"TABLE_{tbl_idx}" for tbl_idx in range(1, num_tables + 1)] + + [f"VIEW_{view_idx}" for view_idx in range(1, num_views + 1)] + ) for col_idx in range(1, num_cols + 1) ] elif query in ( diff --git a/metadata-ingestion/tests/integration/snowflake/test_snowflake_failures.py b/metadata-ingestion/tests/integration/snowflake/test_snowflake_failures.py index 3a37382de65b4..23f5c10b10f8e 100644 --- a/metadata-ingestion/tests/integration/snowflake/test_snowflake_failures.py +++ b/metadata-ingestion/tests/integration/snowflake/test_snowflake_failures.py @@ -10,11 +10,7 @@ from datahub.ingestion.source.snowflake import snowflake_query from datahub.ingestion.source.snowflake.snowflake_config import SnowflakeV2Config from datahub.ingestion.source.snowflake.snowflake_query import SnowflakeQuery -from tests.integration.snowflake.common import ( - FROZEN_TIME, - NUM_TABLES, - default_query_results, -) +from tests.integration.snowflake.common import FROZEN_TIME, default_query_results def query_permission_error_override(fn, 
override_for_query, error_msg): @@ -168,10 +164,7 @@ def test_snowflake_list_columns_error_causes_pipeline_warning( sf_cursor.execute.side_effect = query_permission_error_override( default_query_results, [ - SnowflakeQuery.columns_for_table( - f"TABLE_{tbl_idx}", "TEST_SCHEMA", "TEST_DB" - ) - for tbl_idx in range(1, NUM_TABLES + 1) + SnowflakeQuery.columns_for_schema("TEST_SCHEMA", "TEST_DB"), ], "Database 'TEST_DB' does not exist or not authorized.", ) diff --git a/metadata-ingestion/tests/unit/utilities/test_prefix_patch_builder.py b/metadata-ingestion/tests/unit/utilities/test_prefix_patch_builder.py new file mode 100644 index 0000000000000..19af7e9f66c1a --- /dev/null +++ b/metadata-ingestion/tests/unit/utilities/test_prefix_patch_builder.py @@ -0,0 +1,45 @@ +from datahub.utilities.prefix_batch_builder import PrefixGroup, build_prefix_batches + + +def test_build_prefix_batches_empty_input(): + assert build_prefix_batches([], 10, 5) == [[PrefixGroup(prefix="", names=[])]] + + +def test_build_prefix_batches_single_group(): + names = ["apple", "applet", "application"] + expected = [[PrefixGroup(prefix="", names=names)]] + assert build_prefix_batches(names, 10, 5) == expected + + +def test_build_prefix_batches_multiple_groups(): + names = ["apple", "applet", "banana", "band", "bandana"] + expected = [ + [PrefixGroup(prefix="a", names=["apple", "applet"])], + [PrefixGroup(prefix="b", names=["banana", "band", "bandana"])], + ] + assert build_prefix_batches(names, 4, 5) == expected + + +def test_build_prefix_batches_exceeds_max_batch_size(): + names = [ + "app", + "apple", + "applet", + "application", + "banana", + "band", + "bandana", + "candy", + "candle", + "dog", + ] + expected = [ + [PrefixGroup(prefix="app", names=["app"], exact_match=True)], + [PrefixGroup(prefix="app", names=["apple", "applet", "application"])], + [PrefixGroup(prefix="b", names=["banana", "band", "bandana"])], + [ + PrefixGroup(prefix="c", names=["candle", "candy"]), + PrefixGroup(prefix="d", names=["dog"]), + ], + ] + assert build_prefix_batches(names, 3, 2) == expected
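The example-based tests above implicitly rely on batching being a clean partition of the input. A property-style check along those lines could look like the following sketch; it is not part of this change, and the test name and sample sizes are illustrative:

```python
from datahub.utilities.prefix_batch_builder import build_prefix_batches


def test_build_prefix_batches_partitions_names():
    names = [f"TABLE_{i}" for i in range(1, 501)] + ["TABLE", "VIEW_A", "VIEW_B"]
    batches = build_prefix_batches(names, max_batch_size=50, max_groups_in_batch=5)

    flattened = [
        name for batch in batches for group in batch for name in group.names
    ]
    # Every input name lands in exactly one group of exactly one batch.
    assert sorted(flattened) == sorted(names)
    # No single group exceeds the requested batch size.
    assert all(len(group.names) <= 50 for batch in batches for group in batch)
```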