From a9b519b384d861905753023d1c3d9165a590887b Mon Sep 17 00:00:00 2001 From: Sandesh Kumar Date: Tue, 19 Dec 2023 12:58:18 +0530 Subject: [PATCH 01/30] Use Collector.setWeight to improve aggregation performance Signed-off-by: Sandesh Kumar --- .../GlobalOrdinalsStringTermsAggregator.java | 26 +++++++++++++++++++ .../aggregations/support/ValuesSource.java | 2 +- .../search/internal/ContextIndexSearcher.java | 5 ++++ 3 files changed, 32 insertions(+), 1 deletion(-) diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java index 5ed899408ab40..b3552360296b8 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java +++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java @@ -37,6 +37,9 @@ import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.SortedDocValues; import org.apache.lucene.index.SortedSetDocValues; +import org.apache.lucene.index.Terms; +import org.apache.lucene.search.MatchAllDocsQuery; +import org.apache.lucene.search.Weight; import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.PriorityQueue; @@ -61,6 +64,7 @@ import org.opensearch.search.aggregations.bucket.terms.SignificanceLookup.BackgroundFrequencyForBytes; import org.opensearch.search.aggregations.bucket.terms.heuristic.SignificanceHeuristic; import org.opensearch.search.aggregations.support.ValuesSource; +import org.opensearch.search.aggregations.support.ValuesSource.Bytes.WithOrdinals; import org.opensearch.search.internal.SearchContext; import java.io.IOException; @@ -70,6 +74,7 @@ import java.util.function.Function; import java.util.function.LongPredicate; import java.util.function.LongUnaryOperator; +import java.util.logging.Logger; import static org.opensearch.search.aggregations.InternalOrder.isKeyOrder; import static org.apache.lucene.index.SortedSetDocValues.NO_MORE_ORDS; @@ -80,11 +85,16 @@ * @opensearch.internal */ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggregator { + + // testing only - will remove + protected Logger logger = Logger.getLogger(GlobalOrdinalsStringTermsAggregator.class.getName()); protected final ResultStrategy resultStrategy; protected final ValuesSource.Bytes.WithOrdinals valuesSource; private final LongPredicate acceptedGlobalOrdinals; private final long valueCount; + + private Weight weight; private final GlobalOrdLookupFunction lookupGlobalOrd; protected final CollectionStrategy collectionStrategy; protected int segmentsWithSingleValuedOrds = 0; @@ -142,8 +152,24 @@ String descriptCollectionStrategy() { return collectionStrategy.describe(); } + public void setWeight(Weight weight) { + this.weight = weight; + } + @Override public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, LeafBucketCollector sub) throws IOException { + if (weight != null && weight.getQuery() instanceof MatchAllDocsQuery) { + if ((weight.count(ctx) == 0) + && Terms.getTerms(ctx.reader(), String.valueOf(((WithOrdinals.FieldData) valuesSource).indexFieldData.getFieldName())) + .size() == 0) { + return LeafBucketCollector.NO_OP_COLLECTOR; + // } else if (weight.count(ctx) == ctx.reader().maxDoc() && weight.getQuery() instanceof MatchAllDocsQuery) { + // no deleted documents & top level query matches 
everything + // iterate over the terms - doc frequency for each termsEnum directly + // return appropriate LeafCollector + } + } + SortedSetDocValues globalOrds = valuesSource.globalOrdinalsValues(ctx); collectionStrategy.globalOrdsReady(globalOrds); SortedDocValues singleValues = DocValues.unwrapSingleton(globalOrds); diff --git a/server/src/main/java/org/opensearch/search/aggregations/support/ValuesSource.java b/server/src/main/java/org/opensearch/search/aggregations/support/ValuesSource.java index 3ce1f0447dfcc..c3a5d445be7a1 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/support/ValuesSource.java +++ b/server/src/main/java/org/opensearch/search/aggregations/support/ValuesSource.java @@ -238,7 +238,7 @@ public long globalMaxOrd(IndexSearcher indexSearcher) throws IOException { */ public static class FieldData extends WithOrdinals { - protected final IndexOrdinalsFieldData indexFieldData; + public final IndexOrdinalsFieldData indexFieldData; public FieldData(IndexOrdinalsFieldData indexFieldData) { this.indexFieldData = indexFieldData; diff --git a/server/src/main/java/org/opensearch/search/internal/ContextIndexSearcher.java b/server/src/main/java/org/opensearch/search/internal/ContextIndexSearcher.java index 403b0b545c113..ec3ed2332d0b8 100644 --- a/server/src/main/java/org/opensearch/search/internal/ContextIndexSearcher.java +++ b/server/src/main/java/org/opensearch/search/internal/ContextIndexSearcher.java @@ -387,6 +387,11 @@ public BulkScorer bulkScorer(LeafReaderContext context) throws IOException { return null; } } + + @Override + public int count(LeafReaderContext context) throws IOException { + return weight.count(context); + } }; } else { return weight; From 21761c9af1fc9dd0ab2fe41ca95384e892fb4665 Mon Sep 17 00:00:00 2001 From: Sandesh Kumar Date: Tue, 23 Jan 2024 14:15:01 -0800 Subject: [PATCH 02/30] Add case for no deleted docs Signed-off-by: Sandesh Kumar --- .../GlobalOrdinalsStringTermsAggregator.java | 99 ++++++++++++++++--- 1 file changed, 86 insertions(+), 13 deletions(-) diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java index b3552360296b8..21008ba0e2083 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java +++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java @@ -35,10 +35,11 @@ import org.apache.lucene.index.DocValues; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.NumericDocValues; import org.apache.lucene.index.SortedDocValues; import org.apache.lucene.index.SortedSetDocValues; import org.apache.lucene.index.Terms; -import org.apache.lucene.search.MatchAllDocsQuery; +import org.apache.lucene.index.TermsEnum; import org.apache.lucene.search.Weight; import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.BytesRef; @@ -49,6 +50,7 @@ import org.opensearch.common.util.LongHash; import org.opensearch.core.common.io.stream.StreamOutput; import org.opensearch.core.xcontent.XContentBuilder; +import org.opensearch.index.mapper.DocCountFieldMapper; import org.opensearch.search.DocValueFormat; import org.opensearch.search.aggregations.AggregationExecutionException; import org.opensearch.search.aggregations.Aggregator; @@ -64,7 +66,6 @@ import 
org.opensearch.search.aggregations.bucket.terms.SignificanceLookup.BackgroundFrequencyForBytes; import org.opensearch.search.aggregations.bucket.terms.heuristic.SignificanceHeuristic; import org.opensearch.search.aggregations.support.ValuesSource; -import org.opensearch.search.aggregations.support.ValuesSource.Bytes.WithOrdinals; import org.opensearch.search.internal.SearchContext; import java.io.IOException; @@ -78,6 +79,7 @@ import static org.opensearch.search.aggregations.InternalOrder.isKeyOrder; import static org.apache.lucene.index.SortedSetDocValues.NO_MORE_ORDS; +import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS; /** * An aggregator of string values that relies on global ordinals in order to build buckets. @@ -94,6 +96,8 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr private final LongPredicate acceptedGlobalOrdinals; private final long valueCount; + private final String fieldName; + private Weight weight; private final GlobalOrdLookupFunction lookupGlobalOrd; protected final CollectionStrategy collectionStrategy; @@ -146,6 +150,7 @@ public GlobalOrdinalsStringTermsAggregator( return new DenseGlobalOrds(); }); } + this.fieldName = ((ValuesSource.Bytes.WithOrdinals.FieldData) valuesSource).indexFieldData.getFieldName(); } String descriptCollectionStrategy() { @@ -156,22 +161,80 @@ public void setWeight(Weight weight) { this.weight = weight; } - @Override - public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, LeafBucketCollector sub) throws IOException { - if (weight != null && weight.getQuery() instanceof MatchAllDocsQuery) { - if ((weight.count(ctx) == 0) - && Terms.getTerms(ctx.reader(), String.valueOf(((WithOrdinals.FieldData) valuesSource).indexFieldData.getFieldName())) - .size() == 0) { - return LeafBucketCollector.NO_OP_COLLECTOR; - // } else if (weight.count(ctx) == ctx.reader().maxDoc() && weight.getQuery() instanceof MatchAllDocsQuery) { - // no deleted documents & top level query matches everything - // iterate over the terms - doc frequency for each termsEnum directly - // return appropriate LeafCollector + /** + Collects term frequencies for a given field from a LeafReaderContext. + @param ctx The LeafReaderContext to collect terms from + @param ords The SortedSetDocValues for the field's ordinals + @param ordCountConsumer A consumer to accept collected term frequencies + @return A LeafBucketCollector implementation that throws an exception, since collection is complete + @throws IOException If an I/O error occurs during reading */ + LeafBucketCollector termDocFreqCollector(LeafReaderContext ctx, SortedSetDocValues ords, BiConsumer ordCountConsumer) + throws IOException { + // long n0 = System.nanoTime(), n1, n2, n3, n4, n5 = 0; + if (weight.count(ctx) != ctx.reader().maxDoc()) { + // Top-level query does not match all docs in this segment. + return null; + } + // n1 = System.nanoTime(); + + Terms aggTerms = ctx.reader().terms(this.fieldName); + if (aggTerms == null) { + // Field is not indexed. + return null; + } + // n2 = System.nanoTime(); + NumericDocValues docCountValues = DocValues.getNumeric(ctx.reader(), DocCountFieldMapper.NAME); + if (docCountValues.nextDoc() != NO_MORE_DOCS) { + // This segment has at least one document with the _doc_count field. 
+ return null; + } + // n3 = System.nanoTime(); + TermsEnum indexTermsEnum = aggTerms.iterator(); + BytesRef indexTerm = indexTermsEnum.next(); + TermsEnum ordinalTermsEnum = ords.termsEnum(); + BytesRef ordinalTerm = ordinalTermsEnum.next(); + // n4 = System.nanoTime(); + while (indexTerm != null && ordinalTerm != null) { + int compare = indexTerm.compareTo(ordinalTerm); + if (compare == 0) { + if (acceptedGlobalOrdinals.test(ordinalTermsEnum.ord())) { + ordCountConsumer.accept(ordinalTermsEnum.ord(), indexTermsEnum.docFreq()); + } + indexTerm = indexTermsEnum.next(); + ordinalTerm = ordinalTermsEnum.next(); + } else if (compare < 0) { + indexTerm = indexTermsEnum.next(); + } else { + ordinalTerm = ordinalTermsEnum.next(); } + // n5 = System.nanoTime(); } + // logger.info((n1 - n0) + " " + (n2 - n1) + " " + (n3 - n2) + " " + (n4 - n3) + " " + (n5 - n4)); + // return new LeafBucketCollector() { + // @Override + // public void collect(int doc, long owningBucketOrd) { + // throw new CollectionTerminatedException(); + // } + // }; + return LeafBucketCollector.NO_OP_COLLECTOR; + } + @Override + public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, LeafBucketCollector sub) throws IOException { SortedSetDocValues globalOrds = valuesSource.globalOrdinalsValues(ctx); collectionStrategy.globalOrdsReady(globalOrds); + + if (collectionStrategy instanceof DenseGlobalOrds && sub == LeafBucketCollector.NO_OP_COLLECTOR) { + LeafBucketCollector termDocFreqCollector = termDocFreqCollector( + ctx, + globalOrds, + (o, c) -> incrementBucketDocCount(collectionStrategy.globalOrdToBucketOrd(0, o), c) + ); + if (termDocFreqCollector != null) { + return termDocFreqCollector; + } + } + SortedDocValues singleValues = DocValues.unwrapSingleton(globalOrds); if (singleValues != null) { segmentsWithSingleValuedOrds++; @@ -369,6 +432,16 @@ public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, LeafBucketCol final SortedSetDocValues segmentOrds = valuesSource.ordinalsValues(ctx); segmentDocCounts = context.bigArrays().grow(segmentDocCounts, 1 + segmentOrds.getValueCount()); assert sub == LeafBucketCollector.NO_OP_COLLECTOR; + + LeafBucketCollector termDocFreqCollector = this.termDocFreqCollector( + ctx, + segmentOrds, + (o, c) -> segmentDocCounts.increment(o + 1, c) + ); + if (termDocFreqCollector != null) { + return termDocFreqCollector; + } + final SortedDocValues singleValues = DocValues.unwrapSingleton(segmentOrds); mapping = valuesSource.globalOrdinalsMapping(ctx); // Dense mode doesn't support include/exclude so we don't have to check it here. 
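
The idea that patches 01 through 06 converge on can be restated outside the aggregator: when a segment's top-level Weight reports that it matches every document (weight.count(ctx) == maxDoc(), which also implies the segment has no deleted docs), the per-term bucket counts equal each term's document frequency, so they can be read straight from the terms dictionary with TermsEnum#docFreq() instead of iterating documents, merge-joining the postings TermsEnum with the field's doc-values TermsEnum to recover ordinals. The sketch below is an illustrative restatement under those assumptions, not part of the patch series; the class name TermDocFreqSketch and the OrdCountConsumer hook are hypothetical stand-ins for the aggregator's incrementBucketDocCount plumbing, while the Lucene calls mirror the ones used in the patches.

import java.io.IOException;
import java.util.function.LongPredicate;

import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Weight;
import org.apache.lucene.util.BytesRef;

final class TermDocFreqSketch {

    /** Hypothetical stand-in for the aggregator's (ordinal, docCount) consumer. */
    interface OrdCountConsumer {
        void accept(long ord, int count) throws IOException;
    }

    /**
     * Returns true when counts were taken from the terms dictionary and per-document
     * collection can be skipped for this segment; false means "fall back to the normal
     * document-by-document collector".
     */
    static boolean tryCountFromTermsDict(
        LeafReaderContext ctx,
        Weight topLevelWeight,
        String fieldName,
        SortedSetDocValues ords,
        LongPredicate acceptedOrds,
        OrdCountConsumer consumer
    ) throws IOException {
        // Only valid when the query matches every document in the segment; otherwise
        // docFreq() would also count documents the query excludes or that are deleted.
        if (topLevelWeight == null || topLevelWeight.count(ctx) != ctx.reader().maxDoc()) {
            return false;
        }
        Terms terms = ctx.reader().terms(fieldName);
        if (terms == null) {
            return false; // field is not indexed in this segment
        }
        // "_doc_count" (DocCountFieldMapper.NAME in the patches) makes one hit stand for
        // several documents, which docFreq() cannot see, so bail out if any doc has it.
        NumericDocValues docCount = DocValues.getNumeric(ctx.reader(), "_doc_count");
        if (docCount.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
            return false;
        }
        // Merge-join the postings terms with the doc-values terms: both enums are sorted,
        // so a single pass pairs each term's docFreq() with its ordinal.
        TermsEnum indexTerms = terms.iterator();
        TermsEnum ordTerms = ords.termsEnum();
        BytesRef indexTerm = indexTerms.next();
        BytesRef ordTerm = ordTerms.next();
        while (indexTerm != null && ordTerm != null) {
            int cmp = indexTerm.compareTo(ordTerm);
            if (cmp == 0) {
                if (acceptedOrds.test(ordTerms.ord())) {
                    consumer.accept(ordTerms.ord(), indexTerms.docFreq());
                }
                indexTerm = indexTerms.next();
                ordTerm = ordTerms.next();
            } else if (cmp < 0) {
                indexTerm = indexTerms.next();
            } else {
                ordTerm = ordTerms.next();
            }
        }
        return true;
    }
}

In the later patches this check is gated further: it is applied only for the dense global-ordinals collection strategy with standard (non-significant) term results and a no-op sub collector, since docFreq() can replace collection only when nothing else needs to see the individual documents.
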
From 0ed5c0d98d68dbfc43050b5b9b2dc5b7337ab537 Mon Sep 17 00:00:00 2001 From: Sandesh Kumar Date: Wed, 24 Jan 2024 11:31:47 -0800 Subject: [PATCH 03/30] Minor refactoring - reverting making a field public Signed-off-by: Sandesh Kumar --- .../bucket/terms/GlobalOrdinalsStringTermsAggregator.java | 2 +- .../search/aggregations/support/ValuesSource.java | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java index 21008ba0e2083..870c6be2e037c 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java +++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java @@ -150,7 +150,7 @@ public GlobalOrdinalsStringTermsAggregator( return new DenseGlobalOrds(); }); } - this.fieldName = ((ValuesSource.Bytes.WithOrdinals.FieldData) valuesSource).indexFieldData.getFieldName(); + this.fieldName = ((ValuesSource.Bytes.WithOrdinals.FieldData) valuesSource).getIndexFieldName(); } String descriptCollectionStrategy() { diff --git a/server/src/main/java/org/opensearch/search/aggregations/support/ValuesSource.java b/server/src/main/java/org/opensearch/search/aggregations/support/ValuesSource.java index c3a5d445be7a1..1f4dd429e094e 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/support/ValuesSource.java +++ b/server/src/main/java/org/opensearch/search/aggregations/support/ValuesSource.java @@ -238,12 +238,16 @@ public long globalMaxOrd(IndexSearcher indexSearcher) throws IOException { */ public static class FieldData extends WithOrdinals { - public final IndexOrdinalsFieldData indexFieldData; + protected final IndexOrdinalsFieldData indexFieldData; public FieldData(IndexOrdinalsFieldData indexFieldData) { this.indexFieldData = indexFieldData; } + public String getIndexFieldName() { + return this.indexFieldData.getFieldName(); + } + @Override public SortedBinaryDocValues bytesValues(LeafReaderContext context) { final LeafOrdinalsFieldData atomicFieldData = indexFieldData.load(context); From 554e82d225c205978a40c1d3dd6d6991369ad0d2 Mon Sep 17 00:00:00 2001 From: Sandesh Kumar Date: Wed, 24 Jan 2024 11:42:58 -0800 Subject: [PATCH 04/30] Fixing weight and declaring it a s final Signed-off-by: Sandesh Kumar --- .../bucket/terms/GlobalOrdinalsStringTermsAggregator.java | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java index 870c6be2e037c..7652267c7a2eb 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java +++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java @@ -40,6 +40,7 @@ import org.apache.lucene.index.SortedSetDocValues; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.search.ScoreMode; import org.apache.lucene.search.Weight; import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.BytesRef; @@ -98,7 +99,7 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr private final 
String fieldName; - private Weight weight; + private final Weight weight; private final GlobalOrdLookupFunction lookupGlobalOrd; protected final CollectionStrategy collectionStrategy; protected int segmentsWithSingleValuedOrds = 0; @@ -151,16 +152,13 @@ public GlobalOrdinalsStringTermsAggregator( }); } this.fieldName = ((ValuesSource.Bytes.WithOrdinals.FieldData) valuesSource).getIndexFieldName(); + this.weight = context.searcher().createWeight(context.query(), ScoreMode.COMPLETE_NO_SCORES, 1f); } String descriptCollectionStrategy() { return collectionStrategy.describe(); } - public void setWeight(Weight weight) { - this.weight = weight; - } - /** Collects term frequencies for a given field from a LeafReaderContext. @param ctx The LeafReaderContext to collect terms from From 050519a366a60d0cf026b4d77bd5a8097fd5062f Mon Sep 17 00:00:00 2001 From: Sandesh Kumar Date: Wed, 24 Jan 2024 16:21:03 -0800 Subject: [PATCH 05/30] Revert "Fixing weight and declaring it a s final" This reverts commit 851b7599d4735e910cc9cc37d9ab4eb8176b29a2. Signed-off-by: Sandesh Kumar --- .../bucket/terms/GlobalOrdinalsStringTermsAggregator.java | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java index 7652267c7a2eb..870c6be2e037c 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java +++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java @@ -40,7 +40,6 @@ import org.apache.lucene.index.SortedSetDocValues; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; -import org.apache.lucene.search.ScoreMode; import org.apache.lucene.search.Weight; import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.BytesRef; @@ -99,7 +98,7 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr private final String fieldName; - private final Weight weight; + private Weight weight; private final GlobalOrdLookupFunction lookupGlobalOrd; protected final CollectionStrategy collectionStrategy; protected int segmentsWithSingleValuedOrds = 0; @@ -152,13 +151,16 @@ public GlobalOrdinalsStringTermsAggregator( }); } this.fieldName = ((ValuesSource.Bytes.WithOrdinals.FieldData) valuesSource).getIndexFieldName(); - this.weight = context.searcher().createWeight(context.query(), ScoreMode.COMPLETE_NO_SCORES, 1f); } String descriptCollectionStrategy() { return collectionStrategy.describe(); } + public void setWeight(Weight weight) { + this.weight = weight; + } + /** Collects term frequencies for a given field from a LeafReaderContext. 
@param ctx The LeafReaderContext to collect terms from From 7ed030b5a2ad4b6bbdf14a908d84dd2c55900453 Mon Sep 17 00:00:00 2001 From: Sandesh Kumar Date: Wed, 24 Jan 2024 16:31:43 -0800 Subject: [PATCH 06/30] Bypassing doc iteration Signed-off-by: Sandesh Kumar --- .../GlobalOrdinalsStringTermsAggregator.java | 58 ++++++++----------- .../search/internal/ContextIndexSearcher.java | 3 + 2 files changed, 27 insertions(+), 34 deletions(-) diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java index 870c6be2e037c..7adcf6230b54e 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java +++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java @@ -75,7 +75,6 @@ import java.util.function.Function; import java.util.function.LongPredicate; import java.util.function.LongUnaryOperator; -import java.util.logging.Logger; import static org.opensearch.search.aggregations.InternalOrder.isKeyOrder; import static org.apache.lucene.index.SortedSetDocValues.NO_MORE_ORDS; @@ -87,9 +86,6 @@ * @opensearch.internal */ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggregator { - - // testing only - will remove - protected Logger logger = Logger.getLogger(GlobalOrdinalsStringTermsAggregator.class.getName()); protected final ResultStrategy resultStrategy; protected final ValuesSource.Bytes.WithOrdinals valuesSource; @@ -164,58 +160,52 @@ public void setWeight(Weight weight) { /** Collects term frequencies for a given field from a LeafReaderContext. @param ctx The LeafReaderContext to collect terms from - @param ords The SortedSetDocValues for the field's ordinals + @param globalOrds The SortedSetDocValues for the field's ordinals @param ordCountConsumer A consumer to accept collected term frequencies - @return A LeafBucketCollector implementation that throws an exception, since collection is complete - @throws IOException If an I/O error occurs during reading */ - LeafBucketCollector termDocFreqCollector(LeafReaderContext ctx, SortedSetDocValues ords, BiConsumer ordCountConsumer) - throws IOException { - // long n0 = System.nanoTime(), n1, n2, n3, n4, n5 = 0; + @return A no-operation LeafBucketCollector implementation, since collection is complete + @throws IOException If an I/O error occurs during reading + */ + LeafBucketCollector termDocFreqCollector( + LeafReaderContext ctx, + SortedSetDocValues globalOrds, + BiConsumer ordCountConsumer + ) throws IOException { if (weight.count(ctx) != ctx.reader().maxDoc()) { // Top-level query does not match all docs in this segment. return null; } - // n1 = System.nanoTime(); - Terms aggTerms = ctx.reader().terms(this.fieldName); - if (aggTerms == null) { + Terms segmentTerms = ctx.reader().terms(this.fieldName); + if (segmentTerms == null) { // Field is not indexed. return null; } - // n2 = System.nanoTime(); + NumericDocValues docCountValues = DocValues.getNumeric(ctx.reader(), DocCountFieldMapper.NAME); if (docCountValues.nextDoc() != NO_MORE_DOCS) { // This segment has at least one document with the _doc_count field. 
return null; } - // n3 = System.nanoTime(); - TermsEnum indexTermsEnum = aggTerms.iterator(); + + TermsEnum indexTermsEnum = segmentTerms.iterator(); BytesRef indexTerm = indexTermsEnum.next(); - TermsEnum ordinalTermsEnum = ords.termsEnum(); - BytesRef ordinalTerm = ordinalTermsEnum.next(); - // n4 = System.nanoTime(); + TermsEnum globalOrdinalTermsEnum = globalOrds.termsEnum(); + BytesRef ordinalTerm = globalOrdinalTermsEnum.next(); + while (indexTerm != null && ordinalTerm != null) { int compare = indexTerm.compareTo(ordinalTerm); if (compare == 0) { - if (acceptedGlobalOrdinals.test(ordinalTermsEnum.ord())) { - ordCountConsumer.accept(ordinalTermsEnum.ord(), indexTermsEnum.docFreq()); + if (acceptedGlobalOrdinals.test(globalOrdinalTermsEnum.ord())) { + ordCountConsumer.accept(globalOrdinalTermsEnum.ord(), indexTermsEnum.docFreq()); } indexTerm = indexTermsEnum.next(); - ordinalTerm = ordinalTermsEnum.next(); + ordinalTerm = globalOrdinalTermsEnum.next(); } else if (compare < 0) { indexTerm = indexTermsEnum.next(); } else { - ordinalTerm = ordinalTermsEnum.next(); + ordinalTerm = globalOrdinalTermsEnum.next(); } - // n5 = System.nanoTime(); } - // logger.info((n1 - n0) + " " + (n2 - n1) + " " + (n3 - n2) + " " + (n4 - n3) + " " + (n5 - n4)); - // return new LeafBucketCollector() { - // @Override - // public void collect(int doc, long owningBucketOrd) { - // throw new CollectionTerminatedException(); - // } - // }; return LeafBucketCollector.NO_OP_COLLECTOR; } @@ -228,10 +218,10 @@ public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, LeafBucketCol LeafBucketCollector termDocFreqCollector = termDocFreqCollector( ctx, globalOrds, - (o, c) -> incrementBucketDocCount(collectionStrategy.globalOrdToBucketOrd(0, o), c) + (ord, docCount) -> incrementBucketDocCount(collectionStrategy.globalOrdToBucketOrd(0, ord), docCount) ); if (termDocFreqCollector != null) { - return termDocFreqCollector; + return null; } } @@ -436,7 +426,7 @@ public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, LeafBucketCol LeafBucketCollector termDocFreqCollector = this.termDocFreqCollector( ctx, segmentOrds, - (o, c) -> segmentDocCounts.increment(o + 1, c) + (ord, docCount) -> segmentDocCounts.increment(ord + 1, docCount) ); if (termDocFreqCollector != null) { return termDocFreqCollector; diff --git a/server/src/main/java/org/opensearch/search/internal/ContextIndexSearcher.java b/server/src/main/java/org/opensearch/search/internal/ContextIndexSearcher.java index ec3ed2332d0b8..275a75c33406c 100644 --- a/server/src/main/java/org/opensearch/search/internal/ContextIndexSearcher.java +++ b/server/src/main/java/org/opensearch/search/internal/ContextIndexSearcher.java @@ -309,6 +309,9 @@ private void searchLeaf(LeafReaderContext ctx, Weight weight, Collector collecto // See please https://github.com/apache/lucene/pull/964 collector.setWeight(weight); leafCollector = collector.getLeafCollector(ctx); + if (leafCollector == null) { + return; + } } catch (CollectionTerminatedException e) { // there is no doc of interest in this reader context // continue with the following leaf From d7db848c4092b4ae0368aa268498288f918a9564 Mon Sep 17 00:00:00 2001 From: Sandesh Kumar Date: Fri, 2 Feb 2024 16:23:33 -0800 Subject: [PATCH 07/30] Refactoring and changelog addition Signed-off-by: Sandesh Kumar --- CHANGELOG.md | 1 + .../terms/GlobalOrdinalsStringTermsAggregator.java | 12 +++++++++--- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3e7229a2a0336..2be1d8eda34f6 
100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -128,6 +128,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), ### Changed - Allow composite aggregation to run under a parent filter aggregation ([#11499](https://github.com/opensearch-project/OpenSearch/pull/11499)) +- Improve string terms aggregation performance using Collector#setWeight ([#11643](https://github.com/opensearch-project/OpenSearch/pull/11643)) ### Deprecated diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java index 7adcf6230b54e..641fab5a2e5da 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java +++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java @@ -40,6 +40,7 @@ import org.apache.lucene.index.SortedSetDocValues; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.search.ScoreMode; import org.apache.lucene.search.Weight; import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.BytesRef; @@ -92,7 +93,7 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr private final LongPredicate acceptedGlobalOrdinals; private final long valueCount; - private final String fieldName; + private String fieldName; private Weight weight; private final GlobalOrdLookupFunction lookupGlobalOrd; @@ -146,7 +147,8 @@ public GlobalOrdinalsStringTermsAggregator( return new DenseGlobalOrds(); }); } - this.fieldName = ((ValuesSource.Bytes.WithOrdinals.FieldData) valuesSource).getIndexFieldName(); + this.fieldName = (valuesSource instanceof ValuesSource.Bytes.WithOrdinals.FieldData) ? + ((ValuesSource.Bytes.WithOrdinals.FieldData) valuesSource).getIndexFieldName() : null; } String descriptCollectionStrategy() { @@ -170,6 +172,10 @@ LeafBucketCollector termDocFreqCollector( SortedSetDocValues globalOrds, BiConsumer ordCountConsumer ) throws IOException { + if (weight == null) { + // Calculate weight if not assigned previously + this.weight = context.searcher().createWeight(context.query(), ScoreMode.COMPLETE_NO_SCORES, 1f); + } if (weight.count(ctx) != ctx.reader().maxDoc()) { // Top-level query does not match all docs in this segment. 
return null; @@ -221,7 +227,7 @@ public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, LeafBucketCol (ord, docCount) -> incrementBucketDocCount(collectionStrategy.globalOrdToBucketOrd(0, ord), docCount) ); if (termDocFreqCollector != null) { - return null; + return termDocFreqCollector; } } From 7aa424ae6be61ef1420beaf09f75e586215be024 Mon Sep 17 00:00:00 2001 From: Sandesh Kumar Date: Fri, 2 Feb 2024 16:28:33 -0800 Subject: [PATCH 08/30] Add case for weight.count == 0 Signed-off-by: Sandesh Kumar --- .../bucket/terms/GlobalOrdinalsStringTermsAggregator.java | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java index 641fab5a2e5da..671631be0b547 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java +++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java @@ -176,6 +176,12 @@ LeafBucketCollector termDocFreqCollector( // Calculate weight if not assigned previously this.weight = context.searcher().createWeight(context.query(), ScoreMode.COMPLETE_NO_SCORES, 1f); } + + if (weight.count(ctx) == 0) { + // No documents matches top level query on this segment, we can skip it + return LeafBucketCollector.NO_OP_COLLECTOR; + } + if (weight.count(ctx) != ctx.reader().maxDoc()) { // Top-level query does not match all docs in this segment. return null; From d52c57848193057ecb942d928ee71519d3123768 Mon Sep 17 00:00:00 2001 From: Sandesh Kumar Date: Fri, 2 Feb 2024 16:32:34 -0800 Subject: [PATCH 09/30] minor refactoring Signed-off-by: Sandesh Kumar --- .../bucket/terms/GlobalOrdinalsStringTermsAggregator.java | 4 +--- .../org/opensearch/search/internal/ContextIndexSearcher.java | 3 --- 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java index 671631be0b547..aa52a4b2d763b 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java +++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java @@ -92,9 +92,7 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr private final LongPredicate acceptedGlobalOrdinals; private final long valueCount; - - private String fieldName; - + private final String fieldName; private Weight weight; private final GlobalOrdLookupFunction lookupGlobalOrd; protected final CollectionStrategy collectionStrategy; diff --git a/server/src/main/java/org/opensearch/search/internal/ContextIndexSearcher.java b/server/src/main/java/org/opensearch/search/internal/ContextIndexSearcher.java index 275a75c33406c..ec3ed2332d0b8 100644 --- a/server/src/main/java/org/opensearch/search/internal/ContextIndexSearcher.java +++ b/server/src/main/java/org/opensearch/search/internal/ContextIndexSearcher.java @@ -309,9 +309,6 @@ private void searchLeaf(LeafReaderContext ctx, Weight weight, Collector collecto // See please https://github.com/apache/lucene/pull/964 collector.setWeight(weight); leafCollector = collector.getLeafCollector(ctx); - if (leafCollector == null) { - 
return; - } } catch (CollectionTerminatedException e) { // there is no doc of interest in this reader context // continue with the following leaf From f5b7643a68facd3017952c4f0e1b73d18509fba5 Mon Sep 17 00:00:00 2001 From: Sandesh Kumar Date: Fri, 2 Feb 2024 16:58:45 -0800 Subject: [PATCH 10/30] fixing weight null check Signed-off-by: Sandesh Kumar --- .../terms/GlobalOrdinalsStringTermsAggregator.java | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java index aa52a4b2d763b..d81a11c7cf7d3 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java +++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java @@ -170,21 +170,16 @@ LeafBucketCollector termDocFreqCollector( SortedSetDocValues globalOrds, BiConsumer ordCountConsumer ) throws IOException { - if (weight == null) { - // Calculate weight if not assigned previously - this.weight = context.searcher().createWeight(context.query(), ScoreMode.COMPLETE_NO_SCORES, 1f); + if (weight == null || weight.count(ctx) != ctx.reader().maxDoc()) { + // Weight not assigned or top-level query does not match all docs in the segment. + return null; } if (weight.count(ctx) == 0) { - // No documents matches top level query on this segment, we can skip it + // No documents matches top level query on this segment, we can skip the segment return LeafBucketCollector.NO_OP_COLLECTOR; } - if (weight.count(ctx) != ctx.reader().maxDoc()) { - // Top-level query does not match all docs in this segment. - return null; - } - Terms segmentTerms = ctx.reader().terms(this.fieldName); if (segmentTerms == null) { // Field is not indexed. From 70571a7586f4151f5acd621ec49ce3cf83747e08 Mon Sep 17 00:00:00 2001 From: Sandesh Kumar Date: Mon, 5 Feb 2024 12:25:42 -0800 Subject: [PATCH 11/30] spotless apply Signed-off-by: Sandesh Kumar --- .../bucket/terms/GlobalOrdinalsStringTermsAggregator.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java index d81a11c7cf7d3..320c684d933db 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java +++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java @@ -40,7 +40,6 @@ import org.apache.lucene.index.SortedSetDocValues; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; -import org.apache.lucene.search.ScoreMode; import org.apache.lucene.search.Weight; import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.BytesRef; @@ -145,8 +144,9 @@ public GlobalOrdinalsStringTermsAggregator( return new DenseGlobalOrds(); }); } - this.fieldName = (valuesSource instanceof ValuesSource.Bytes.WithOrdinals.FieldData) ? - ((ValuesSource.Bytes.WithOrdinals.FieldData) valuesSource).getIndexFieldName() : null; + this.fieldName = (valuesSource instanceof ValuesSource.Bytes.WithOrdinals.FieldData) + ? 
((ValuesSource.Bytes.WithOrdinals.FieldData) valuesSource).getIndexFieldName() + : null; } String descriptCollectionStrategy() { From 3e8f96d6f6c824a06fa6a69fcaa783b285f42f05 Mon Sep 17 00:00:00 2001 From: Sandesh Kumar Date: Mon, 5 Feb 2024 14:28:35 -0800 Subject: [PATCH 12/30] Lowcardinality case rectified - was not collecing buckets correctly Signed-off-by: Sandesh Kumar --- .../bucket/terms/GlobalOrdinalsStringTermsAggregator.java | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java index 320c684d933db..014460837c491 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java +++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java @@ -428,11 +428,7 @@ public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, LeafBucketCol segmentDocCounts = context.bigArrays().grow(segmentDocCounts, 1 + segmentOrds.getValueCount()); assert sub == LeafBucketCollector.NO_OP_COLLECTOR; - LeafBucketCollector termDocFreqCollector = this.termDocFreqCollector( - ctx, - segmentOrds, - (ord, docCount) -> segmentDocCounts.increment(ord + 1, docCount) - ); + LeafBucketCollector termDocFreqCollector = this.termDocFreqCollector(ctx, segmentOrds, this::incrementBucketDocCount); if (termDocFreqCollector != null) { return termDocFreqCollector; } From 98a33cdf21d490fd5e34f40e2d5ebab36f28836a Mon Sep 17 00:00:00 2001 From: Sandesh Kumar Date: Thu, 8 Feb 2024 13:29:54 -0800 Subject: [PATCH 13/30] Fix test failures for termintate_early cases Signed-off-by: Sandesh Kumar --- .../GlobalOrdinalsStringTermsAggregator.java | 3 +- .../search/query/QueryPhaseTests.java | 28 +++++++++++++++---- 2 files changed, 24 insertions(+), 7 deletions(-) diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java index 014460837c491..bbf5b05396a48 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java +++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java @@ -171,7 +171,8 @@ LeafBucketCollector termDocFreqCollector( BiConsumer ordCountConsumer ) throws IOException { if (weight == null || weight.count(ctx) != ctx.reader().maxDoc()) { - // Weight not assigned or top-level query does not match all docs in the segment. 
+ // weight.count(ctx) == ctx.reader().maxDoc() implies there are no deleted documents and + // top-level query matches all docs in the segment return null; } diff --git a/server/src/test/java/org/opensearch/search/query/QueryPhaseTests.java b/server/src/test/java/org/opensearch/search/query/QueryPhaseTests.java index d0e01c5461c79..4bd4d406e4391 100644 --- a/server/src/test/java/org/opensearch/search/query/QueryPhaseTests.java +++ b/server/src/test/java/org/opensearch/search/query/QueryPhaseTests.java @@ -122,6 +122,7 @@ import java.util.concurrent.TimeUnit; import static org.opensearch.search.query.TopDocsCollectorContext.hasInfMaxScore; +import static org.hamcrest.Matchers.allOf; import static org.hamcrest.Matchers.anyOf; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.greaterThan; @@ -437,10 +438,16 @@ public void testTerminateAfterEarlyTermination() throws Exception { assertThat(context.queryResult().topDocs().topDocs.totalHits.value, equalTo(1L)); assertThat(context.queryResult().topDocs().topDocs.scoreDocs.length, equalTo(1)); + // Do not expect an exact match when terminate_after is used in conjunction to size = 0 as an optimization introduced by + // https://issues.apache.org/jira/browse/LUCENE-10620 can produce a total hit count >= terminated_after, because + // TotalHitCountCollector is used in this case as part of Weight#count() optimization context.setSize(0); QueryPhase.executeInternal(context.withCleanQueryResult(), queryPhaseSearcher); assertTrue(context.queryResult().terminatedEarly()); - assertThat(context.queryResult().topDocs().topDocs.totalHits.value, equalTo(1L)); + assertThat( + context.queryResult().topDocs().topDocs.totalHits.value, + allOf(greaterThanOrEqualTo(1L), lessThanOrEqualTo((long) numDocs)) + ); assertThat(context.queryResult().topDocs().topDocs.scoreDocs.length, equalTo(0)); } @@ -466,7 +473,10 @@ public void testTerminateAfterEarlyTermination() throws Exception { context.parsedQuery(new ParsedQuery(bq)); QueryPhase.executeInternal(context.withCleanQueryResult(), queryPhaseSearcher); assertTrue(context.queryResult().terminatedEarly()); - assertThat(context.queryResult().topDocs().topDocs.totalHits.value, equalTo(1L)); + assertThat( + context.queryResult().topDocs().topDocs.totalHits.value, + allOf(greaterThanOrEqualTo(1L), lessThanOrEqualTo((long) numDocs)) + ); assertThat(context.queryResult().topDocs().topDocs.scoreDocs.length, equalTo(0)); } { @@ -486,9 +496,12 @@ public void testTerminateAfterEarlyTermination() throws Exception { context.queryCollectorManagers().put(TotalHitCountCollector.class, manager); QueryPhase.executeInternal(context.withCleanQueryResult(), queryPhaseSearcher); assertTrue(context.queryResult().terminatedEarly()); - assertThat(context.queryResult().topDocs().topDocs.totalHits.value, equalTo(1L)); + assertThat( + context.queryResult().topDocs().topDocs.totalHits.value, + allOf(greaterThanOrEqualTo(1L), lessThanOrEqualTo((long) numDocs)) + ); assertThat(context.queryResult().topDocs().topDocs.scoreDocs.length, equalTo(0)); - assertThat(manager.getTotalHits(), equalTo(1)); + assertThat(manager.getTotalHits(), allOf(greaterThanOrEqualTo(1), lessThanOrEqualTo(numDocs))); } // tests with trackTotalHits and terminateAfter @@ -503,7 +516,10 @@ public void testTerminateAfterEarlyTermination() throws Exception { if (trackTotalHits == -1) { assertThat(context.queryResult().topDocs().topDocs.totalHits.value, equalTo(0L)); } else { - assertThat(context.queryResult().topDocs().topDocs.totalHits.value, 
equalTo((long) Math.min(trackTotalHits, 10))); + assertThat( + context.queryResult().topDocs().topDocs.totalHits.value, + allOf(greaterThanOrEqualTo(Math.min(trackTotalHits, 10L)), lessThanOrEqualTo((long) numDocs)) + ); } assertThat(context.queryResult().topDocs().topDocs.scoreDocs.length, equalTo(0)); // The concurrent search terminates the collection when the number of hits is reached by each @@ -511,7 +527,7 @@ public void testTerminateAfterEarlyTermination() throws Exception { // slices (as the unit of concurrency). To address that, we have to use the shared global state, // much as HitsThresholdChecker does. if (executor == null) { - assertThat(manager.getTotalHits(), equalTo(10)); + assertThat(manager.getTotalHits(), allOf(greaterThanOrEqualTo(Math.min(trackTotalHits, 10)), lessThanOrEqualTo(numDocs))); } } From 85e9fe285f2bffa590eaae7533de301c0e6629d6 Mon Sep 17 00:00:00 2001 From: Sandesh Kumar Date: Wed, 14 Feb 2024 12:01:26 -0800 Subject: [PATCH 14/30] Optimize only for Standard Term Results Signed-off-by: Sandesh Kumar --- .../terms/GlobalOrdinalsStringTermsAggregator.java | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java index bbf5b05396a48..0544a73a8ea41 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java +++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java @@ -220,7 +220,7 @@ public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, LeafBucketCol SortedSetDocValues globalOrds = valuesSource.globalOrdinalsValues(ctx); collectionStrategy.globalOrdsReady(globalOrds); - if (collectionStrategy instanceof DenseGlobalOrds && sub == LeafBucketCollector.NO_OP_COLLECTOR) { + if (collectionStrategy instanceof DenseGlobalOrds && this.resultStrategy instanceof StandardTermsResults && sub == LeafBucketCollector.NO_OP_COLLECTOR) { LeafBucketCollector termDocFreqCollector = termDocFreqCollector( ctx, globalOrds, @@ -429,9 +429,12 @@ public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, LeafBucketCol segmentDocCounts = context.bigArrays().grow(segmentDocCounts, 1 + segmentOrds.getValueCount()); assert sub == LeafBucketCollector.NO_OP_COLLECTOR; - LeafBucketCollector termDocFreqCollector = this.termDocFreqCollector(ctx, segmentOrds, this::incrementBucketDocCount); - if (termDocFreqCollector != null) { - return termDocFreqCollector; + + if (this.resultStrategy instanceof StandardTermsResults) { + LeafBucketCollector termDocFreqCollector = this.termDocFreqCollector(ctx, segmentOrds, this::incrementBucketDocCount); + if (termDocFreqCollector != null) { + return termDocFreqCollector; + } } final SortedDocValues singleValues = DocValues.unwrapSingleton(segmentOrds); From 2673a1fc612c83222232205779040e2db87feb06 Mon Sep 17 00:00:00 2001 From: Sandesh Kumar Date: Wed, 14 Feb 2024 12:05:51 -0800 Subject: [PATCH 15/30] spotless fix Signed-off-by: Sandesh Kumar --- .../bucket/terms/GlobalOrdinalsStringTermsAggregator.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java 
b/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java index 0544a73a8ea41..a9a4ab2157826 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java +++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java @@ -220,7 +220,9 @@ public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, LeafBucketCol SortedSetDocValues globalOrds = valuesSource.globalOrdinalsValues(ctx); collectionStrategy.globalOrdsReady(globalOrds); - if (collectionStrategy instanceof DenseGlobalOrds && this.resultStrategy instanceof StandardTermsResults && sub == LeafBucketCollector.NO_OP_COLLECTOR) { + if (collectionStrategy instanceof DenseGlobalOrds + && this.resultStrategy instanceof StandardTermsResults + && sub == LeafBucketCollector.NO_OP_COLLECTOR) { LeafBucketCollector termDocFreqCollector = termDocFreqCollector( ctx, globalOrds, @@ -429,7 +431,6 @@ public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, LeafBucketCol segmentDocCounts = context.bigArrays().grow(segmentDocCounts, 1 + segmentOrds.getValueCount()); assert sub == LeafBucketCollector.NO_OP_COLLECTOR; - if (this.resultStrategy instanceof StandardTermsResults) { LeafBucketCollector termDocFreqCollector = this.termDocFreqCollector(ctx, segmentOrds, this::incrementBucketDocCount); if (termDocFreqCollector != null) { From ab3f9f940fec24303fbf5efd5f12621ac234cd0a Mon Sep 17 00:00:00 2001 From: Sandesh Kumar Date: Thu, 15 Feb 2024 13:51:10 -0800 Subject: [PATCH 16/30] fix low cardinality case Signed-off-by: Sandesh Kumar --- .../terms/GlobalOrdinalsStringTermsAggregator.java | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java index a9a4ab2157826..a69e55181fcd2 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java +++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java @@ -430,17 +430,23 @@ public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, LeafBucketCol final SortedSetDocValues segmentOrds = valuesSource.ordinalsValues(ctx); segmentDocCounts = context.bigArrays().grow(segmentDocCounts, 1 + segmentOrds.getValueCount()); assert sub == LeafBucketCollector.NO_OP_COLLECTOR; + mapping = valuesSource.globalOrdinalsMapping(ctx); if (this.resultStrategy instanceof StandardTermsResults) { - LeafBucketCollector termDocFreqCollector = this.termDocFreqCollector(ctx, segmentOrds, this::incrementBucketDocCount); + LeafBucketCollector termDocFreqCollector = this.termDocFreqCollector( + ctx, + segmentOrds, + (ord, docCount) -> incrementBucketDocCount( + collectionStrategy.globalOrdToBucketOrd(0, mapping.applyAsLong(ord)), + docCount + ) + ); if (termDocFreqCollector != null) { return termDocFreqCollector; } } final SortedDocValues singleValues = DocValues.unwrapSingleton(segmentOrds); - mapping = valuesSource.globalOrdinalsMapping(ctx); - // Dense mode doesn't support include/exclude so we don't have to check it here. 
if (singleValues != null) { segmentsWithSingleValuedOrds++; return resultStrategy.wrapCollector(new LeafBucketCollectorBase(sub, segmentOrds) { From 52fd9483548ef028b13787f9ae9557dd02b0e9f6 Mon Sep 17 00:00:00 2001 From: Sandesh Kumar Date: Thu, 15 Feb 2024 14:52:05 -0800 Subject: [PATCH 17/30] Remove extra checks Signed-off-by: Sandesh Kumar --- .../GlobalOrdinalsStringTermsAggregator.java | 23 +++++++------------ 1 file changed, 8 insertions(+), 15 deletions(-) diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java index a69e55181fcd2..c5bc7caa718a0 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java +++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java @@ -220,9 +220,7 @@ public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, LeafBucketCol SortedSetDocValues globalOrds = valuesSource.globalOrdinalsValues(ctx); collectionStrategy.globalOrdsReady(globalOrds); - if (collectionStrategy instanceof DenseGlobalOrds - && this.resultStrategy instanceof StandardTermsResults - && sub == LeafBucketCollector.NO_OP_COLLECTOR) { + if (collectionStrategy instanceof DenseGlobalOrds && sub == LeafBucketCollector.NO_OP_COLLECTOR) { LeafBucketCollector termDocFreqCollector = termDocFreqCollector( ctx, globalOrds, @@ -432,18 +430,13 @@ public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, LeafBucketCol assert sub == LeafBucketCollector.NO_OP_COLLECTOR; mapping = valuesSource.globalOrdinalsMapping(ctx); - if (this.resultStrategy instanceof StandardTermsResults) { - LeafBucketCollector termDocFreqCollector = this.termDocFreqCollector( - ctx, - segmentOrds, - (ord, docCount) -> incrementBucketDocCount( - collectionStrategy.globalOrdToBucketOrd(0, mapping.applyAsLong(ord)), - docCount - ) - ); - if (termDocFreqCollector != null) { - return termDocFreqCollector; - } + LeafBucketCollector termDocFreqCollector = this.termDocFreqCollector( + ctx, + segmentOrds, + (ord, docCount) -> incrementBucketDocCount(collectionStrategy.globalOrdToBucketOrd(0, mapping.applyAsLong(ord)), docCount) + ); + if (termDocFreqCollector != null) { + return termDocFreqCollector; } final SortedDocValues singleValues = DocValues.unwrapSingleton(segmentOrds); From ebf4ce58b3ec6dadc29bb4f3588e80c93a040a2f Mon Sep 17 00:00:00 2001 From: Sandesh Kumar Date: Thu, 15 Feb 2024 15:21:53 -0800 Subject: [PATCH 18/30] Minor optimization Signed-off-by: Sandesh Kumar --- .../bucket/terms/GlobalOrdinalsStringTermsAggregator.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java index c5bc7caa718a0..70fbe30b0a558 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java +++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java @@ -433,7 +433,7 @@ public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, LeafBucketCol LeafBucketCollector termDocFreqCollector = this.termDocFreqCollector( ctx, segmentOrds, - (ord, docCount) -> 
incrementBucketDocCount(collectionStrategy.globalOrdToBucketOrd(0, mapping.applyAsLong(ord)), docCount) + (ord, docCount) -> incrementBucketDocCount(mapping.applyAsLong(ord), docCount) ); if (termDocFreqCollector != null) { return termDocFreqCollector; From b14c03e3df61894e29f60d0241e5e82c1c339b90 Mon Sep 17 00:00:00 2001 From: Sandesh Kumar Date: Thu, 15 Feb 2024 18:07:09 -0800 Subject: [PATCH 19/30] Revert "Minor optimization" This reverts commit c17f93a216484d03049d89df3cbdd81b39f58e67. Signed-off-by: Sandesh Kumar --- .../bucket/terms/GlobalOrdinalsStringTermsAggregator.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java index 70fbe30b0a558..c5bc7caa718a0 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java +++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java @@ -433,7 +433,7 @@ public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, LeafBucketCol LeafBucketCollector termDocFreqCollector = this.termDocFreqCollector( ctx, segmentOrds, - (ord, docCount) -> incrementBucketDocCount(mapping.applyAsLong(ord), docCount) + (ord, docCount) -> incrementBucketDocCount(collectionStrategy.globalOrdToBucketOrd(0, mapping.applyAsLong(ord)), docCount) ); if (termDocFreqCollector != null) { return termDocFreqCollector; From 318e4ea1cd79d289ac03b1c257b68cbcc2eccb71 Mon Sep 17 00:00:00 2001 From: Sandesh Kumar Date: Fri, 16 Feb 2024 00:04:42 -0800 Subject: [PATCH 20/30] fix significant terms aggregator condition Signed-off-by: Sandesh Kumar --- .../GlobalOrdinalsStringTermsAggregator.java | 20 +++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java index c5bc7caa718a0..39aae29a36e5f 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java +++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java @@ -220,7 +220,9 @@ public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, LeafBucketCol SortedSetDocValues globalOrds = valuesSource.globalOrdinalsValues(ctx); collectionStrategy.globalOrdsReady(globalOrds); - if (collectionStrategy instanceof DenseGlobalOrds && sub == LeafBucketCollector.NO_OP_COLLECTOR) { + if (collectionStrategy instanceof DenseGlobalOrds + && this.resultStrategy instanceof StandardTermsResults + && sub == LeafBucketCollector.NO_OP_COLLECTOR) { LeafBucketCollector termDocFreqCollector = termDocFreqCollector( ctx, globalOrds, @@ -430,13 +432,15 @@ public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, LeafBucketCol assert sub == LeafBucketCollector.NO_OP_COLLECTOR; mapping = valuesSource.globalOrdinalsMapping(ctx); - LeafBucketCollector termDocFreqCollector = this.termDocFreqCollector( - ctx, - segmentOrds, - (ord, docCount) -> incrementBucketDocCount(collectionStrategy.globalOrdToBucketOrd(0, mapping.applyAsLong(ord)), docCount) - ); - if (termDocFreqCollector != null) { - return termDocFreqCollector; + if (this.resultStrategy 
instanceof StandardTermsResults) { + LeafBucketCollector termDocFreqCollector = this.termDocFreqCollector( + ctx, + segmentOrds, + (ord, docCount) -> incrementBucketDocCount(mapping.applyAsLong(ord), docCount) + ); + if (termDocFreqCollector != null) { + return termDocFreqCollector; + } } final SortedDocValues singleValues = DocValues.unwrapSingleton(segmentOrds); From 1c7b400ec3e1a2e28a154667d901ffb526e46d1b Mon Sep 17 00:00:00 2001 From: Sandesh Kumar Date: Thu, 22 Feb 2024 17:26:54 -0800 Subject: [PATCH 21/30] Add UTs for optimization Signed-off-by: Sandesh Kumar --- .../GlobalOrdinalsStringTermsAggregator.java | 1 + .../bucket/terms/TermsAggregatorTests.java | 137 +++++++++++++----- 2 files changed, 103 insertions(+), 35 deletions(-) diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java index 39aae29a36e5f..bb85344d5c4ee 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java +++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java @@ -248,6 +248,7 @@ public void collect(int doc, long owningBucketOrd) throws IOException { return; } int globalOrd = singleValues.ordValue(); + //Hello collectionStrategy.collectGlobalOrd(owningBucketOrd, doc, globalOrd, sub); } }); diff --git a/server/src/test/java/org/opensearch/search/aggregations/bucket/terms/TermsAggregatorTests.java b/server/src/test/java/org/opensearch/search/aggregations/bucket/terms/TermsAggregatorTests.java index 80744ecde4d69..c2d27fbf3600e 100644 --- a/server/src/test/java/org/opensearch/search/aggregations/bucket/terms/TermsAggregatorTests.java +++ b/server/src/test/java/org/opensearch/search/aggregations/bucket/terms/TermsAggregatorTests.java @@ -52,6 +52,7 @@ import org.apache.lucene.tests.index.RandomIndexWriter; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.NumericUtils; +import org.opensearch.common.TriConsumer; import org.opensearch.common.geo.GeoPoint; import org.opensearch.common.network.InetAddresses; import org.opensearch.common.settings.Settings; @@ -143,6 +144,18 @@ public class TermsAggregatorTests extends AggregatorTestCase { private static final String STRING_SCRIPT_NAME = "string_script"; private static final String STRING_SCRIPT_OUTPUT = "Orange"; + private static final Consumer DEFAULT_POST_COLLECTION = termsAggregator -> { + try { + termsAggregator.postCollection(); + } catch (IOException e) { + throw new RuntimeException(e); + } + }; + + // aggregator.postCollection() is not required when LeafBucketCollector#termDocFreqCollector optimization is used. + // using NOOP_POST_COLLECTION_CONSUMER ensures that the bucket count in aggregation is completed before/without running postCollection() + private static final Consumer NOOP_POST_COLLECTION_CONSUMER = termsAggregator -> {}; + @Override protected MapperService mapperServiceMock() { MapperService mapperService = mock(MapperService.class); @@ -257,24 +270,54 @@ public void testUsesGlobalOrdinalsByDefault() throws Exception { directory.close(); } - public void testSimple() throws Exception { + /** + * This test case utilizes the low cardinality implementation of GlobalOrdinalsStringTermsAggregator. 
+ * In this case, the segment terms will not get initialized and will run without LeafBucketCollector#termDocFreqCollector optimization + */ + public void testSimpleAggregation() throws Exception { + testSimple( + (document, field, value) -> document.add(new SortedSetDocValuesField(field, new BytesRef(value))), + DEFAULT_POST_COLLECTION + ); + } + + /** + * This test case utilizes the low cardinality implementation of GlobalOrdinalsStringTermsAggregator. + * In this case, the segment terms will get initialized and will use LeafBucketCollector#termDocFreqCollector optimization + */ + public void testSimpleAggregationWithStoredValues() throws Exception { + // aggregator.postCollection() is not required when LeafBucketCollector#termDocFreqCollector optimization is used. + // This also verifies that the bucket count is completed without running postCollection() + testSimple((document, field, value) -> { + document.add(new SortedSetDocValuesField(field, new BytesRef(value))); + document.add(new StringField(field, value, Field.Store.NO)); + }, NOOP_POST_COLLECTION_CONSUMER); + + } + + /** + * This is a utility method to test out string terms aggregation + * @param addFieldConsumer a function that determines how a field is added to the document + */ + private void testSimple(TriConsumer addFieldConsumer, Consumer postCollectionConsumer) + throws Exception { try (Directory directory = newDirectory()) { try (RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory)) { Document document = new Document(); - document.add(new SortedSetDocValuesField("string", new BytesRef("a"))); - document.add(new SortedSetDocValuesField("string", new BytesRef("b"))); + addFieldConsumer.apply(document, "string", "a"); + addFieldConsumer.apply(document, "string", "b"); indexWriter.addDocument(document); document = new Document(); - document.add(new SortedSetDocValuesField("string", new BytesRef(""))); - document.add(new SortedSetDocValuesField("string", new BytesRef("c"))); - document.add(new SortedSetDocValuesField("string", new BytesRef("a"))); + addFieldConsumer.apply(document, "string", ""); + addFieldConsumer.apply(document, "string", "c"); + addFieldConsumer.apply(document, "string", "a"); indexWriter.addDocument(document); document = new Document(); - document.add(new SortedSetDocValuesField("string", new BytesRef("b"))); - document.add(new SortedSetDocValuesField("string", new BytesRef("d"))); + addFieldConsumer.apply(document, "string", "b"); + addFieldConsumer.apply(document, "string", "d"); indexWriter.addDocument(document); document = new Document(); - document.add(new SortedSetDocValuesField("string", new BytesRef(""))); + addFieldConsumer.apply(document, "string", ""); indexWriter.addDocument(document); try (IndexReader indexReader = maybeWrapReaderEs(indexWriter.getReader())) { IndexSearcher indexSearcher = newIndexSearcher(indexReader); @@ -287,7 +330,7 @@ public void testSimple() throws Exception { TermsAggregator aggregator = createAggregator(aggregationBuilder, indexSearcher, fieldType); aggregator.preCollection(); indexSearcher.search(new MatchAllDocsQuery(), aggregator); - aggregator.postCollection(); + postCollectionConsumer.accept(aggregator); Terms result = reduce(aggregator); assertEquals(5, result.getBuckets().size()); assertEquals("", result.getBuckets().get(0).getKeyAsString()); @@ -307,38 +350,63 @@ public void testSimple() throws Exception { } } + /** + * This test case utilizes the default implementation of GlobalOrdinalsStringTermsAggregator. 
+ * In this case, the segment terms will not get initialized and will run without LeafBucketCollector#termDocFreqCollector optimization + */ public void testStringIncludeExclude() throws Exception { + testStringIncludeExclude( + (document, field, value) -> document.add(new SortedSetDocValuesField(field, new BytesRef(value))), + DEFAULT_POST_COLLECTION + ); + } + + /** + * This test case utilizes the default implementation of GlobalOrdinalsStringTermsAggregator. + * In this case, the segment terms will get initialized and will use LeafBucketCollector#termDocFreqCollector optimization + */ + public void testStringIncludeExcludeWithStoredValues() throws Exception { + // aggregator.postCollection() is not required when LeafBucketCollector#termDocFreqCollector optimization is used + // This also verifies that the bucket count is completed without running postCollection() + testStringIncludeExclude((document, field, value) -> { + document.add(new SortedSetDocValuesField(field, new BytesRef(value))); + document.add(new StringField(field, value, Field.Store.NO)); + }, NOOP_POST_COLLECTION_CONSUMER); + } + + private void testStringIncludeExclude(TriConsumer addField, Consumer postCollectionConsumer) + throws Exception { try (Directory directory = newDirectory()) { try (RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory)) { Document document = new Document(); - document.add(new SortedSetDocValuesField("mv_field", new BytesRef("val000"))); - document.add(new SortedSetDocValuesField("mv_field", new BytesRef("val001"))); - document.add(new SortedDocValuesField("sv_field", new BytesRef("val001"))); + addField.apply(document, "mv_field", "val000"); + addField.apply(document, "mv_field", "val001"); + addField.apply(document, "sv_field", "val001"); indexWriter.addDocument(document); document = new Document(); - document.add(new SortedSetDocValuesField("mv_field", new BytesRef("val002"))); - document.add(new SortedSetDocValuesField("mv_field", new BytesRef("val003"))); - document.add(new SortedDocValuesField("sv_field", new BytesRef("val003"))); + addField.apply(document, "mv_field", "val002"); + addField.apply(document, "mv_field", "val003"); + addField.apply(document, "sv_field", "val003"); indexWriter.addDocument(document); document = new Document(); - document.add(new SortedSetDocValuesField("mv_field", new BytesRef("val004"))); - document.add(new SortedSetDocValuesField("mv_field", new BytesRef("val005"))); - document.add(new SortedDocValuesField("sv_field", new BytesRef("val005"))); + addField.apply(document, "mv_field", "val004"); + addField.apply(document, "mv_field", "val005"); + addField.apply(document, "sv_field", "val005"); indexWriter.addDocument(document); document = new Document(); - document.add(new SortedSetDocValuesField("mv_field", new BytesRef("val006"))); - document.add(new SortedSetDocValuesField("mv_field", new BytesRef("val007"))); - document.add(new SortedDocValuesField("sv_field", new BytesRef("val007"))); + addField.apply(document, "mv_field", "val006"); + addField.apply(document, "mv_field", "val007"); + addField.apply(document, "sv_field", "val007"); indexWriter.addDocument(document); document = new Document(); - document.add(new SortedSetDocValuesField("mv_field", new BytesRef("val008"))); - document.add(new SortedSetDocValuesField("mv_field", new BytesRef("val009"))); - document.add(new SortedDocValuesField("sv_field", new BytesRef("val009"))); + addField.apply(document, "mv_field", "val008"); + addField.apply(document, "mv_field", "val009"); + 
addField.apply(document, "sv_field", "val009"); indexWriter.addDocument(document); document = new Document(); - document.add(new SortedSetDocValuesField("mv_field", new BytesRef("val010"))); - document.add(new SortedSetDocValuesField("mv_field", new BytesRef("val011"))); - document.add(new SortedDocValuesField("sv_field", new BytesRef("val011"))); + addField.apply(document, "mv_field", "val010"); + addField.apply(document, "mv_field", "val011"); + addField.apply(document, "sv_field", "val011"); indexWriter.addDocument(document); try (IndexReader indexReader = maybeWrapReaderEs(indexWriter.getReader())) { IndexSearcher indexSearcher = newIndexSearcher(indexReader); @@ -355,7 +423,7 @@ public void testStringIncludeExclude() throws Exception { TermsAggregator aggregator = createAggregator(aggregationBuilder, indexSearcher, fieldType); aggregator.preCollection(); indexSearcher.search(new MatchAllDocsQuery(), aggregator); - aggregator.postCollection(); + postCollectionConsumer.accept(aggregator); Terms result = reduce(aggregator); assertEquals(10, result.getBuckets().size()); assertEquals("val000", result.getBuckets().get(0).getKeyAsString()); @@ -390,7 +458,7 @@ public void testStringIncludeExclude() throws Exception { aggregator = createAggregator(aggregationBuilder, indexSearcher, fieldType2); aggregator.preCollection(); indexSearcher.search(new MatchAllDocsQuery(), aggregator); - aggregator.postCollection(); + postCollectionConsumer.accept(aggregator); result = reduce(aggregator); assertEquals(5, result.getBuckets().size()); assertEquals("val001", result.getBuckets().get(0).getKeyAsString()); @@ -414,7 +482,7 @@ public void testStringIncludeExclude() throws Exception { aggregator = createAggregator(aggregationBuilder, indexSearcher, fieldType); aggregator.preCollection(); indexSearcher.search(new MatchAllDocsQuery(), aggregator); - aggregator.postCollection(); + postCollectionConsumer.accept(aggregator); result = reduce(aggregator); assertEquals(8, result.getBuckets().size()); assertEquals("val002", result.getBuckets().get(0).getKeyAsString()); @@ -443,7 +511,7 @@ public void testStringIncludeExclude() throws Exception { aggregator = createAggregator(aggregationBuilder, indexSearcher, fieldType); aggregator.preCollection(); indexSearcher.search(new MatchAllDocsQuery(), aggregator); - aggregator.postCollection(); + postCollectionConsumer.accept(aggregator); result = reduce(aggregator); assertEquals(2, result.getBuckets().size()); assertEquals("val010", result.getBuckets().get(0).getKeyAsString()); @@ -460,7 +528,7 @@ public void testStringIncludeExclude() throws Exception { aggregator = createAggregator(aggregationBuilder, indexSearcher, fieldType); aggregator.preCollection(); indexSearcher.search(new MatchAllDocsQuery(), aggregator); - aggregator.postCollection(); + postCollectionConsumer.accept(aggregator); result = reduce(aggregator); assertEquals(2, result.getBuckets().size()); assertEquals("val000", result.getBuckets().get(0).getKeyAsString()); @@ -492,7 +560,7 @@ public void testStringIncludeExclude() throws Exception { aggregator = createAggregator(aggregationBuilder, indexSearcher, fieldType); aggregator.preCollection(); indexSearcher.search(new MatchAllDocsQuery(), aggregator); - aggregator.postCollection(); + postCollectionConsumer.accept(aggregator); result = reduce(aggregator); assertEquals(2, result.getBuckets().size()); assertEquals("val000", result.getBuckets().get(0).getKeyAsString()); @@ -1543,5 +1611,4 @@ private T reduce(Aggregator agg) throws IOExcept 
doAssertReducedMultiBucketConsumer(result, reduceBucketConsumer); return result; } - } From f3187ea1c1fe40efa9a2054000aefbca03ef9b43 Mon Sep 17 00:00:00 2001 From: Sandesh Kumar Date: Thu, 22 Feb 2024 17:56:15 -0800 Subject: [PATCH 22/30] invalid comment correction Signed-off-by: Sandesh Kumar --- .../bucket/terms/GlobalOrdinalsStringTermsAggregator.java | 1 - 1 file changed, 1 deletion(-) diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java index bb85344d5c4ee..39aae29a36e5f 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java +++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java @@ -248,7 +248,6 @@ public void collect(int doc, long owningBucketOrd) throws IOException { return; } int globalOrd = singleValues.ordValue(); - //Hello collectionStrategy.collectGlobalOrd(owningBucketOrd, doc, globalOrd, sub); } }); From 6b70df55873c1f452678f550296b53cf95874859 Mon Sep 17 00:00:00 2001 From: Sandesh Kumar Date: Thu, 22 Feb 2024 22:21:28 -0800 Subject: [PATCH 23/30] refactor Signed-off-by: Sandesh Kumar --- .../search/aggregations/bucket/terms/TermsAggregatorTests.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/test/java/org/opensearch/search/aggregations/bucket/terms/TermsAggregatorTests.java b/server/src/test/java/org/opensearch/search/aggregations/bucket/terms/TermsAggregatorTests.java index c2d27fbf3600e..51dc53bba4f02 100644 --- a/server/src/test/java/org/opensearch/search/aggregations/bucket/terms/TermsAggregatorTests.java +++ b/server/src/test/java/org/opensearch/search/aggregations/bucket/terms/TermsAggregatorTests.java @@ -154,7 +154,7 @@ public class TermsAggregatorTests extends AggregatorTestCase { // aggregator.postCollection() is not required when LeafBucketCollector#termDocFreqCollector optimization is used. 
// using NOOP_POST_COLLECTION_CONSUMER ensures that the bucket count in aggregation is completed before/without running postCollection() - private static final Consumer NOOP_POST_COLLECTION_CONSUMER = termsAggregator -> {}; + private static final Consumer NOOP_POST_COLLECTION = termsAggregator -> {}; @Override protected MapperService mapperServiceMock() { From ca820f05bdc479a8f3a5f35f929b5e1e789d720b Mon Sep 17 00:00:00 2001 From: Sandesh Kumar Date: Thu, 22 Feb 2024 22:36:02 -0800 Subject: [PATCH 24/30] typo fix Signed-off-by: Sandesh Kumar --- .../aggregations/bucket/terms/TermsAggregatorTests.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/server/src/test/java/org/opensearch/search/aggregations/bucket/terms/TermsAggregatorTests.java b/server/src/test/java/org/opensearch/search/aggregations/bucket/terms/TermsAggregatorTests.java index 51dc53bba4f02..91fc3b54faa3f 100644 --- a/server/src/test/java/org/opensearch/search/aggregations/bucket/terms/TermsAggregatorTests.java +++ b/server/src/test/java/org/opensearch/search/aggregations/bucket/terms/TermsAggregatorTests.java @@ -291,7 +291,7 @@ public void testSimpleAggregationWithStoredValues() throws Exception { testSimple((document, field, value) -> { document.add(new SortedSetDocValuesField(field, new BytesRef(value))); document.add(new StringField(field, value, Field.Store.NO)); - }, NOOP_POST_COLLECTION_CONSUMER); + }, NOOP_POST_COLLECTION); } @@ -371,7 +371,7 @@ public void testStringIncludeExcludeWithStoredValues() throws Exception { testStringIncludeExclude((document, field, value) -> { document.add(new SortedSetDocValuesField(field, new BytesRef(value))); document.add(new StringField(field, value, Field.Store.NO)); - }, NOOP_POST_COLLECTION_CONSUMER); + }, NOOP_POST_COLLECTION); } private void testStringIncludeExclude(TriConsumer addField, Consumer postCollectionConsumer) From 9a82b348ea9ae2b47bff8d9dc3aa1e8455722dfe Mon Sep 17 00:00:00 2001 From: Sandesh Kumar Date: Mon, 26 Feb 2024 15:44:18 -0800 Subject: [PATCH 25/30] Increase code coverage and minor refactoring Signed-off-by: Sandesh Kumar --- .../GlobalOrdinalsStringTermsAggregator.java | 23 +++-- .../terms/KeywordTermsAggregatorTests.java | 95 ++++++++++++++----- .../bucket/terms/TermsAggregatorTests.java | 30 ++---- .../aggregations/AggregatorTestCase.java | 50 +++++++++- 4 files changed, 137 insertions(+), 61 deletions(-) diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java index 39aae29a36e5f..2a83710117e74 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java +++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java @@ -158,7 +158,7 @@ public void setWeight(Weight weight) { } /** - Collects term frequencies for a given field from a LeafReaderContext. 
+ Collects term frequencies for a given field from a LeafReaderContext directly from stored segment terms @param ctx The LeafReaderContext to collect terms from @param globalOrds The SortedSetDocValues for the field's ordinals @param ordCountConsumer A consumer to accept collected term frequencies @@ -170,15 +170,18 @@ LeafBucketCollector termDocFreqCollector( SortedSetDocValues globalOrds, BiConsumer ordCountConsumer ) throws IOException { - if (weight == null || weight.count(ctx) != ctx.reader().maxDoc()) { - // weight.count(ctx) == ctx.reader().maxDoc() implies there are no deleted documents and - // top-level query matches all docs in the segment + if (weight == null) { + // Weight not assigned - cannot use this optimization return null; - } - - if (weight.count(ctx) == 0) { - // No documents matches top level query on this segment, we can skip the segment - return LeafBucketCollector.NO_OP_COLLECTOR; + } else { + if (weight.count(ctx) == 0) { + // No documents matches top level query on this segment, we can skip the segment entirely + return LeafBucketCollector.NO_OP_COLLECTOR; + } else if (weight.count(ctx) != ctx.reader().maxDoc()) { + // weight.count(ctx) == ctx.reader().maxDoc() implies there are no deleted documents and + // top-level query matches all docs in the segment + return null; + } } Terms segmentTerms = ctx.reader().terms(this.fieldName); @@ -198,6 +201,8 @@ LeafBucketCollector termDocFreqCollector( TermsEnum globalOrdinalTermsEnum = globalOrds.termsEnum(); BytesRef ordinalTerm = globalOrdinalTermsEnum.next(); + // Iterate over the terms in the segment, look for matches in the global ordinal terms, + // and increment bucket count when segment terms match global ordinal terms. while (indexTerm != null && ordinalTerm != null) { int compare = indexTerm.compareTo(ordinalTerm); if (compare == 0) { diff --git a/server/src/test/java/org/opensearch/search/aggregations/bucket/terms/KeywordTermsAggregatorTests.java b/server/src/test/java/org/opensearch/search/aggregations/bucket/terms/KeywordTermsAggregatorTests.java index 4229361aa7f46..5d1e02116f189 100644 --- a/server/src/test/java/org/opensearch/search/aggregations/bucket/terms/KeywordTermsAggregatorTests.java +++ b/server/src/test/java/org/opensearch/search/aggregations/bucket/terms/KeywordTermsAggregatorTests.java @@ -42,8 +42,10 @@ import org.apache.lucene.store.Directory; import org.apache.lucene.tests.index.RandomIndexWriter; import org.apache.lucene.util.BytesRef; +import org.opensearch.common.TriConsumer; import org.opensearch.index.mapper.KeywordFieldMapper; import org.opensearch.index.mapper.MappedFieldType; +import org.opensearch.search.aggregations.Aggregator; import org.opensearch.search.aggregations.AggregatorTestCase; import org.opensearch.search.aggregations.support.ValueType; @@ -68,61 +70,103 @@ public class KeywordTermsAggregatorTests extends AggregatorTestCase { dataset = d; } + private static Consumer VERIFY_MATCH_ALL_DOCS = agg -> { + assertEquals(9, agg.getBuckets().size()); + for (int i = 0; i < 9; i++) { + StringTerms.Bucket bucket = (StringTerms.Bucket) agg.getBuckets().get(i); + assertThat(bucket.getKey(), equalTo(String.valueOf(9L - i))); + assertThat(bucket.getDocCount(), equalTo(9L - i)); + } + }; + + private static Query MATCH_ALL_DOCS_QUERY = new MatchAllDocsQuery(); + + private static Query MATCH_NO_DOCS_QUERY = new MatchNoDocsQuery(); + public void testMatchNoDocs() throws IOException { testSearchCase( - new MatchNoDocsQuery(), + ADD_SORTED_FIELD_NO_STORE, + MATCH_NO_DOCS_QUERY, dataset, 
aggregation -> aggregation.field(KEYWORD_FIELD), agg -> assertEquals(0, agg.getBuckets().size()), - null // without type hint + null, // without type hint + DEFAULT_POST_COLLECTION ); testSearchCase( - new MatchNoDocsQuery(), + ADD_SORTED_FIELD_NO_STORE, + MATCH_NO_DOCS_QUERY, dataset, aggregation -> aggregation.field(KEYWORD_FIELD), agg -> assertEquals(0, agg.getBuckets().size()), - ValueType.STRING // with type hint + ValueType.STRING, // with type hint + DEFAULT_POST_COLLECTION ); } public void testMatchAllDocs() throws IOException { - Query query = new MatchAllDocsQuery(); - - testSearchCase(query, dataset, aggregation -> aggregation.field(KEYWORD_FIELD), agg -> { - assertEquals(9, agg.getBuckets().size()); - for (int i = 0; i < 9; i++) { - StringTerms.Bucket bucket = (StringTerms.Bucket) agg.getBuckets().get(i); - assertThat(bucket.getKey(), equalTo(String.valueOf(9L - i))); - assertThat(bucket.getDocCount(), equalTo(9L - i)); - } - }, - null // without type hint + testSearchCase( + ADD_SORTED_FIELD_NO_STORE, + MATCH_ALL_DOCS_QUERY, + dataset, + aggregation -> aggregation.field(KEYWORD_FIELD), + VERIFY_MATCH_ALL_DOCS, + null, // without type hint + DEFAULT_POST_COLLECTION ); - testSearchCase(query, dataset, aggregation -> aggregation.field(KEYWORD_FIELD), agg -> { - assertEquals(9, agg.getBuckets().size()); - for (int i = 0; i < 9; i++) { - StringTerms.Bucket bucket = (StringTerms.Bucket) agg.getBuckets().get(i); - assertThat(bucket.getKey(), equalTo(String.valueOf(9L - i))); - assertThat(bucket.getDocCount(), equalTo(9L - i)); - } - }, - ValueType.STRING // with type hint + testSearchCase( + ADD_SORTED_FIELD_NO_STORE, + MATCH_ALL_DOCS_QUERY, + dataset, + aggregation -> aggregation.field(KEYWORD_FIELD), + VERIFY_MATCH_ALL_DOCS, + ValueType.STRING, // with type hint + DEFAULT_POST_COLLECTION + ); + } + + public void testMatchAllDocsWithStoredValues() throws IOException { + // aggregator.postCollection() is not required when LeafBucketCollector#termDocFreqCollector optimization is used, + // therefore using NOOP_POST_COLLECTION + // This also verifies that the bucket count is completed without running postCollection() + + testSearchCase( + ADD_SORTED_FIELD_STORE, + MATCH_ALL_DOCS_QUERY, + dataset, + aggregation -> aggregation.field(KEYWORD_FIELD), + VERIFY_MATCH_ALL_DOCS, + null, // without type hint + NOOP_POST_COLLECTION + ); + + testSearchCase( + ADD_SORTED_FIELD_STORE, + MATCH_ALL_DOCS_QUERY, + dataset, + aggregation -> aggregation.field(KEYWORD_FIELD), + VERIFY_MATCH_ALL_DOCS, + ValueType.STRING, // with type hint + NOOP_POST_COLLECTION ); } private void testSearchCase( + TriConsumer addField, Query query, List dataset, Consumer configure, Consumer verify, - ValueType valueType + ValueType valueType, + Consumer postCollectionConsumer ) throws IOException { try (Directory directory = newDirectory()) { try (RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory)) { Document document = new Document(); for (String value : dataset) { + addField.apply(document, KEYWORD_FIELD, value); document.add(new SortedSetDocValuesField(KEYWORD_FIELD, new BytesRef(value))); indexWriter.addDocument(document); document.clear(); @@ -147,5 +191,4 @@ private void testSearchCase( } } } - } diff --git a/server/src/test/java/org/opensearch/search/aggregations/bucket/terms/TermsAggregatorTests.java b/server/src/test/java/org/opensearch/search/aggregations/bucket/terms/TermsAggregatorTests.java index 91fc3b54faa3f..93939657b6981 100644 --- 
a/server/src/test/java/org/opensearch/search/aggregations/bucket/terms/TermsAggregatorTests.java +++ b/server/src/test/java/org/opensearch/search/aggregations/bucket/terms/TermsAggregatorTests.java @@ -144,18 +144,6 @@ public class TermsAggregatorTests extends AggregatorTestCase { private static final String STRING_SCRIPT_NAME = "string_script"; private static final String STRING_SCRIPT_OUTPUT = "Orange"; - private static final Consumer DEFAULT_POST_COLLECTION = termsAggregator -> { - try { - termsAggregator.postCollection(); - } catch (IOException e) { - throw new RuntimeException(e); - } - }; - - // aggregator.postCollection() is not required when LeafBucketCollector#termDocFreqCollector optimization is used. - // using NOOP_POST_COLLECTION_CONSUMER ensures that the bucket count in aggregation is completed before/without running postCollection() - private static final Consumer NOOP_POST_COLLECTION = termsAggregator -> {}; - @Override protected MapperService mapperServiceMock() { MapperService mapperService = mock(MapperService.class); @@ -275,10 +263,7 @@ public void testUsesGlobalOrdinalsByDefault() throws Exception { * In this case, the segment terms will not get initialized and will run without LeafBucketCollector#termDocFreqCollector optimization */ public void testSimpleAggregation() throws Exception { - testSimple( - (document, field, value) -> document.add(new SortedSetDocValuesField(field, new BytesRef(value))), - DEFAULT_POST_COLLECTION - ); + testSimple(ADD_SORTED_FIELD_NO_STORE, DEFAULT_POST_COLLECTION); } /** @@ -286,20 +271,17 @@ public void testSimpleAggregation() throws Exception { * In this case, the segment terms will get initialized and will use LeafBucketCollector#termDocFreqCollector optimization */ public void testSimpleAggregationWithStoredValues() throws Exception { - // aggregator.postCollection() is not required when LeafBucketCollector#termDocFreqCollector optimization is used. 
+ // aggregator.postCollection() is not required when LeafBucketCollector#termDocFreqCollector optimization is used, + // therefore using NOOP_POST_COLLECTION // This also verifies that the bucket count is completed without running postCollection() - testSimple((document, field, value) -> { - document.add(new SortedSetDocValuesField(field, new BytesRef(value))); - document.add(new StringField(field, value, Field.Store.NO)); - }, NOOP_POST_COLLECTION); - + testSimple(ADD_SORTED_FIELD_STORE, NOOP_POST_COLLECTION); } /** * This is a utility method to test out string terms aggregation * @param addFieldConsumer a function that determines how a field is added to the document */ - private void testSimple(TriConsumer addFieldConsumer, Consumer postCollectionConsumer) + private void testSimple(TriConsumer addFieldConsumer, Consumer postCollectionConsumer) throws Exception { try (Directory directory = newDirectory()) { try (RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory)) { @@ -374,7 +356,7 @@ public void testStringIncludeExcludeWithStoredValues() throws Exception { }, NOOP_POST_COLLECTION); } - private void testStringIncludeExclude(TriConsumer addField, Consumer postCollectionConsumer) + private void testStringIncludeExclude(TriConsumer addField, Consumer postCollectionConsumer) throws Exception { try (Directory directory = newDirectory()) { try (RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory)) { diff --git a/test/framework/src/main/java/org/opensearch/search/aggregations/AggregatorTestCase.java b/test/framework/src/main/java/org/opensearch/search/aggregations/AggregatorTestCase.java index ac0447dbebf7e..ec928e1122bb3 100644 --- a/test/framework/src/main/java/org/opensearch/search/aggregations/AggregatorTestCase.java +++ b/test/framework/src/main/java/org/opensearch/search/aggregations/AggregatorTestCase.java @@ -34,11 +34,13 @@ import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.BinaryDocValuesField; import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; import org.apache.lucene.document.InetAddressPoint; import org.apache.lucene.document.LatLonDocValuesField; import org.apache.lucene.document.SortedNumericDocValuesField; import org.apache.lucene.document.SortedSetDocValuesField; import org.apache.lucene.document.StoredField; +import org.apache.lucene.document.StringField; import org.apache.lucene.index.CompositeReaderContext; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexReader; @@ -62,6 +64,7 @@ import org.opensearch.Version; import org.opensearch.cluster.metadata.IndexMetadata; import org.opensearch.common.CheckedConsumer; +import org.opensearch.common.TriConsumer; import org.opensearch.common.TriFunction; import org.opensearch.common.lease.Releasable; import org.opensearch.common.lease.Releasables; @@ -121,6 +124,7 @@ import org.opensearch.search.aggregations.AggregatorFactories.Builder; import org.opensearch.search.aggregations.MultiBucketConsumerService.MultiBucketConsumer; import org.opensearch.search.aggregations.bucket.nested.NestedAggregationBuilder; +import org.opensearch.search.aggregations.bucket.terms.TermsAggregator; import org.opensearch.search.aggregations.metrics.MetricsAggregator; import org.opensearch.search.aggregations.pipeline.PipelineAggregator; import org.opensearch.search.aggregations.pipeline.PipelineAggregator.PipelineTree; @@ -178,6 +182,26 @@ public abstract class AggregatorTestCase extends 
OpenSearchTestCase { // A list of field types that should not be tested, or are not currently supported private static List TYPE_TEST_DENYLIST; + protected static final Consumer DEFAULT_POST_COLLECTION = termsAggregator -> { + try { + termsAggregator.postCollection(); + } catch (IOException e) { + throw new RuntimeException(e); + } + }; + + // aggregator.postCollection() is not required when LeafBucketCollector#termDocFreqCollector optimization is used. + // using NOOP_POST_COLLECTION_CONSUMER ensures that the bucket count in aggregation is completed before/without running postCollection() + protected static final Consumer NOOP_POST_COLLECTION = termsAggregator -> {}; + + protected static final TriConsumer ADD_SORTED_FIELD_NO_STORE = (document, field, value) -> + document.add(new SortedSetDocValuesField(field, new BytesRef(value))); + + protected static final TriConsumer ADD_SORTED_FIELD_STORE = (document, field, value) -> { + document.add(new SortedSetDocValuesField(field, new BytesRef(value))); + document.add(new StringField(field, value, Field.Store.NO)); + }; + static { List denylist = new ArrayList<>(); denylist.add(ObjectMapper.CONTENT_TYPE); // Cannot aggregate objects @@ -484,6 +508,16 @@ protected A searchAndReduc return searchAndReduce(createIndexSettings(), searcher, query, builder, DEFAULT_MAX_BUCKETS, fieldTypes); } + protected A searchAndReduce( + IndexSearcher searcher, + Query query, + AggregationBuilder builder, + Consumer postCollectionConsumer, + MappedFieldType... fieldTypes + ) throws IOException { + return searchAndReduce(createIndexSettings(), searcher, query, builder, DEFAULT_MAX_BUCKETS, postCollectionConsumer, fieldTypes); + } + protected A searchAndReduce( IndexSettings indexSettings, IndexSearcher searcher, @@ -504,6 +538,17 @@ protected A searchAndReduc return searchAndReduce(createIndexSettings(), searcher, query, builder, maxBucket, fieldTypes); } + protected A searchAndReduce( + IndexSettings indexSettings, + IndexSearcher searcher, + Query query, + AggregationBuilder builder, + int maxBucket, + MappedFieldType... fieldTypes + ) throws IOException { + return searchAndReduce(indexSettings, searcher, query, builder, maxBucket, DEFAULT_POST_COLLECTION, fieldTypes); + } + /** * Collects all documents that match the provided query {@link Query} and * returns the reduced {@link InternalAggregation}. @@ -518,6 +563,7 @@ protected A searchAndReduc Query query, AggregationBuilder builder, int maxBucket, + Consumer postCollectionConsumer, MappedFieldType... 
fieldTypes ) throws IOException { final IndexReaderContext ctx = searcher.getTopReaderContext(); @@ -548,13 +594,13 @@ protected A searchAndReduc a.preCollection(); Weight weight = subSearcher.createWeight(rewritten, ScoreMode.COMPLETE, 1f); subSearcher.search(weight, a); - a.postCollection(); + postCollectionConsumer.accept(a); aggs.add(a.buildTopLevel()); } } else { root.preCollection(); searcher.search(rewritten, root); - root.postCollection(); + postCollectionConsumer.accept(root); aggs.add(root.buildTopLevel()); } From 3c98cb2ebdf2589e9ad9d0dc5a31b52bf7a21bfc Mon Sep 17 00:00:00 2001 From: Sandesh Kumar Date: Mon, 26 Feb 2024 16:05:42 -0800 Subject: [PATCH 26/30] Spotless in framework Signed-off-by: Sandesh Kumar --- .../opensearch/search/aggregations/AggregatorTestCase.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/framework/src/main/java/org/opensearch/search/aggregations/AggregatorTestCase.java b/test/framework/src/main/java/org/opensearch/search/aggregations/AggregatorTestCase.java index ec928e1122bb3..9fdae80bd1ada 100644 --- a/test/framework/src/main/java/org/opensearch/search/aggregations/AggregatorTestCase.java +++ b/test/framework/src/main/java/org/opensearch/search/aggregations/AggregatorTestCase.java @@ -124,7 +124,6 @@ import org.opensearch.search.aggregations.AggregatorFactories.Builder; import org.opensearch.search.aggregations.MultiBucketConsumerService.MultiBucketConsumer; import org.opensearch.search.aggregations.bucket.nested.NestedAggregationBuilder; -import org.opensearch.search.aggregations.bucket.terms.TermsAggregator; import org.opensearch.search.aggregations.metrics.MetricsAggregator; import org.opensearch.search.aggregations.pipeline.PipelineAggregator; import org.opensearch.search.aggregations.pipeline.PipelineAggregator.PipelineTree; @@ -194,8 +193,9 @@ public abstract class AggregatorTestCase extends OpenSearchTestCase { // using NOOP_POST_COLLECTION_CONSUMER ensures that the bucket count in aggregation is completed before/without running postCollection() protected static final Consumer NOOP_POST_COLLECTION = termsAggregator -> {}; - protected static final TriConsumer ADD_SORTED_FIELD_NO_STORE = (document, field, value) -> - document.add(new SortedSetDocValuesField(field, new BytesRef(value))); + protected static final TriConsumer ADD_SORTED_FIELD_NO_STORE = (document, field, value) -> document.add( + new SortedSetDocValuesField(field, new BytesRef(value)) + ); protected static final TriConsumer ADD_SORTED_FIELD_STORE = (document, field, value) -> { document.add(new SortedSetDocValuesField(field, new BytesRef(value))); From 9be19a02f980dd0583e0be250f84e03696d1570d Mon Sep 17 00:00:00 2001 From: Sandesh Kumar Date: Fri, 8 Mar 2024 15:48:01 -0800 Subject: [PATCH 27/30] Test cases improvement Signed-off-by: Sandesh Kumar --- .../GlobalOrdinalsStringTermsAggregator.java | 10 +- .../terms/KeywordTermsAggregatorTests.java | 75 ++---- .../bucket/terms/TermsAggregatorTests.java | 213 ++++++++++-------- .../aggregations/AggregatorTestCase.java | 131 +++++++---- 4 files changed, 243 insertions(+), 186 deletions(-) diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java index 2a83710117e74..15e538f01e632 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java +++ 
b/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java @@ -40,6 +40,7 @@ import org.apache.lucene.index.SortedSetDocValues; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.search.CollectionTerminatedException; import org.apache.lucene.search.Weight; import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.BytesRef; @@ -162,7 +163,7 @@ public void setWeight(Weight weight) { @param ctx The LeafReaderContext to collect terms from @param globalOrds The SortedSetDocValues for the field's ordinals @param ordCountConsumer A consumer to accept collected term frequencies - @return A no-operation LeafBucketCollector implementation, since collection is complete + @return A LeafBucketCollector implementation with collection termination, since collection is complete @throws IOException If an I/O error occurs during reading */ LeafBucketCollector termDocFreqCollector( @@ -217,7 +218,12 @@ LeafBucketCollector termDocFreqCollector( ordinalTerm = globalOrdinalTermsEnum.next(); } } - return LeafBucketCollector.NO_OP_COLLECTOR; + return new LeafBucketCollector() { + @Override + public void collect(int doc, long owningBucketOrd) throws IOException { + throw new CollectionTerminatedException(); + } + }; } @Override diff --git a/server/src/test/java/org/opensearch/search/aggregations/bucket/terms/KeywordTermsAggregatorTests.java b/server/src/test/java/org/opensearch/search/aggregations/bucket/terms/KeywordTermsAggregatorTests.java index 5d1e02116f189..753644dce81d5 100644 --- a/server/src/test/java/org/opensearch/search/aggregations/bucket/terms/KeywordTermsAggregatorTests.java +++ b/server/src/test/java/org/opensearch/search/aggregations/bucket/terms/KeywordTermsAggregatorTests.java @@ -32,7 +32,6 @@ package org.opensearch.search.aggregations.bucket.terms; import org.apache.lucene.document.Document; -import org.apache.lucene.document.SortedSetDocValuesField; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.IndexSearcher; @@ -41,11 +40,9 @@ import org.apache.lucene.search.Query; import org.apache.lucene.store.Directory; import org.apache.lucene.tests.index.RandomIndexWriter; -import org.apache.lucene.util.BytesRef; import org.opensearch.common.TriConsumer; import org.opensearch.index.mapper.KeywordFieldMapper; import org.opensearch.index.mapper.MappedFieldType; -import org.opensearch.search.aggregations.Aggregator; import org.opensearch.search.aggregations.AggregatorTestCase; import org.opensearch.search.aggregations.support.ValueType; @@ -59,6 +56,8 @@ public class KeywordTermsAggregatorTests extends AggregatorTestCase { private static final String KEYWORD_FIELD = "keyword"; + private static final Consumer CONFIGURE_KEYWORD_FIELD = agg -> agg.field(KEYWORD_FIELD); + private static final List dataset; static { List d = new ArrayList<>(45); @@ -70,7 +69,7 @@ public class KeywordTermsAggregatorTests extends AggregatorTestCase { dataset = d; } - private static Consumer VERIFY_MATCH_ALL_DOCS = agg -> { + private static final Consumer VERIFY_MATCH_ALL_DOCS = agg -> { assertEquals(9, agg.getBuckets().size()); for (int i = 0; i < 9; i++) { StringTerms.Bucket bucket = (StringTerms.Bucket) agg.getBuckets().get(i); @@ -79,77 +78,49 @@ public class KeywordTermsAggregatorTests extends AggregatorTestCase { } }; - private static Query MATCH_ALL_DOCS_QUERY = new MatchAllDocsQuery(); + private static final Consumer 
VERIFY_MATCH_NO_DOCS = agg -> { assertEquals(0, agg.getBuckets().size()); }; + + private static final Query MATCH_ALL_DOCS_QUERY = new MatchAllDocsQuery(); - private static Query MATCH_NO_DOCS_QUERY = new MatchNoDocsQuery(); + private static final Query MATCH_NO_DOCS_QUERY = new MatchNoDocsQuery(); public void testMatchNoDocs() throws IOException { testSearchCase( - ADD_SORTED_FIELD_NO_STORE, + ADD_SORTED_SET_FIELD_NOT_INDEXED, MATCH_NO_DOCS_QUERY, dataset, - aggregation -> aggregation.field(KEYWORD_FIELD), - agg -> assertEquals(0, agg.getBuckets().size()), - null, // without type hint - DEFAULT_POST_COLLECTION + CONFIGURE_KEYWORD_FIELD, + VERIFY_MATCH_NO_DOCS, + null // without type hint ); testSearchCase( - ADD_SORTED_FIELD_NO_STORE, + ADD_SORTED_SET_FIELD_NOT_INDEXED, MATCH_NO_DOCS_QUERY, dataset, - aggregation -> aggregation.field(KEYWORD_FIELD), - agg -> assertEquals(0, agg.getBuckets().size()), - ValueType.STRING, // with type hint - DEFAULT_POST_COLLECTION + CONFIGURE_KEYWORD_FIELD, + VERIFY_MATCH_NO_DOCS, + ValueType.STRING // with type hint ); } public void testMatchAllDocs() throws IOException { testSearchCase( - ADD_SORTED_FIELD_NO_STORE, - MATCH_ALL_DOCS_QUERY, - dataset, - aggregation -> aggregation.field(KEYWORD_FIELD), - VERIFY_MATCH_ALL_DOCS, - null, // without type hint - DEFAULT_POST_COLLECTION - ); - - testSearchCase( - ADD_SORTED_FIELD_NO_STORE, - MATCH_ALL_DOCS_QUERY, - dataset, - aggregation -> aggregation.field(KEYWORD_FIELD), - VERIFY_MATCH_ALL_DOCS, - ValueType.STRING, // with type hint - DEFAULT_POST_COLLECTION - ); - } - - public void testMatchAllDocsWithStoredValues() throws IOException { - // aggregator.postCollection() is not required when LeafBucketCollector#termDocFreqCollector optimization is used, - // therefore using NOOP_POST_COLLECTION - // This also verifies that the bucket count is completed without running postCollection() - - testSearchCase( - ADD_SORTED_FIELD_STORE, + ADD_SORTED_SET_FIELD_NOT_INDEXED, MATCH_ALL_DOCS_QUERY, dataset, - aggregation -> aggregation.field(KEYWORD_FIELD), + CONFIGURE_KEYWORD_FIELD, VERIFY_MATCH_ALL_DOCS, - null, // without type hint - NOOP_POST_COLLECTION + null // without type hint ); testSearchCase( - ADD_SORTED_FIELD_STORE, + ADD_SORTED_SET_FIELD_NOT_INDEXED, MATCH_ALL_DOCS_QUERY, dataset, - aggregation -> aggregation.field(KEYWORD_FIELD), + CONFIGURE_KEYWORD_FIELD, VERIFY_MATCH_ALL_DOCS, - ValueType.STRING, // with type hint - NOOP_POST_COLLECTION + ValueType.STRING // with type hint ); } @@ -159,15 +130,13 @@ private void testSearchCase( List dataset, Consumer configure, Consumer verify, - ValueType valueType, - Consumer postCollectionConsumer + ValueType valueType ) throws IOException { try (Directory directory = newDirectory()) { try (RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory)) { Document document = new Document(); for (String value : dataset) { addField.apply(document, KEYWORD_FIELD, value); - document.add(new SortedSetDocValuesField(KEYWORD_FIELD, new BytesRef(value))); indexWriter.addDocument(document); document.clear(); } diff --git a/server/src/test/java/org/opensearch/search/aggregations/bucket/terms/TermsAggregatorTests.java b/server/src/test/java/org/opensearch/search/aggregations/bucket/terms/TermsAggregatorTests.java index 93939657b6981..b8a08068f76a3 100644 --- a/server/src/test/java/org/opensearch/search/aggregations/bucket/terms/TermsAggregatorTests.java +++ b/server/src/test/java/org/opensearch/search/aggregations/bucket/terms/TermsAggregatorTests.java @@ -44,6 +44,7 @@ 
import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexableField; +import org.apache.lucene.index.Term; import org.apache.lucene.search.DocValuesFieldExistsQuery; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.MatchAllDocsQuery; @@ -121,6 +122,7 @@ import java.util.Iterator; import java.util.List; import java.util.Map; +import java.util.concurrent.atomic.AtomicInteger; import java.util.function.BiFunction; import java.util.function.Consumer; import java.util.function.Function; @@ -137,9 +139,6 @@ import static org.mockito.Mockito.when; public class TermsAggregatorTests extends AggregatorTestCase { - - private boolean randomizeAggregatorImpl = true; - // Constants for a script that returns a string private static final String STRING_SCRIPT_NAME = "string_script"; private static final String STRING_SCRIPT_OUTPUT = "Orange"; @@ -172,9 +171,22 @@ protected ScriptService getMockScriptService() { return new ScriptService(Settings.EMPTY, engines, ScriptModule.CORE_CONTEXTS); } + protected CountingAggregator createCountingAggregator( + AggregationBuilder aggregationBuilder, + IndexSearcher indexSearcher, + boolean randomizeAggregatorImpl, + MappedFieldType... fieldTypes + ) throws IOException { + return new CountingAggregator( + new AtomicInteger(), + createAggregator(aggregationBuilder, indexSearcher, randomizeAggregatorImpl, fieldTypes) + ); + } + protected A createAggregator( AggregationBuilder aggregationBuilder, IndexSearcher indexSearcher, + boolean randomizeAggregatorImpl, MappedFieldType... fieldTypes ) throws IOException { try { @@ -189,6 +201,14 @@ protected A createAggregator( } } + protected A createAggregator( + AggregationBuilder aggregationBuilder, + IndexSearcher indexSearcher, + MappedFieldType... fieldTypes + ) throws IOException { + return createAggregator(aggregationBuilder, indexSearcher, true, fieldTypes); + } + @Override protected AggregationBuilder createAggBuilderForTypeTest(MappedFieldType fieldType, String fieldName) { return new TermsAggregationBuilder("foo").field(fieldName); @@ -208,8 +228,6 @@ protected List getSupportedValuesSourceTypes() { } public void testUsesGlobalOrdinalsByDefault() throws Exception { - randomizeAggregatorImpl = false; - Directory directory = newDirectory(); RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory); indexWriter.close(); @@ -221,7 +239,7 @@ public void testUsesGlobalOrdinalsByDefault() throws Exception { .field("string"); MappedFieldType fieldType = new KeywordFieldMapper.KeywordFieldType("string"); - TermsAggregator aggregator = createAggregator(aggregationBuilder, indexSearcher, fieldType); + TermsAggregator aggregator = createAggregator(aggregationBuilder, indexSearcher, false, fieldType); assertThat(aggregator, instanceOf(GlobalOrdinalsStringTermsAggregator.class)); GlobalOrdinalsStringTermsAggregator globalAgg = (GlobalOrdinalsStringTermsAggregator) aggregator; assertThat(globalAgg.descriptCollectionStrategy(), equalTo("dense")); @@ -259,30 +277,55 @@ public void testUsesGlobalOrdinalsByDefault() throws Exception { } /** - * This test case utilizes the low cardinality implementation of GlobalOrdinalsStringTermsAggregator. - * In this case, the segment terms will not get initialized and will run without LeafBucketCollector#termDocFreqCollector optimization + * This test case utilizes the default implementation of GlobalOrdinalsStringTermsAggregator. 
*/ public void testSimpleAggregation() throws Exception { - testSimple(ADD_SORTED_FIELD_NO_STORE, DEFAULT_POST_COLLECTION); + // Fields not indexed: cannot use LeafBucketCollector#termDocFreqCollector - all documents are visited + testSimple(ADD_SORTED_SET_FIELD_NOT_INDEXED, false, false, TermsAggregatorFactory.ExecutionMode.GLOBAL_ORDINALS, 4); + + // Fields indexed, deleted documents in segment: cannot use LeafBucketCollector#termDocFreqCollector - all documents are visited + testSimple(ADD_SORTED_SET_FIELD_INDEXED, true, false, TermsAggregatorFactory.ExecutionMode.GLOBAL_ORDINALS, 4); + + // Fields indexed, no deleted documents in segment: will use LeafBucketCollector#termDocFreqCollector - no documents are visited + testSimple(ADD_SORTED_SET_FIELD_INDEXED, false, false, TermsAggregatorFactory.ExecutionMode.GLOBAL_ORDINALS, 0); + } + + /** + * This test case utilizes the LowCardinality implementation of GlobalOrdinalsStringTermsAggregator. + */ + public void testSimpleAggregationLowCardinality() throws Exception { + // Fields not indexed: cannot use LeafBucketCollector#termDocFreqCollector - all documents are visited + testSimple(ADD_SORTED_SET_FIELD_NOT_INDEXED, false, true, TermsAggregatorFactory.ExecutionMode.GLOBAL_ORDINALS, 4); + + // Fields indexed, deleted documents in segment: cannot use LeafBucketCollector#termDocFreqCollector - all documents are visited + testSimple(ADD_SORTED_SET_FIELD_INDEXED, true, true, TermsAggregatorFactory.ExecutionMode.GLOBAL_ORDINALS, 4); + + // Fields indexed, no deleted documents in segment: will use LeafBucketCollector#termDocFreqCollector - no documents are visited + testSimple(ADD_SORTED_SET_FIELD_INDEXED, false, true, TermsAggregatorFactory.ExecutionMode.GLOBAL_ORDINALS, 0); } /** - * This test case utilizes the low cardinality implementation of GlobalOrdinalsStringTermsAggregator. - * In this case, the segment terms will get initialized and will use LeafBucketCollector#termDocFreqCollector optimization + * This test case utilizes the MapStringTermsAggregator. 
*/ - public void testSimpleAggregationWithStoredValues() throws Exception { - // aggregator.postCollection() is not required when LeafBucketCollector#termDocFreqCollector optimization is used, - // therefore using NOOP_POST_COLLECTION - // This also verifies that the bucket count is completed without running postCollection() - testSimple(ADD_SORTED_FIELD_STORE, NOOP_POST_COLLECTION); + public void testSimpleMapStringAggregation() throws Exception { + testSimple(ADD_SORTED_SET_FIELD_INDEXED, randomBoolean(), randomBoolean(), TermsAggregatorFactory.ExecutionMode.MAP, 4); } /** * This is a utility method to test out string terms aggregation * @param addFieldConsumer a function that determines how a field is added to the document + * @param includeDeletedDocumentsInSegment to include deleted documents in the segment or not + * @param collectSegmentOrds collect segment ords or not - set true to utilize LowCardinality implementation for GlobalOrdinalsStringTermsAggregator + * @param executionMode execution mode MAP or GLOBAL_ORDINALS + * @param expectedCollectCount expected number of documents visited as part of collect() invocation */ - private void testSimple(TriConsumer addFieldConsumer, Consumer postCollectionConsumer) - throws Exception { + private void testSimple( + TriConsumer addFieldConsumer, + final boolean includeDeletedDocumentsInSegment, + boolean collectSegmentOrds, + TermsAggregatorFactory.ExecutionMode executionMode, + final int expectedCollectCount + ) throws Exception { try (Directory directory = newDirectory()) { try (RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory)) { Document document = new Document(); @@ -301,94 +344,84 @@ private void testSimple(TriConsumer addFieldConsumer, document = new Document(); addFieldConsumer.apply(document, "string", ""); indexWriter.addDocument(document); + + if (includeDeletedDocumentsInSegment) { + document = new Document(); + ADD_SORTED_SET_FIELD_INDEXED.apply(document, "string", "e"); + indexWriter.addDocument(document); + indexWriter.deleteDocuments(new Term("string", "e")); + assertEquals(5, indexWriter.getDocStats().maxDoc); + } + assertEquals(4, indexWriter.getDocStats().numDocs); + try (IndexReader indexReader = maybeWrapReaderEs(indexWriter.getReader())) { IndexSearcher indexSearcher = newIndexSearcher(indexReader); - for (TermsAggregatorFactory.ExecutionMode executionMode : TermsAggregatorFactory.ExecutionMode.values()) { - TermsAggregationBuilder aggregationBuilder = new TermsAggregationBuilder("_name").userValueTypeHint( - ValueType.STRING - ).executionHint(executionMode.toString()).field("string").order(BucketOrder.key(true)); - MappedFieldType fieldType = new KeywordFieldMapper.KeywordFieldType("string"); - TermsAggregator aggregator = createAggregator(aggregationBuilder, indexSearcher, fieldType); - aggregator.preCollection(); - indexSearcher.search(new MatchAllDocsQuery(), aggregator); - postCollectionConsumer.accept(aggregator); - Terms result = reduce(aggregator); - assertEquals(5, result.getBuckets().size()); - assertEquals("", result.getBuckets().get(0).getKeyAsString()); - assertEquals(2L, result.getBuckets().get(0).getDocCount()); - assertEquals("a", result.getBuckets().get(1).getKeyAsString()); - assertEquals(2L, result.getBuckets().get(1).getDocCount()); - assertEquals("b", result.getBuckets().get(2).getKeyAsString()); - assertEquals(2L, result.getBuckets().get(2).getDocCount()); - assertEquals("c", result.getBuckets().get(3).getKeyAsString()); - assertEquals(1L, 
result.getBuckets().get(3).getDocCount()); - assertEquals("d", result.getBuckets().get(4).getKeyAsString()); - assertEquals(1L, result.getBuckets().get(4).getDocCount()); - assertTrue(AggregationInspectionHelper.hasValue((InternalTerms) result)); - } + TermsAggregationBuilder aggregationBuilder = new TermsAggregationBuilder("_name").userValueTypeHint(ValueType.STRING) + .executionHint(executionMode.toString()) + .field("string") + .order(BucketOrder.key(true)); + MappedFieldType fieldType = new KeywordFieldMapper.KeywordFieldType("string"); + + TermsAggregatorFactory.COLLECT_SEGMENT_ORDS = collectSegmentOrds; + TermsAggregatorFactory.REMAP_GLOBAL_ORDS = false; + CountingAggregator aggregator = createCountingAggregator(aggregationBuilder, indexSearcher, false, fieldType); + + aggregator.preCollection(); + indexSearcher.search(new MatchAllDocsQuery(), aggregator); + aggregator.postCollection(); + Terms result = reduce(aggregator); + assertEquals(5, result.getBuckets().size()); + assertEquals("", result.getBuckets().get(0).getKeyAsString()); + assertEquals(2L, result.getBuckets().get(0).getDocCount()); + assertEquals("a", result.getBuckets().get(1).getKeyAsString()); + assertEquals(2L, result.getBuckets().get(1).getDocCount()); + assertEquals("b", result.getBuckets().get(2).getKeyAsString()); + assertEquals(2L, result.getBuckets().get(2).getDocCount()); + assertEquals("c", result.getBuckets().get(3).getKeyAsString()); + assertEquals(1L, result.getBuckets().get(3).getDocCount()); + assertEquals("d", result.getBuckets().get(4).getKeyAsString()); + assertEquals(1L, result.getBuckets().get(4).getDocCount()); + assertTrue(AggregationInspectionHelper.hasValue((InternalTerms) result)); + + assertEquals(expectedCollectCount, aggregator.getCollectCount().get()); } } } } - /** - * This test case utilizes the default implementation of GlobalOrdinalsStringTermsAggregator. - * In this case, the segment terms will not get initialized and will run without LeafBucketCollector#termDocFreqCollector optimization - */ public void testStringIncludeExclude() throws Exception { - testStringIncludeExclude( - (document, field, value) -> document.add(new SortedSetDocValuesField(field, new BytesRef(value))), - DEFAULT_POST_COLLECTION - ); - } - - /** - * This test case utilizes the default implementation of GlobalOrdinalsStringTermsAggregator. 
- * In this case, the segment terms will get initialized and will use LeafBucketCollector#termDocFreqCollector optimization - */ - public void testStringIncludeExcludeWithStoredValues() throws Exception { - // aggregator.postCollection() is not required when LeafBucketCollector#termDocFreqCollector optimization is used - // This also verifies that the bucket count is completed without running postCollection() - testStringIncludeExclude((document, field, value) -> { - document.add(new SortedSetDocValuesField(field, new BytesRef(value))); - document.add(new StringField(field, value, Field.Store.NO)); - }, NOOP_POST_COLLECTION); - } - - private void testStringIncludeExclude(TriConsumer addField, Consumer postCollectionConsumer) - throws Exception { try (Directory directory = newDirectory()) { try (RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory)) { Document document = new Document(); - addField.apply(document, "mv_field", "val000"); - addField.apply(document, "mv_field", "val001"); - addField.apply(document, "sv_field", "val001"); + document.add(new SortedSetDocValuesField("mv_field", new BytesRef("val000"))); + document.add(new SortedSetDocValuesField("mv_field", new BytesRef("val001"))); + document.add(new SortedDocValuesField("sv_field", new BytesRef("val001"))); indexWriter.addDocument(document); document = new Document(); - addField.apply(document, "mv_field", "val002"); - addField.apply(document, "mv_field", "val003"); - addField.apply(document, "sv_field", "val003"); + document.add(new SortedSetDocValuesField("mv_field", new BytesRef("val002"))); + document.add(new SortedSetDocValuesField("mv_field", new BytesRef("val003"))); + document.add(new SortedDocValuesField("sv_field", new BytesRef("val003"))); indexWriter.addDocument(document); document = new Document(); - addField.apply(document, "mv_field", "val004"); - addField.apply(document, "mv_field", "val005"); - addField.apply(document, "sv_field", "val005"); + document.add(new SortedSetDocValuesField("mv_field", new BytesRef("val004"))); + document.add(new SortedSetDocValuesField("mv_field", new BytesRef("val005"))); + document.add(new SortedDocValuesField("sv_field", new BytesRef("val005"))); indexWriter.addDocument(document); document = new Document(); - addField.apply(document, "mv_field", "val006"); - addField.apply(document, "mv_field", "val007"); - addField.apply(document, "sv_field", "val007"); + document.add(new SortedSetDocValuesField("mv_field", new BytesRef("val006"))); + document.add(new SortedSetDocValuesField("mv_field", new BytesRef("val007"))); + document.add(new SortedDocValuesField("sv_field", new BytesRef("val007"))); indexWriter.addDocument(document); document = new Document(); - addField.apply(document, "mv_field", "val008"); - addField.apply(document, "mv_field", "val009"); - addField.apply(document, "sv_field", "val009"); + document.add(new SortedSetDocValuesField("mv_field", new BytesRef("val008"))); + document.add(new SortedSetDocValuesField("mv_field", new BytesRef("val009"))); + document.add(new SortedDocValuesField("sv_field", new BytesRef("val009"))); indexWriter.addDocument(document); document = new Document(); - addField.apply(document, "mv_field", "val010"); - addField.apply(document, "mv_field", "val011"); - addField.apply(document, "sv_field", "val011"); + document.add(new SortedSetDocValuesField("mv_field", new BytesRef("val010"))); + document.add(new SortedSetDocValuesField("mv_field", new BytesRef("val011"))); + document.add(new SortedDocValuesField("sv_field", new 
BytesRef("val011"))); indexWriter.addDocument(document); try (IndexReader indexReader = maybeWrapReaderEs(indexWriter.getReader())) { IndexSearcher indexSearcher = newIndexSearcher(indexReader); @@ -405,7 +438,7 @@ private void testStringIncludeExclude(TriConsumer addF TermsAggregator aggregator = createAggregator(aggregationBuilder, indexSearcher, fieldType); aggregator.preCollection(); indexSearcher.search(new MatchAllDocsQuery(), aggregator); - postCollectionConsumer.accept(aggregator); + aggregator.postCollection(); Terms result = reduce(aggregator); assertEquals(10, result.getBuckets().size()); assertEquals("val000", result.getBuckets().get(0).getKeyAsString()); @@ -440,7 +473,7 @@ private void testStringIncludeExclude(TriConsumer addF aggregator = createAggregator(aggregationBuilder, indexSearcher, fieldType2); aggregator.preCollection(); indexSearcher.search(new MatchAllDocsQuery(), aggregator); - postCollectionConsumer.accept(aggregator); + aggregator.postCollection(); result = reduce(aggregator); assertEquals(5, result.getBuckets().size()); assertEquals("val001", result.getBuckets().get(0).getKeyAsString()); @@ -464,7 +497,7 @@ private void testStringIncludeExclude(TriConsumer addF aggregator = createAggregator(aggregationBuilder, indexSearcher, fieldType); aggregator.preCollection(); indexSearcher.search(new MatchAllDocsQuery(), aggregator); - postCollectionConsumer.accept(aggregator); + aggregator.postCollection(); result = reduce(aggregator); assertEquals(8, result.getBuckets().size()); assertEquals("val002", result.getBuckets().get(0).getKeyAsString()); @@ -493,7 +526,7 @@ private void testStringIncludeExclude(TriConsumer addF aggregator = createAggregator(aggregationBuilder, indexSearcher, fieldType); aggregator.preCollection(); indexSearcher.search(new MatchAllDocsQuery(), aggregator); - postCollectionConsumer.accept(aggregator); + aggregator.postCollection(); result = reduce(aggregator); assertEquals(2, result.getBuckets().size()); assertEquals("val010", result.getBuckets().get(0).getKeyAsString()); @@ -510,7 +543,7 @@ private void testStringIncludeExclude(TriConsumer addF aggregator = createAggregator(aggregationBuilder, indexSearcher, fieldType); aggregator.preCollection(); indexSearcher.search(new MatchAllDocsQuery(), aggregator); - postCollectionConsumer.accept(aggregator); + aggregator.postCollection(); result = reduce(aggregator); assertEquals(2, result.getBuckets().size()); assertEquals("val000", result.getBuckets().get(0).getKeyAsString()); @@ -542,7 +575,7 @@ private void testStringIncludeExclude(TriConsumer addF aggregator = createAggregator(aggregationBuilder, indexSearcher, fieldType); aggregator.preCollection(); indexSearcher.search(new MatchAllDocsQuery(), aggregator); - postCollectionConsumer.accept(aggregator); + aggregator.postCollection(); result = reduce(aggregator); assertEquals(2, result.getBuckets().size()); assertEquals("val000", result.getBuckets().get(0).getKeyAsString()); diff --git a/test/framework/src/main/java/org/opensearch/search/aggregations/AggregatorTestCase.java b/test/framework/src/main/java/org/opensearch/search/aggregations/AggregatorTestCase.java index 9fdae80bd1ada..4eb49ebb42241 100644 --- a/test/framework/src/main/java/org/opensearch/search/aggregations/AggregatorTestCase.java +++ b/test/framework/src/main/java/org/opensearch/search/aggregations/AggregatorTestCase.java @@ -124,6 +124,7 @@ import org.opensearch.search.aggregations.AggregatorFactories.Builder; import 
org.opensearch.search.aggregations.MultiBucketConsumerService.MultiBucketConsumer; import org.opensearch.search.aggregations.bucket.nested.NestedAggregationBuilder; +import org.opensearch.search.aggregations.bucket.terms.TermsAggregator; import org.opensearch.search.aggregations.metrics.MetricsAggregator; import org.opensearch.search.aggregations.pipeline.PipelineAggregator; import org.opensearch.search.aggregations.pipeline.PipelineAggregator.PipelineTree; @@ -150,6 +151,7 @@ import java.util.List; import java.util.Map; import java.util.Objects; +import java.util.concurrent.atomic.AtomicInteger; import java.util.function.Consumer; import java.util.function.Function; import java.util.function.Supplier; @@ -181,23 +183,10 @@ public abstract class AggregatorTestCase extends OpenSearchTestCase { // A list of field types that should not be tested, or are not currently supported private static List TYPE_TEST_DENYLIST; - protected static final Consumer DEFAULT_POST_COLLECTION = termsAggregator -> { - try { - termsAggregator.postCollection(); - } catch (IOException e) { - throw new RuntimeException(e); - } - }; - - // aggregator.postCollection() is not required when LeafBucketCollector#termDocFreqCollector optimization is used. - // using NOOP_POST_COLLECTION_CONSUMER ensures that the bucket count in aggregation is completed before/without running postCollection() - protected static final Consumer NOOP_POST_COLLECTION = termsAggregator -> {}; + protected static final TriConsumer ADD_SORTED_SET_FIELD_NOT_INDEXED = (document, field, value) -> document + .add(new SortedSetDocValuesField(field, new BytesRef(value))); - protected static final TriConsumer ADD_SORTED_FIELD_NO_STORE = (document, field, value) -> document.add( - new SortedSetDocValuesField(field, new BytesRef(value)) - ); - - protected static final TriConsumer ADD_SORTED_FIELD_STORE = (document, field, value) -> { + protected static final TriConsumer ADD_SORTED_SET_FIELD_INDEXED = (document, field, value) -> { document.add(new SortedSetDocValuesField(field, new BytesRef(value))); document.add(new StringField(field, value, Field.Store.NO)); }; @@ -457,7 +446,6 @@ protected QueryShardContext queryShardContextMock( CircuitBreakerService circuitBreakerService, BigArrays bigArrays ) { - return new QueryShardContext( 0, indexSettings, @@ -508,16 +496,6 @@ protected A searchAndReduc return searchAndReduce(createIndexSettings(), searcher, query, builder, DEFAULT_MAX_BUCKETS, fieldTypes); } - protected A searchAndReduce( - IndexSearcher searcher, - Query query, - AggregationBuilder builder, - Consumer postCollectionConsumer, - MappedFieldType... fieldTypes - ) throws IOException { - return searchAndReduce(createIndexSettings(), searcher, query, builder, DEFAULT_MAX_BUCKETS, postCollectionConsumer, fieldTypes); - } - protected A searchAndReduce( IndexSettings indexSettings, IndexSearcher searcher, @@ -538,17 +516,6 @@ protected A searchAndReduc return searchAndReduce(createIndexSettings(), searcher, query, builder, maxBucket, fieldTypes); } - protected A searchAndReduce( - IndexSettings indexSettings, - IndexSearcher searcher, - Query query, - AggregationBuilder builder, - int maxBucket, - MappedFieldType... fieldTypes - ) throws IOException { - return searchAndReduce(indexSettings, searcher, query, builder, maxBucket, DEFAULT_POST_COLLECTION, fieldTypes); - } - /** * Collects all documents that match the provided query {@link Query} and * returns the reduced {@link InternalAggregation}. 
@@ -563,7 +530,6 @@ protected A searchAndReduc Query query, AggregationBuilder builder, int maxBucket, - Consumer postCollectionConsumer, MappedFieldType... fieldTypes ) throws IOException { final IndexReaderContext ctx = searcher.getTopReaderContext(); @@ -594,13 +560,13 @@ protected A searchAndReduc a.preCollection(); Weight weight = subSearcher.createWeight(rewritten, ScoreMode.COMPLETE, 1f); subSearcher.search(weight, a); - postCollectionConsumer.accept(a); + a.postCollection(); aggs.add(a.buildTopLevel()); } } else { root.preCollection(); searcher.search(rewritten, root); - postCollectionConsumer.accept(root); + root.postCollection(); aggs.add(root.buildTopLevel()); } @@ -1142,6 +1108,89 @@ protected void doWriteTo(StreamOutput out) throws IOException { } } + /** + * Wrapper around Aggregator class + * Maintains a count for times collect() is invoked - number of documents visited + */ + protected static class CountingAggregator extends Aggregator { + private final AtomicInteger collectCounter; + public final Aggregator delegate; + + public CountingAggregator(AtomicInteger collectCounter, TermsAggregator delegate) { + this.collectCounter = collectCounter; + this.delegate = delegate; + } + + public AtomicInteger getCollectCount() { + return collectCounter; + } + + @Override + public void close() { + delegate.close(); + } + + @Override + public String name() { + return delegate.name(); + } + + @Override + public SearchContext context() { + return delegate.context(); + } + + @Override + public Aggregator parent() { + return delegate.parent(); + } + + @Override + public Aggregator subAggregator(String name) { + return delegate.subAggregator(name); + } + + @Override + public InternalAggregation[] buildAggregations(long[] owningBucketOrds) throws IOException { + return delegate.buildAggregations(owningBucketOrds); + } + + @Override + public InternalAggregation buildEmptyAggregation() { + return delegate.buildEmptyAggregation(); + } + + @Override + public LeafBucketCollector getLeafCollector(LeafReaderContext ctx) throws IOException { + return new LeafBucketCollector() { + @Override + public void collect(int doc, long bucket) throws IOException { + delegate.getLeafCollector(ctx).collect(doc, bucket); + collectCounter.incrementAndGet(); + } + }; + } + + @Override + public ScoreMode scoreMode() { + return delegate.scoreMode(); + } + + @Override + public void preCollection() throws IOException { + delegate.preCollection(); + } + + @Override + public void postCollection() throws IOException { + delegate.postCollection(); + } + + public void setWeight(Weight weight) { + this.delegate.setWeight(weight); + } + } + public static class InternalAggCardinality extends InternalAggregation { private final CardinalityUpperBound cardinality; From 128db478a4a8b5b15febc60214fbd4b2a79c1525 Mon Sep 17 00:00:00 2001 From: Sandesh Kumar Date: Fri, 8 Mar 2024 16:32:12 -0800 Subject: [PATCH 28/30] test typos Signed-off-by: Sandesh Kumar --- .../bucket/terms/TermsAggregatorTests.java | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/server/src/test/java/org/opensearch/search/aggregations/bucket/terms/TermsAggregatorTests.java b/server/src/test/java/org/opensearch/search/aggregations/bucket/terms/TermsAggregatorTests.java index b8a08068f76a3..8272c31e95e54 100644 --- a/server/src/test/java/org/opensearch/search/aggregations/bucket/terms/TermsAggregatorTests.java +++ b/server/src/test/java/org/opensearch/search/aggregations/bucket/terms/TermsAggregatorTests.java @@ -228,6 +228,7 @@ 
protected List getSupportedValuesSourceTypes() { } public void testUsesGlobalOrdinalsByDefault() throws Exception { + boolean randomizeAggregatorImpl = false; Directory directory = newDirectory(); RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory); indexWriter.close(); @@ -239,35 +240,35 @@ public void testUsesGlobalOrdinalsByDefault() throws Exception { .field("string"); MappedFieldType fieldType = new KeywordFieldMapper.KeywordFieldType("string"); - TermsAggregator aggregator = createAggregator(aggregationBuilder, indexSearcher, false, fieldType); + TermsAggregator aggregator = createAggregator(aggregationBuilder, indexSearcher, randomizeAggregatorImpl, fieldType); assertThat(aggregator, instanceOf(GlobalOrdinalsStringTermsAggregator.class)); GlobalOrdinalsStringTermsAggregator globalAgg = (GlobalOrdinalsStringTermsAggregator) aggregator; assertThat(globalAgg.descriptCollectionStrategy(), equalTo("dense")); // Infers depth_first because the maxOrd is 0 which is less than the size aggregationBuilder.subAggregation(AggregationBuilders.cardinality("card").field("string")); - aggregator = createAggregator(aggregationBuilder, indexSearcher, fieldType); + aggregator = createAggregator(aggregationBuilder, indexSearcher, randomizeAggregatorImpl, fieldType); assertThat(aggregator, instanceOf(GlobalOrdinalsStringTermsAggregator.class)); globalAgg = (GlobalOrdinalsStringTermsAggregator) aggregator; assertThat(globalAgg.collectMode, equalTo(Aggregator.SubAggCollectionMode.DEPTH_FIRST)); assertThat(globalAgg.descriptCollectionStrategy(), equalTo("remap")); aggregationBuilder.collectMode(Aggregator.SubAggCollectionMode.DEPTH_FIRST); - aggregator = createAggregator(aggregationBuilder, indexSearcher, fieldType); + aggregator = createAggregator(aggregationBuilder, indexSearcher, randomizeAggregatorImpl, fieldType); assertThat(aggregator, instanceOf(GlobalOrdinalsStringTermsAggregator.class)); globalAgg = (GlobalOrdinalsStringTermsAggregator) aggregator; assertThat(globalAgg.collectMode, equalTo(Aggregator.SubAggCollectionMode.DEPTH_FIRST)); assertThat(globalAgg.descriptCollectionStrategy(), equalTo("remap")); aggregationBuilder.collectMode(Aggregator.SubAggCollectionMode.BREADTH_FIRST); - aggregator = createAggregator(aggregationBuilder, indexSearcher, fieldType); + aggregator = createAggregator(aggregationBuilder, indexSearcher, randomizeAggregatorImpl, fieldType); assertThat(aggregator, instanceOf(GlobalOrdinalsStringTermsAggregator.class)); globalAgg = (GlobalOrdinalsStringTermsAggregator) aggregator; assertThat(globalAgg.collectMode, equalTo(Aggregator.SubAggCollectionMode.BREADTH_FIRST)); assertThat(globalAgg.descriptCollectionStrategy(), equalTo("dense")); aggregationBuilder.order(BucketOrder.aggregation("card", true)); - aggregator = createAggregator(aggregationBuilder, indexSearcher, fieldType); + aggregator = createAggregator(aggregationBuilder, indexSearcher, randomizeAggregatorImpl, fieldType); assertThat(aggregator, instanceOf(GlobalOrdinalsStringTermsAggregator.class)); globalAgg = (GlobalOrdinalsStringTermsAggregator) aggregator; assertThat(globalAgg.descriptCollectionStrategy(), equalTo("remap")); @@ -350,9 +351,8 @@ private void testSimple( ADD_SORTED_SET_FIELD_INDEXED.apply(document, "string", "e"); indexWriter.addDocument(document); indexWriter.deleteDocuments(new Term("string", "e")); - assertEquals(5, indexWriter.getDocStats().maxDoc); + assertEquals(5, indexWriter.getDocStats().maxDoc); // deleted document still in segment } - assertEquals(4, 
indexWriter.getDocStats().numDocs); try (IndexReader indexReader = maybeWrapReaderEs(indexWriter.getReader())) { IndexSearcher indexSearcher = newIndexSearcher(indexReader); From 7954045710ccd1ef8a831b271ad0dd66d3643dce Mon Sep 17 00:00:00 2001 From: Sandesh Kumar Date: Mon, 11 Mar 2024 00:35:06 -0700 Subject: [PATCH 29/30] Add test case for _doc_count Signed-off-by: Sandesh Kumar --- .../bucket/terms/TermsAggregatorTests.java | 45 ++++++++++++++----- 1 file changed, 35 insertions(+), 10 deletions(-) diff --git a/server/src/test/java/org/opensearch/search/aggregations/bucket/terms/TermsAggregatorTests.java b/server/src/test/java/org/opensearch/search/aggregations/bucket/terms/TermsAggregatorTests.java index 8272c31e95e54..cfb04d2aa1d19 100644 --- a/server/src/test/java/org/opensearch/search/aggregations/bucket/terms/TermsAggregatorTests.java +++ b/server/src/test/java/org/opensearch/search/aggregations/bucket/terms/TermsAggregatorTests.java @@ -278,38 +278,54 @@ public void testUsesGlobalOrdinalsByDefault() throws Exception { } /** - * This test case utilizes the default implementation of GlobalOrdinalsStringTermsAggregator. + * This test case utilizes the default implementation of GlobalOrdinalsStringTermsAggregator since collectSegmentOrds is false. */ public void testSimpleAggregation() throws Exception { // Fields not indexed: cannot use LeafBucketCollector#termDocFreqCollector - all documents are visited - testSimple(ADD_SORTED_SET_FIELD_NOT_INDEXED, false, false, TermsAggregatorFactory.ExecutionMode.GLOBAL_ORDINALS, 4); + testSimple(ADD_SORTED_SET_FIELD_NOT_INDEXED, false, false, false, TermsAggregatorFactory.ExecutionMode.GLOBAL_ORDINALS, 4); // Fields indexed, deleted documents in segment: cannot use LeafBucketCollector#termDocFreqCollector - all documents are visited - testSimple(ADD_SORTED_SET_FIELD_INDEXED, true, false, TermsAggregatorFactory.ExecutionMode.GLOBAL_ORDINALS, 4); + testSimple(ADD_SORTED_SET_FIELD_INDEXED, true, false, false, TermsAggregatorFactory.ExecutionMode.GLOBAL_ORDINALS, 4); // Fields indexed, no deleted documents in segment: will use LeafBucketCollector#termDocFreqCollector - no documents are visited - testSimple(ADD_SORTED_SET_FIELD_INDEXED, false, false, TermsAggregatorFactory.ExecutionMode.GLOBAL_ORDINALS, 0); + testSimple(ADD_SORTED_SET_FIELD_INDEXED, false, false, false, TermsAggregatorFactory.ExecutionMode.GLOBAL_ORDINALS, 0); + + // Fields indexed, no deleted documents, but a _doc_count field value present in the document: + // cannot use LeafBucketCollector#termDocFreqCollector - all documents are visited + testSimple(ADD_SORTED_SET_FIELD_INDEXED, false, true, false, TermsAggregatorFactory.ExecutionMode.GLOBAL_ORDINALS, 4); + } /** - * This test case utilizes the LowCardinality implementation of GlobalOrdinalsStringTermsAggregator.
+ * This test case utilizes the LowCardinality implementation of GlobalOrdinalsStringTermsAggregator since collectSegmentOrds is true. */ public void testSimpleAggregationLowCardinality() throws Exception { // Fields not indexed: cannot use LeafBucketCollector#termDocFreqCollector - all documents are visited - testSimple(ADD_SORTED_SET_FIELD_NOT_INDEXED, false, true, TermsAggregatorFactory.ExecutionMode.GLOBAL_ORDINALS, 4); + testSimple(ADD_SORTED_SET_FIELD_NOT_INDEXED, false, false, true, TermsAggregatorFactory.ExecutionMode.GLOBAL_ORDINALS, 4); // Fields indexed, deleted documents in segment: cannot use LeafBucketCollector#termDocFreqCollector - all documents are visited - testSimple(ADD_SORTED_SET_FIELD_INDEXED, true, true, TermsAggregatorFactory.ExecutionMode.GLOBAL_ORDINALS, 4); + testSimple(ADD_SORTED_SET_FIELD_INDEXED, true, false, true, TermsAggregatorFactory.ExecutionMode.GLOBAL_ORDINALS, 4); // Fields indexed, no deleted documents in segment: will use LeafBucketCollector#termDocFreqCollector - no documents are visited - testSimple(ADD_SORTED_SET_FIELD_INDEXED, false, true, TermsAggregatorFactory.ExecutionMode.GLOBAL_ORDINALS, 0); + testSimple(ADD_SORTED_SET_FIELD_INDEXED, false, false, true, TermsAggregatorFactory.ExecutionMode.GLOBAL_ORDINALS, 0); + + // Fields indexed, no deleted documents, but a _doc_count field value present in the document: + // cannot use LeafBucketCollector#termDocFreqCollector - all documents are visited + testSimple(ADD_SORTED_SET_FIELD_INDEXED, false, true, true, TermsAggregatorFactory.ExecutionMode.GLOBAL_ORDINALS, 4); } /** * This test case utilizes the MapStringTermsAggregator. */ public void testSimpleMapStringAggregation() throws Exception { - testSimple(ADD_SORTED_SET_FIELD_INDEXED, randomBoolean(), randomBoolean(), TermsAggregatorFactory.ExecutionMode.MAP, 4); + testSimple( + ADD_SORTED_SET_FIELD_INDEXED, + randomBoolean(), + randomBoolean(), + randomBoolean(), + TermsAggregatorFactory.ExecutionMode.MAP, + 4 + ); } /** @@ -323,6 +339,7 @@ public void testSimpleMapStringAggregation() throws Exception { private void testSimple( TriConsumer addFieldConsumer, final boolean includeDeletedDocumentsInSegment, + final boolean includeDocCountField, boolean collectSegmentOrds, TermsAggregatorFactory.ExecutionMode executionMode, final int expectedCollectCount @@ -344,6 +361,10 @@ private void testSimple( indexWriter.addDocument(document); document = new Document(); addFieldConsumer.apply(document, "string", ""); + if (includeDocCountField) { + // Adding _doc_count to one document + document.add(new NumericDocValuesField("_doc_count", 10)); + } indexWriter.addDocument(document); if (includeDeletedDocumentsInSegment) { @@ -373,7 +394,11 @@ private void testSimple( Terms result = reduce(aggregator); assertEquals(5, result.getBuckets().size()); assertEquals("", result.getBuckets().get(0).getKeyAsString()); - assertEquals(2L, result.getBuckets().get(0).getDocCount()); + if (includeDocCountField) { + assertEquals(11L, result.getBuckets().get(0).getDocCount()); + } else { + assertEquals(2L, result.getBuckets().get(0).getDocCount()); + } assertEquals("a", result.getBuckets().get(1).getKeyAsString()); assertEquals(2L, result.getBuckets().get(1).getDocCount()); assertEquals("b", result.getBuckets().get(2).getKeyAsString()); From cdc42044c7be5c727b8eb012ff92e51749e6bd4d Mon Sep 17 00:00:00 2001 From: Sandesh Kumar Date: Mon, 11 Mar 2024 17:08:10 -0700 Subject: [PATCH 30/30] Rewording for changelog and minor documentation changes Signed-off-by: Sandesh Kumar --- CHANGELOG.md | 2 +-
.../bucket/terms/GlobalOrdinalsStringTermsAggregator.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2be1d8eda34f6..7f0118711126e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -128,7 +128,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), ### Changed - Allow composite aggregation to run under a parent filter aggregation ([#11499](https://github.com/opensearch-project/OpenSearch/pull/11499)) -- Improve string terms aggregation performance using Collector#setWeight ([#11643](https://github.com/opensearch-project/OpenSearch/pull/11643)) +- Quickly compute terms aggregations when the top-level query is functionally match-all for a segment ([#11643](https://github.com/opensearch-project/OpenSearch/pull/11643)) ### Deprecated diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java index 15e538f01e632..69fda2f3f6133 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java +++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java @@ -159,7 +159,7 @@ public void setWeight(Weight weight) { } /** - Collects term frequencies for a given field from a LeafReaderContext directly from stored segment terms + Reads doc frequencies directly from the indexed terms in the segment to skip iterating through individual documents. @param ctx The LeafReaderContext to collect terms from @param globalOrds The SortedSetDocValues for the field's ordinals @param ordCountConsumer A consumer to accept collected term frequencies
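For reference, the reworded javadoc above captures the core of the termDocFreqCollector optimization: when every document in a segment matches the top-level query, the segment has no deleted documents, and no _doc_count values are present, per-term document frequencies can be read straight from the segment's term dictionary instead of visiting each document. The sketch below is a minimal, self-contained illustration of that idea, not the patch's implementation: the class name TermDocFreqSketch and the BiConsumer-based ordCountConsumer signature are assumptions made for the example, whereas the real method lives in GlobalOrdinalsStringTermsAggregator and uses the aggregator's own consumer type.

import java.io.IOException;
import java.util.function.BiConsumer;

import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.BytesRef;

/**
 * Illustrative sketch (hypothetical helper, not part of this patch): reads each term's
 * document frequency from the segment's term dictionary and reports it against the
 * term's global ordinal, without iterating over documents. Only valid when every
 * document in the segment matches the query, nothing is deleted, and no _doc_count
 * values are present.
 */
final class TermDocFreqSketch {

    private TermDocFreqSketch() {}

    static void collectTermDocFreq(
        LeafReaderContext ctx,
        String fieldName,
        SortedSetDocValues globalOrds,
        BiConsumer<Long, Integer> ordCountConsumer // (global ordinal, doc frequency) - assumed signature
    ) throws IOException {
        Terms segmentTerms = ctx.reader().terms(fieldName);
        if (segmentTerms == null) {
            return; // the field has no indexed terms in this segment
        }
        TermsEnum termsEnum = segmentTerms.iterator();
        for (BytesRef term = termsEnum.next(); term != null; term = termsEnum.next()) {
            int docFreq = termsEnum.docFreq(); // read from the term dictionary, no documents visited
            long globalOrd = globalOrds.lookupTerm(term);
            if (globalOrd >= 0) {
                ordCountConsumer.accept(globalOrd, docFreq);
            }
        }
    }
}

Under these preconditions the per-segment work is proportional to the number of unique terms rather than the number of documents, which is what the expectedCollectCount of 0 in the test cases above verifies.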