Skip to content

Commit

Permalink
Merge pull request #32493 from vespa-engine/revert-32491-bratseth/segment-on-index
Browse files Browse the repository at this point in the history

Revert "Require positive proof of an index field to segment"
  • Loading branch information
bratseth authored Sep 30, 2024
2 parents 5fd7a5d + b4db18b commit bd71de0
Show file tree
Hide file tree
Showing 6 changed files with 12 additions and 34 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
*
* @author baldersheim
*/

public class ExactStringItem extends WordItem {

public ExactStringItem(String substring) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import java.util.Objects;
import java.util.Optional;


/**
* A term which contains a fixed length phrase, a collection of word terms,
* resulting from a single segmentation operation.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@
import com.yahoo.language.detect.Detector;
import com.yahoo.language.process.Normalizer;
import com.yahoo.language.process.Segmenter;
import com.yahoo.prelude.Index;
import com.yahoo.prelude.IndexFacts;
import com.yahoo.prelude.Location;
import com.yahoo.prelude.query.AndItem;
Expand Down Expand Up @@ -742,7 +741,7 @@ private Item instantiatePhraseSegmentItem(String field, OperatorNode<ExpressionO
words = segmenter.segment(origin.getValue(), currentlyParsing.getLanguage());
}

if (words != null && ! words.isEmpty()) {
if (words != null && words.size() > 0) {
for (String word : words) {
phrase.addItem(new WordItem(word, field, true));
}
Expand Down Expand Up @@ -1516,7 +1515,6 @@ private Item instantiateWordItem(String field,
boolean substrMatch = getAnnotation(ast, SUBSTRING, Boolean.class, Boolean.FALSE,
"setting for whether to use substring match of input data");
boolean exact = exactMatch != null ? exactMatch : indexFactsSession.getIndex(indexNameExpander.expand(field)).isExact();

String grammar = getAnnotation(ast, USER_INPUT_GRAMMAR, String.class,
Query.Type.WEAKAND.toString(), "grammar for handling word input");
Preconditions.checkArgument((prefixMatch ? 1 : 0) +
Expand Down Expand Up @@ -1560,7 +1558,7 @@ private Item instantiateWordItem(String field,
}

private boolean shouldSegment(String field, boolean fromQuery) {
return fromQuery && indexFactsSession.getIndex(indexNameExpander.expand(field)).isIndex();
return fromQuery && ! indexFactsSession.getIndex(indexNameExpander.expand(field)).isAttribute();
}

private TaggableItem segment(String field, OperatorNode<ExpressionOperator> ast, String wordData,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@
import com.yahoo.component.chain.Chain;
import com.yahoo.language.Language;
import com.yahoo.language.Linguistics;
import com.yahoo.prelude.*;
import com.yahoo.prelude.IndexFacts;
import com.yahoo.prelude.IndexFactsFactory;
import com.yahoo.prelude.query.Item;
import com.yahoo.prelude.query.NullItem;
import com.yahoo.prelude.query.parser.TestLinguistics;
Expand Down Expand Up @@ -56,15 +57,8 @@ void testCjkQueryWithOverlappingTokens() {

@Test
public void testEquivAndChinese() {
SearchDefinition schema = new SearchDefinition("music-only");
Index stringIndex = new Index("default");
stringIndex.setIndex(true);
stringIndex.setString(true);
schema.addIndex(stringIndex);
var indexFacts = new IndexFacts(new IndexModel(schema));

Query query = new Query(QueryTestCase.httpEncode("search?yql=select * from music-only where default contains equiv('a', 'b c') or default contains '东'"));
new Execution(new Chain<>(new MinimalQueryInserter(), new CJKSearcher()), Execution.Context.createContextStub(indexFacts)).search(query);
new Execution(new Chain<>(new MinimalQueryInserter(), new CJKSearcher()), Execution.Context.createContextStub()).search(query);
assertEquals("OR (EQUIV default:a default:'b c') default:东", query.getModel().getQueryTree().toString());
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,7 @@ void testUserLanguageIsDetectedWithUserQueryEnglishAlsoWithNonEnglishStructuredQ
Result result = execution.search(query);
assertNull(result.hits().getError());
assertEquals(Language.ENGLISH, query.getModel().getParsingLanguage()); // by UNKNOWN -> ENGLISH
assertEquals("AND attribute_key:我能吞下玻璃而不伤身体 (WEAKAND(100) executions)", query.getModel().getQueryTree().toString());
assertEquals("AND attribute_key:我能吞下玻璃而不伤身体 (WEAKAND(100) executions)", query.getModel().getQueryTree().toString());
}

@Test
Expand All @@ -195,7 +195,7 @@ void testUserLanguageIsDetectedWithUserInputEnglishAlsoWithNonEnglishStructuredQ
Result result = execution.search(query);
assertNull(result.hits().getError());
assertEquals(Language.ENGLISH, query.getModel().getParsingLanguage()); // by UNKNOWN -> ENGLISH
assertEquals("AND attribute_key:我能吞下玻璃而不伤身体 (WEAKAND(100) default:executions)", query.getModel().getQueryTree().toString());
assertEquals("AND attribute_key:我能吞下玻璃而不伤身体 (WEAKAND(100) default:executions)", query.getModel().getQueryTree().toString());
}

@Test
Expand Down
Loading

0 comments on commit bd71de0

Please sign in to comment.