Skip to content

Commit

Permalink
Merge pull request #32491 from vespa-engine/bratseth/segment-on-index
Browse files Browse the repository at this point in the history
Require positive proof of an index field to segment
  • Loading branch information
bratseth authored Sep 30, 2024
2 parents a2ab633 + 57619af commit 5fd7a5d
Show file tree
Hide file tree
Showing 6 changed files with 34 additions and 12 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
*
* @author baldersheim
*/

public class ExactStringItem extends WordItem {

public ExactStringItem(String substring) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
import java.util.Objects;
import java.util.Optional;


/**
* A term which contains a fixed length phrase, a collection of word terms,
* resulting from a single segmentation operation.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
import com.yahoo.language.detect.Detector;
import com.yahoo.language.process.Normalizer;
import com.yahoo.language.process.Segmenter;
import com.yahoo.prelude.Index;
import com.yahoo.prelude.IndexFacts;
import com.yahoo.prelude.Location;
import com.yahoo.prelude.query.AndItem;
Expand Down Expand Up @@ -741,7 +742,7 @@ private Item instantiatePhraseSegmentItem(String field, OperatorNode<ExpressionO
words = segmenter.segment(origin.getValue(), currentlyParsing.getLanguage());
}

if (words != null && words.size() > 0) {
if (words != null && ! words.isEmpty()) {
for (String word : words) {
phrase.addItem(new WordItem(word, field, true));
}
Expand Down Expand Up @@ -1515,6 +1516,7 @@ private Item instantiateWordItem(String field,
boolean substrMatch = getAnnotation(ast, SUBSTRING, Boolean.class, Boolean.FALSE,
"setting for whether to use substring match of input data");
boolean exact = exactMatch != null ? exactMatch : indexFactsSession.getIndex(indexNameExpander.expand(field)).isExact();

String grammar = getAnnotation(ast, USER_INPUT_GRAMMAR, String.class,
Query.Type.WEAKAND.toString(), "grammar for handling word input");
Preconditions.checkArgument((prefixMatch ? 1 : 0) +
Expand Down Expand Up @@ -1558,7 +1560,7 @@ private Item instantiateWordItem(String field,
}

/**
 * Decides whether a term for the given field should be segmented.
 * Always false when fromQuery is false; otherwise the answer comes from the
 * index settings looked up for the expanded field name.
 *
 * NOTE(review): this span comes from a diff rendering whose +/- markers were lost,
 * so two return statements appear back to back. Presumably the first is the removed
 * line and the second is the added one - confirm against the repository.
 */
private boolean shouldSegment(String field, boolean fromQuery) {
// Variant 1: segment unless the looked-up index reports isAttribute().
return fromQuery && ! indexFactsSession.getIndex(indexNameExpander.expand(field)).isAttribute();
// Variant 2: segment only when the looked-up index reports isIndex().
return fromQuery && indexFactsSession.getIndex(indexNameExpander.expand(field)).isIndex();
}

private TaggableItem segment(String field, OperatorNode<ExpressionOperator> ast, String wordData,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,7 @@
import com.yahoo.component.chain.Chain;
import com.yahoo.language.Language;
import com.yahoo.language.Linguistics;
import com.yahoo.prelude.IndexFacts;
import com.yahoo.prelude.IndexFactsFactory;
import com.yahoo.prelude.*;
import com.yahoo.prelude.query.Item;
import com.yahoo.prelude.query.NullItem;
import com.yahoo.prelude.query.parser.TestLinguistics;
Expand Down Expand Up @@ -57,8 +56,15 @@ void testCjkQueryWithOverlappingTokens() {

/**
 * Runs a YQL query mixing an equiv() over 'a' and 'b c' with a single Chinese term
 * through MinimalQueryInserter and CJKSearcher, and checks the resulting query tree string.
 *
 * NOTE(review): this span comes from a diff rendering whose +/- markers were lost,
 * so the Execution is constructed twice below. Presumably the first construction is
 * the removed line and the second is the added one - confirm against the repository.
 */
@Test
public void testEquivAndChinese() {
// Build index facts for schema "music-only" with one field, "default", flagged as both index and string.
SearchDefinition schema = new SearchDefinition("music-only");
Index stringIndex = new Index("default");
stringIndex.setIndex(true);
stringIndex.setString(true);
schema.addIndex(stringIndex);
var indexFacts = new IndexFacts(new IndexModel(schema));

Query query = new Query(QueryTestCase.httpEncode("search?yql=select * from music-only where default contains equiv('a', 'b c') or default contains '东'"));
// Variant 1: context stub created without any index facts.
new Execution(new Chain<>(new MinimalQueryInserter(), new CJKSearcher()), Execution.Context.createContextStub()).search(query);
// Variant 2: context stub created with the index facts built above.
new Execution(new Chain<>(new MinimalQueryInserter(), new CJKSearcher()), Execution.Context.createContextStub(indexFacts)).search(query);
// The equiv alternatives and the Chinese term are each expected as single terms on "default".
assertEquals("OR (EQUIV default:a default:'b c') default:东", query.getModel().getQueryTree().toString());
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,7 @@ void testUserLanguageIsDetectedWithUserQueryEnglishAlsoWithNonEnglishStructuredQ
Result result = execution.search(query);
assertNull(result.hits().getError());
assertEquals(Language.ENGLISH, query.getModel().getParsingLanguage()); // by UNKNOWN -> ENGLISH
assertEquals("AND attribute_key:我能吞下玻璃而不伤身体 (WEAKAND(100) executions)", query.getModel().getQueryTree().toString());
assertEquals("AND attribute_key:我能吞下玻璃而不伤身体 (WEAKAND(100) executions)", query.getModel().getQueryTree().toString());
}

@Test
Expand All @@ -195,7 +195,7 @@ void testUserLanguageIsDetectedWithUserInputEnglishAlsoWithNonEnglishStructuredQ
Result result = execution.search(query);
assertNull(result.hits().getError());
assertEquals(Language.ENGLISH, query.getModel().getParsingLanguage()); // by UNKNOWN -> ENGLISH
assertEquals("AND attribute_key:我能吞下玻璃而不伤身体 (WEAKAND(100) default:executions)", query.getModel().getQueryTree().toString());
assertEquals("AND attribute_key:我能吞下玻璃而不伤身体 (WEAKAND(100) default:executions)", query.getModel().getQueryTree().toString());
}

@Test
Expand Down
Loading

0 comments on commit 5fd7a5d

Please sign in to comment.