From 2ef020edc281e3f433587bb5ec17fa0999151d3e Mon Sep 17 00:00:00 2001 From: Mohammad Hasnain Mohsin Rajan Date: Tue, 2 Apr 2024 03:38:49 +0530 Subject: [PATCH] feat: constant keyword field (#12285) Constant keyword fields behave similarly to regular keyword fields, except that they are defined only in the index mapping, and all documents in the index appear to have the same value for the constant keyword field. --------- Signed-off-by: Mohammad Hasnain (cherry picked from commit 1ec49bd3aee6cd4463ae91ffb6c09054d1bbc02b) --- CHANGELOG.md | 1 + .../mapper/ConstantKeywordFieldMapper.java | 191 ++++++++++++++++++ .../org/opensearch/indices/IndicesModule.java | 2 + .../ConstantKeywordFieldMapperTests.java | 114 +++++++++++ .../mapper/ConstantKeywordFieldTypeTests.java | 93 +++++++++ .../terms/SignificantTextAggregatorTests.java | 4 +- .../aggregations/AggregatorTestCase.java | 5 + 7 files changed, 409 insertions(+), 1 deletion(-) create mode 100644 server/src/main/java/org/opensearch/index/mapper/ConstantKeywordFieldMapper.java create mode 100644 server/src/test/java/org/opensearch/index/mapper/ConstantKeywordFieldMapperTests.java create mode 100644 server/src/test/java/org/opensearch/index/mapper/ConstantKeywordFieldTypeTests.java diff --git a/CHANGELOG.md b/CHANGELOG.md index 1c1b96e322380..57fd23b3cabec 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), ## [Unreleased 2.x] ### Added - Add explicit dependency to validatePom and generatePom tasks ([#12909](https://github.com/opensearch-project/OpenSearch/pull/12909)) +- Constant Keyword Field ([#12285](https://github.com/opensearch-project/OpenSearch/pull/12285)) - [Concurrent Segment Search] Perform buildAggregation concurrently and support Composite Aggregations ([#12697](https://github.com/opensearch-project/OpenSearch/pull/12697)) - Convert ingest processor supports ip type ([#12818](https://github.com/opensearch-project/OpenSearch/pull/12818)) - Allow setting KEYSTORE_PASSWORD through env variable ([#12865](https://github.com/opensearch-project/OpenSearch/pull/12865)) diff --git a/server/src/main/java/org/opensearch/index/mapper/ConstantKeywordFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/ConstantKeywordFieldMapper.java new file mode 100644 index 0000000000000..f4730c70362d1 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/mapper/ConstantKeywordFieldMapper.java @@ -0,0 +1,191 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.mapper; + +import org.apache.lucene.search.MatchAllDocsQuery; +import org.apache.lucene.search.Query; +import org.opensearch.OpenSearchParseException; +import org.opensearch.common.annotation.PublicApi; +import org.opensearch.common.regex.Regex; +import org.opensearch.index.fielddata.IndexFieldData; +import org.opensearch.index.fielddata.plain.ConstantIndexFieldData; +import org.opensearch.index.query.QueryShardContext; +import org.opensearch.search.aggregations.support.CoreValuesSourceType; +import org.opensearch.search.lookup.SearchLookup; + +import java.io.IOException; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.function.Supplier; + +/** + * Index specific field mapper + * + * @opensearch.api + */ +@PublicApi(since = "2.14.0") +public class ConstantKeywordFieldMapper extends ParametrizedFieldMapper { + + public static final String CONTENT_TYPE = "constant_keyword"; + + private static final String valuePropertyName = "value"; + + /** + * A {@link Mapper.TypeParser} for the constant keyword field. + * + * @opensearch.internal + */ + public static class TypeParser implements Mapper.TypeParser { + @Override + public Mapper.Builder parse(String name, Map node, ParserContext parserContext) throws MapperParsingException { + if (!node.containsKey(valuePropertyName)) { + throw new OpenSearchParseException("Field [" + name + "] is missing required parameter [value]"); + } + Object value = node.remove(valuePropertyName); + if (!(value instanceof String)) { + throw new OpenSearchParseException("Field [" + name + "] is expected to be a string value"); + } + return new Builder(name, (String) value); + } + } + + private static ConstantKeywordFieldMapper toType(FieldMapper in) { + return (ConstantKeywordFieldMapper) in; + } + + /** + * Builder for the binary field mapper + * + * @opensearch.internal + */ + public static class Builder extends ParametrizedFieldMapper.Builder { + + private final Parameter value; + + public Builder(String name, String value) { + super(name); + this.value = Parameter.stringParam(valuePropertyName, false, m -> toType(m).value, value); + } + + @Override + public List> getParameters() { + return Arrays.asList(value); + } + + @Override + public ConstantKeywordFieldMapper build(BuilderContext context) { + return new ConstantKeywordFieldMapper( + name, + new ConstantKeywordFieldMapper.ConstantKeywordFieldType(buildFullName(context), value.getValue()), + multiFieldsBuilder.build(this, context), + copyTo.build(), + this + ); + } + } + + /** + * Field type for Index field mapper + * + * @opensearch.internal + */ + @PublicApi(since = "2.14.0") + protected static final class ConstantKeywordFieldType extends ConstantFieldType { + + protected final String value; + + public ConstantKeywordFieldType(String name, String value) { + super(name, Collections.emptyMap()); + this.value = value; + } + + @Override + public String typeName() { + return CONTENT_TYPE; + } + + @Override + protected boolean matches(String pattern, boolean caseInsensitive, QueryShardContext context) { + return Regex.simpleMatch(pattern, value, caseInsensitive); + } + + @Override + public Query existsQuery(QueryShardContext context) { + return new MatchAllDocsQuery(); + } + + @Override + public IndexFieldData.Builder fielddataBuilder(String fullyQualifiedIndexName, Supplier searchLookup) { + return new ConstantIndexFieldData.Builder(fullyQualifiedIndexName, name(), CoreValuesSourceType.BYTES); + } + + @Override + public ValueFetcher valueFetcher(QueryShardContext context, SearchLookup searchLookup, String format) { + if (format != null) { + throw new IllegalArgumentException("Field [" + name() + "] of type [" + typeName() + "] doesn't " + "support formats."); + } + + return new SourceValueFetcher(name(), context) { + @Override + protected Object parseSourceValue(Object value) { + String keywordValue = value.toString(); + return Collections.singletonList(keywordValue); + } + }; + } + } + + private final String value; + + protected ConstantKeywordFieldMapper( + String simpleName, + MappedFieldType mappedFieldType, + MultiFields multiFields, + CopyTo copyTo, + ConstantKeywordFieldMapper.Builder builder + ) { + super(simpleName, mappedFieldType, multiFields, copyTo); + this.value = builder.value.getValue(); + } + + public ParametrizedFieldMapper.Builder getMergeBuilder() { + return new ConstantKeywordFieldMapper.Builder(simpleName(), this.value).init(this); + } + + @Override + protected void parseCreateField(ParseContext context) throws IOException { + + final String value; + if (context.externalValueSet()) { + value = context.externalValue().toString(); + } else { + value = context.parser().textOrNull(); + } + if (value == null) { + throw new IllegalArgumentException("constant keyword field [" + name() + "] must have a value"); + } + + if (!value.equals(fieldType().value)) { + throw new IllegalArgumentException("constant keyword field [" + name() + "] must have a value of [" + this.value + "]"); + } + + } + + @Override + public ConstantKeywordFieldMapper.ConstantKeywordFieldType fieldType() { + return (ConstantKeywordFieldMapper.ConstantKeywordFieldType) super.fieldType(); + } + + @Override + protected String contentType() { + return CONTENT_TYPE; + } +} diff --git a/server/src/main/java/org/opensearch/indices/IndicesModule.java b/server/src/main/java/org/opensearch/indices/IndicesModule.java index b86e98f4ebcbc..fee2888c7a3fb 100644 --- a/server/src/main/java/org/opensearch/indices/IndicesModule.java +++ b/server/src/main/java/org/opensearch/indices/IndicesModule.java @@ -46,6 +46,7 @@ import org.opensearch.index.mapper.BinaryFieldMapper; import org.opensearch.index.mapper.BooleanFieldMapper; import org.opensearch.index.mapper.CompletionFieldMapper; +import org.opensearch.index.mapper.ConstantKeywordFieldMapper; import org.opensearch.index.mapper.DataStreamFieldMapper; import org.opensearch.index.mapper.DateFieldMapper; import org.opensearch.index.mapper.DocCountFieldMapper; @@ -168,6 +169,7 @@ public static Map getMappers(List mappe mappers.put(FieldAliasMapper.CONTENT_TYPE, new FieldAliasMapper.TypeParser()); mappers.put(GeoPointFieldMapper.CONTENT_TYPE, new GeoPointFieldMapper.TypeParser()); mappers.put(FlatObjectFieldMapper.CONTENT_TYPE, FlatObjectFieldMapper.PARSER); + mappers.put(ConstantKeywordFieldMapper.CONTENT_TYPE, new ConstantKeywordFieldMapper.TypeParser()); for (MapperPlugin mapperPlugin : mapperPlugins) { for (Map.Entry entry : mapperPlugin.getMappers().entrySet()) { diff --git a/server/src/test/java/org/opensearch/index/mapper/ConstantKeywordFieldMapperTests.java b/server/src/test/java/org/opensearch/index/mapper/ConstantKeywordFieldMapperTests.java new file mode 100644 index 0000000000000..65dd3b6447663 --- /dev/null +++ b/server/src/test/java/org/opensearch/index/mapper/ConstantKeywordFieldMapperTests.java @@ -0,0 +1,114 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.mapper; + +import org.apache.lucene.index.IndexableField; +import org.opensearch.OpenSearchParseException; +import org.opensearch.common.CheckedConsumer; +import org.opensearch.common.compress.CompressedXContent; +import org.opensearch.common.xcontent.XContentFactory; +import org.opensearch.common.xcontent.json.JsonXContent; +import org.opensearch.core.common.bytes.BytesReference; +import org.opensearch.core.xcontent.MediaTypeRegistry; +import org.opensearch.core.xcontent.XContentBuilder; +import org.opensearch.index.IndexService; +import org.opensearch.plugins.Plugin; +import org.opensearch.test.InternalSettingsPlugin; +import org.opensearch.test.OpenSearchSingleNodeTestCase; +import org.junit.Before; + +import java.io.IOException; +import java.util.Collection; + +import static org.hamcrest.Matchers.containsString; + +public class ConstantKeywordFieldMapperTests extends OpenSearchSingleNodeTestCase { + + private IndexService indexService; + private DocumentMapperParser parser; + + @Override + protected Collection> getPlugins() { + return pluginList(InternalSettingsPlugin.class); + } + + @Before + public void setup() { + indexService = createIndex("test"); + parser = indexService.mapperService().documentMapperParser(); + } + + public void testDefaultDisabledIndexMapper() throws Exception { + + XContentBuilder mapping = XContentFactory.jsonBuilder() + .startObject() + .startObject("type") + .startObject("properties") + .startObject("field") + .field("type", "constant_keyword") + .field("value", "default_value") + .endObject() + .startObject("field2") + .field("type", "keyword") + .endObject(); + mapping = mapping.endObject().endObject().endObject(); + DocumentMapper mapper = parser.parse("type", new CompressedXContent(mapping.toString())); + + MapperParsingException e = expectThrows(MapperParsingException.class, () -> mapper.parse(source(b -> { + b.field("field", "sdf"); + b.field("field2", "szdfvsddf"); + }))); + assertThat( + e.getMessage(), + containsString( + "failed to parse field [field] of type [constant_keyword] in document with id '1'. Preview of field's value: 'sdf'" + ) + ); + + final ParsedDocument doc = mapper.parse(source(b -> { + b.field("field", "default_value"); + b.field("field2", "field_2_value"); + })); + + final IndexableField field = doc.rootDoc().getField("field"); + + // constantKeywordField should not be stored + assertNull(field); + } + + public void testMissingDefaultIndexMapper() throws Exception { + + final XContentBuilder mapping = XContentFactory.jsonBuilder() + .startObject() + .startObject("type") + .startObject("properties") + .startObject("field") + .field("type", "constant_keyword") + .endObject() + .startObject("field2") + .field("type", "keyword") + .endObject() + .endObject() + .endObject() + .endObject(); + + OpenSearchParseException e = expectThrows( + OpenSearchParseException.class, + () -> parser.parse("type", new CompressedXContent(mapping.toString())) + ); + assertThat(e.getMessage(), containsString("Field [field] is missing required parameter [value]")); + } + + private final SourceToParse source(CheckedConsumer build) throws IOException { + XContentBuilder builder = JsonXContent.contentBuilder().startObject(); + build.accept(builder); + builder.endObject(); + return new SourceToParse("test", "1", BytesReference.bytes(builder), MediaTypeRegistry.JSON); + } +} diff --git a/server/src/test/java/org/opensearch/index/mapper/ConstantKeywordFieldTypeTests.java b/server/src/test/java/org/opensearch/index/mapper/ConstantKeywordFieldTypeTests.java new file mode 100644 index 0000000000000..235811539a299 --- /dev/null +++ b/server/src/test/java/org/opensearch/index/mapper/ConstantKeywordFieldTypeTests.java @@ -0,0 +1,93 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.mapper; + +import org.apache.lucene.search.MatchAllDocsQuery; +import org.apache.lucene.search.MatchNoDocsQuery; +import org.opensearch.Version; +import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.common.regex.Regex; +import org.opensearch.common.settings.Settings; +import org.opensearch.index.IndexSettings; +import org.opensearch.index.query.QueryShardContext; + +import java.util.Arrays; +import java.util.List; +import java.util.function.Predicate; + +public class ConstantKeywordFieldTypeTests extends FieldTypeTestCase { + + final ConstantKeywordFieldMapper.ConstantKeywordFieldType ft = new ConstantKeywordFieldMapper.ConstantKeywordFieldType( + "field", + "default" + ); + + public void testTermQuery() { + assertEquals(new MatchAllDocsQuery(), ft.termQuery("default", createContext())); + assertEquals(new MatchNoDocsQuery(), ft.termQuery("not_default", createContext())); + } + + public void testTermsQuery() { + assertEquals(new MatchAllDocsQuery(), ft.termsQuery(Arrays.asList("default", "not_default"), createContext())); + assertEquals(new MatchNoDocsQuery(), ft.termsQuery(Arrays.asList("no_default", "not_default"), createContext())); + assertEquals(new MatchNoDocsQuery(), ft.termsQuery(List.of(), createContext())); + } + + public void testInsensitiveTermQuery() { + assertEquals(new MatchAllDocsQuery(), ft.termQueryCaseInsensitive("defaUlt", createContext())); + assertEquals(new MatchNoDocsQuery(), ft.termQueryCaseInsensitive("not_defaUlt", createContext())); + } + + public void testPrefixQuery() { + assertEquals(new MatchAllDocsQuery(), ft.prefixQuery("defau", null, createContext())); + assertEquals(new MatchNoDocsQuery(), ft.prefixQuery("not_default", null, createContext())); + } + + public void testWildcardQuery() { + assertEquals(new MatchAllDocsQuery(), ft.wildcardQuery("defa*lt", null, createContext())); + assertEquals(new MatchNoDocsQuery(), ft.wildcardQuery("no_defa*lt", null, createContext())); + assertEquals(new MatchAllDocsQuery(), ft.wildcardQuery("defa*", null, createContext())); + assertEquals(new MatchAllDocsQuery(), ft.wildcardQuery("*ult", null, createContext())); + + } + + public void testExistsQuery() { + assertEquals(new MatchAllDocsQuery(), ft.existsQuery(createContext())); + } + + private QueryShardContext createContext() { + IndexMetadata indexMetadata = IndexMetadata.builder("index") + .settings(Settings.builder().put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT)) + .numberOfShards(1) + .numberOfReplicas(0) + .build(); + IndexSettings indexSettings = new IndexSettings(indexMetadata, Settings.EMPTY); + + Predicate indexNameMatcher = pattern -> Regex.simpleMatch(pattern, "index"); + return new QueryShardContext( + 0, + indexSettings, + null, + null, + null, + null, + null, + null, + xContentRegistry(), + writableRegistry(), + null, + null, + System::currentTimeMillis, + null, + indexNameMatcher, + () -> true, + null + ); + } +} diff --git a/server/src/test/java/org/opensearch/search/aggregations/bucket/terms/SignificantTextAggregatorTests.java b/server/src/test/java/org/opensearch/search/aggregations/bucket/terms/SignificantTextAggregatorTests.java index e9b2d40fd4ede..644cee57bd5a4 100644 --- a/server/src/test/java/org/opensearch/search/aggregations/bucket/terms/SignificantTextAggregatorTests.java +++ b/server/src/test/java/org/opensearch/search/aggregations/bucket/terms/SignificantTextAggregatorTests.java @@ -50,6 +50,7 @@ import org.opensearch.index.analysis.AnalyzerScope; import org.opensearch.index.analysis.NamedAnalyzer; import org.opensearch.index.mapper.BinaryFieldMapper; +import org.opensearch.index.mapper.ConstantKeywordFieldMapper; import org.opensearch.index.mapper.FlatObjectFieldMapper; import org.opensearch.index.mapper.GeoPointFieldMapper; import org.opensearch.index.mapper.MappedFieldType; @@ -104,7 +105,8 @@ protected List unsupportedMappedFieldTypes() { return Arrays.asList( BinaryFieldMapper.CONTENT_TYPE, // binary fields are not supported because they do not have analyzers GeoPointFieldMapper.CONTENT_TYPE, // geopoint fields cannot use term queries - FlatObjectFieldMapper.CONTENT_TYPE // flat_object fields are not supported aggregations + FlatObjectFieldMapper.CONTENT_TYPE, // flat_object fields are not supported aggregations + ConstantKeywordFieldMapper.CONTENT_TYPE // binary fields are not supported because they do not have analyzers ); } diff --git a/test/framework/src/main/java/org/opensearch/search/aggregations/AggregatorTestCase.java b/test/framework/src/main/java/org/opensearch/search/aggregations/AggregatorTestCase.java index 4eb49ebb42241..f83163bd139cd 100644 --- a/test/framework/src/main/java/org/opensearch/search/aggregations/AggregatorTestCase.java +++ b/test/framework/src/main/java/org/opensearch/search/aggregations/AggregatorTestCase.java @@ -95,6 +95,7 @@ import org.opensearch.index.fielddata.IndexFieldDataService; import org.opensearch.index.mapper.BinaryFieldMapper; import org.opensearch.index.mapper.CompletionFieldMapper; +import org.opensearch.index.mapper.ConstantKeywordFieldMapper; import org.opensearch.index.mapper.ContentPath; import org.opensearch.index.mapper.DateFieldMapper; import org.opensearch.index.mapper.FieldAliasMapper; @@ -778,6 +779,10 @@ public void testSupportedFieldTypes() throws IOException { source.put("doc_values", "true"); } + if (mappedType.getKey().equals(ConstantKeywordFieldMapper.CONTENT_TYPE) == true) { + source.put("value", "default_value"); + } + Mapper.Builder builder = mappedType.getValue().parse(fieldName, source, new MockParserContext()); FieldMapper mapper = (FieldMapper) builder.build(new BuilderContext(settings, new ContentPath()));