Merge pull request #525 from taynaud/master

Allow specifying match-features in RankProfile
vespa-engine · Aug 8, 2023 · 7df2e72 · 7df2e72
2 parents c375b4e + 90bfead
commit 7df2e72
Show file tree

Hide file tree

Showing 2 changed files with 28 additions and 12 deletions.
diff --git a/vespa/package.py b/vespa/package.py
@@ -842,7 +842,8 @@ def __init__(
 
         This is the initial ranking performed on all matching documents. Check the `Vespa documentation <https://docs.vespa.ai/en/reference/schema-reference.html#firstphase-rank>`__`
         for more detailed information about first phase ranking configuration.
-        
+
+
         :param expression: Specify the ranking expression to be used for first phase of ranking. Check also the
             `Vespa documentation <https://docs.vespa.ai/en/reference/ranking-expressions.html>`__`
             for ranking expression.
@@ -868,7 +869,7 @@ def __eq__(self, other: object) -> bool:
             and self.keep_rank_count == other.keep_rank_count
             and self.rank_score_drop_limit == other.rank_score_drop_limit
         )
-    
+
     def __repr__(self) -> str:
         return "{0}({1}, {2}, {3})".format(
             self.__class__.__name__,
@@ -918,6 +919,7 @@ class RankProfileFields(TypedDict, total=False):
     constants: Dict
     functions: List[Function]
     summary_features: List
+    match_features: List
     second_phase: SecondPhaseRanking
     weight: List[Tuple[str, int]]
     rank_type: List[Tuple[str, str]]
@@ -930,11 +932,12 @@ def __init__(
         self,
         name: str,
         # Allow a str object as expression for backwards compatibility
-        first_phase: Union[str, FirstPhaseRanking], 
+        first_phase: Union[str, FirstPhaseRanking],
         inherits: Optional[str] = None,
         constants: Optional[Dict] = None,
         functions: Optional[List[Function]] = None,
         summary_features: Optional[List] = None,
+        match_features: Optional[List] = None,
         second_phase: Optional[SecondPhaseRanking] = None,
         **kwargs: Unpack[RankProfileFields],
     ) -> None:
@@ -961,6 +964,9 @@ def __init__(
         :param summary_features: List of rank features to be included with each hit.
             `More info <https://docs.vespa.ai/en/reference/schema-reference.html#summary-features>`__`
             about summary features.
+        :param match_features: List of rank features to be included with each hit, rendered as a ``match-features`` block in the schema.
+            `More info <https://docs.vespa.ai/en/reference/schema-reference.html#match-features>`__`
+            about match features.
         :param second_phase: Optional config specifying the second phase of ranking.
             See :class:`SecondPhaseRanking`.
         :key weight: A list of tuples containing the field and their weight
@@ -970,10 +976,10 @@ def __init__(
             `More info <https://docs.vespa.ai/en/reference/schema-reference.html#rank-properties>`__` about rank-properties.
 
         >>> RankProfile(name = "default", first_phase = "nativeRank(title, body)")
-        RankProfile('default', 'nativeRank(title, body)', None, None, None, None, None, None, None, None, None)
+        RankProfile('default', 'nativeRank(title, body)', None, None, None, None, None, None, None, None, None, None)
 
         >>> RankProfile(name = "new", first_phase = "BM25(title)", inherits = "default")
-        RankProfile('new', 'BM25(title)', 'default', None, None, None, None, None, None, None, None)
+        RankProfile('new', 'BM25(title)', 'default', None, None, None, None, None, None, None, None, None)
 
         >>> RankProfile(
         ...     name = "new",
@@ -982,7 +988,7 @@ def __init__(
         ...     constants={"TOKEN_NONE": 0, "TOKEN_CLS": 101, "TOKEN_SEP": 102},
         ...     summary_features=["BM25(title)"]
         ... )
-        RankProfile('new', 'BM25(title)', 'default', {'TOKEN_NONE': 0, 'TOKEN_CLS': 101, 'TOKEN_SEP': 102}, None, ['BM25(title)'], None, None, None, None, None)
+        RankProfile('new', 'BM25(title)', 'default', {'TOKEN_NONE': 0, 'TOKEN_CLS': 101, 'TOKEN_SEP': 102}, None, ['BM25(title)'], None, None, None, None, None, None)
 
         >>> RankProfile(
         ...     name="bert",
@@ -1002,41 +1008,42 @@ def __init__(
         ...     ],
         ...     summary_features=["question_length", "doc_length"]
         ... )
-        RankProfile('bert', 'bm25(title) + bm25(body)', 'default', {'TOKEN_NONE': 0, 'TOKEN_CLS': 101, 'TOKEN_SEP': 102}, [Function('question_length', 'sum(map(query(query_token_ids), f(a)(a > 0)))', None), Function('doc_length', 'sum(map(attribute(doc_token_ids), f(a)(a > 0)))', None)], ['question_length', 'doc_length'], SecondPhaseRanking('1.25 * bm25(title) + 3.75 * bm25(body)', 10), None, None, None, None)
+        RankProfile('bert', 'bm25(title) + bm25(body)', 'default', {'TOKEN_NONE': 0, 'TOKEN_CLS': 101, 'TOKEN_SEP': 102}, [Function('question_length', 'sum(map(query(query_token_ids), f(a)(a > 0)))', None), Function('doc_length', 'sum(map(attribute(doc_token_ids), f(a)(a > 0)))', None)], ['question_length', 'doc_length'], None, SecondPhaseRanking('1.25 * bm25(title) + 3.75 * bm25(body)', 10), None, None, None, None)
 
         >>> RankProfile(
         ...     name = "default",
         ...     first_phase = "nativeRank(title, body)",
         ...     weight = [("title", 200), ("body", 100)]
         ... )
-        RankProfile('default', 'nativeRank(title, body)', None, None, None, None, None, [('title', 200), ('body', 100)], None, None, None)
+        RankProfile('default', 'nativeRank(title, body)', None, None, None, None, None, None, [('title', 200), ('body', 100)], None, None, None)
 
         >>> RankProfile(
         ...     name = "default",
         ...     first_phase = "nativeRank(title, body)",
         ...     rank_type = [("body", "about")]
         ... )
-        RankProfile('default', 'nativeRank(title, body)', None, None, None, None, None, None, [('body', 'about')], None, None)
+        RankProfile('default', 'nativeRank(title, body)', None, None, None, None, None, None, None, [('body', 'about')], None, None)
 
         >>> RankProfile(
         ...     name = "default",
         ...     first_phase = "nativeRank(title, body)",
         ...     rank_properties = [("fieldMatch(title).maxAlternativeSegmentations", "10")]
         ... )
-        RankProfile('default', 'nativeRank(title, body)', None, None, None, None, None, None, None, [('fieldMatch(title).maxAlternativeSegmentations', '10')], None)
+        RankProfile('default', 'nativeRank(title, body)', None, None, None, None, None, None, None, None, [('fieldMatch(title).maxAlternativeSegmentations', '10')], None)
 
         >>> RankProfile(
         ...    name = "default",
         ...    first_phase = FirstPhaseRanking(expression="nativeRank(title, body)", keep_rank_count=50)
         ... )
-        RankProfile('default', FirstPhaseRanking('nativeRank(title, body)', 50, None), None, None, None, None, None, None, None, None, None)
+        RankProfile('default', FirstPhaseRanking('nativeRank(title, body)', 50, None), None, None, None, None, None, None, None, None, None, None)
         """
         self.name = name
         self.first_phase = first_phase
         self.inherits = kwargs.get("inherits", inherits)
         self.constants = kwargs.get("constants", constants)
         self.functions = kwargs.get("functions", functions)
         self.summary_features = kwargs.get("summary_features", summary_features)
+        self.match_features = kwargs.get("match_features", match_features)
         self.second_phase = kwargs.get("second_phase", second_phase)
         self.weight = kwargs.get("weight", None)
         self.rank_type = kwargs.get("rank_type", None)
@@ -1053,6 +1060,7 @@ def __eq__(self, other: object) -> bool:
             and self.constants == other.constants
             and self.functions == other.functions
             and self.summary_features == other.summary_features
+            and self.match_features == other.match_features
             and self.second_phase == other.second_phase
             and self.weight == other.weight
             and self.rank_type == other.rank_type
@@ -1061,14 +1069,15 @@ def __eq__(self, other: object) -> bool:
         )
 
     def __repr__(self) -> str:
-        return "{0}({1}, {2}, {3}, {4}, {5}, {6}, {7}, {8}, {9}, {10}, {11})".format(
+        return "{0}({1}, {2}, {3}, {4}, {5}, {6}, {7}, {8}, {9}, {10}, {11}, {12})".format(
             self.__class__.__name__,
             repr(self.name),
             repr(self.first_phase),
             repr(self.inherits),
             repr(self.constants),
             repr(self.functions),
             repr(self.summary_features),
+            repr(self.match_features),
             repr(self.second_phase),
             repr(self.weight),
             repr(self.rank_type),

diff --git a/vespa/templates/schema.txt b/vespa/templates/schema.txt
@@ -253,6 +253,13 @@ schema {{ schema_name }} {
         {% endfor %}
         }
         {% endif %}
+        {% if value.match_features %}
+        match-features {
+        {% for feature in value.match_features %}
+            {{ feature }}
+        {% endfor %}
+        }
+        {% endif %}
         {% if value.weight %}
         {% for weight in value.weight %}
         weight {{ weight.0 }}: {{ weight.1 }}