Issue/#845 Make matchmaker games also affect global rating (#849)

* Refactor player rating queries to fetch all players in the game
* Refactor connection acquisition to reuse connections more
* Refactor persistence to use executemany
* Update global ratings when playing matchmaker games
* Refactor game rating adjustment to use trueskill
* Refactor `Rating` to server.ratings
* Refactor GameRater to rate a game multiple times with different ratings
* Add tests for rating adjustment
* Add type annotations to Search
* Refactor initialization checking and rating_type checking
* Refactor rating adjustment and initial rating creation
* Add constraint that global must be lower than matchmaker to be adjusted
* Refactor newbie ratings to constants
FAForever · Nov 27, 2021 · 5f7b72b · 5f7b72b
1 parent aac3d2c
commit 5f7b72b
Show file tree

Hide file tree

Showing 15 changed files with 1,132 additions and 488 deletions.
diff --git a/server/config.py b/server/config.py
@@ -82,6 +82,7 @@ def __init__(self):
         self.NEWBIE_MIN_GAMES = 10
         self.START_RATING_MEAN = 1500
         self.START_RATING_DEV = 500
+        self.RATING_ADJUSTMENT_MAX_RATING = 1400
         self.HIGH_RATED_PLAYER_MIN_RATING = 1400
         self.TOP_PLAYER_MIN_RATING = 2000
 

diff --git a/server/db/models.py b/server/db/models.py
@@ -125,10 +125,13 @@
     Column("color",         Integer, nullable=False),
     Column("team",          Integer, nullable=False),
     Column("place",         Integer, nullable=False),
+    # DEPRECATED: Use leaderboard_rating_journal instead. These columns should 
+    # be dropped at some point.
     Column("mean",          Float,   nullable=False),
     Column("deviation",     Float,   nullable=False),
     Column("after_mean",    Float),
     Column("after_deviation", Float),
+    # End DEPRECATED
     Column("score",         Integer),
     Column("scoreTime",     TIMESTAMP),
     Column("result",        Enum(GameOutcome)),

diff --git a/server/games/game.py b/server/games/game.py
@@ -877,7 +877,7 @@ def is_visible_to_player(self, player: Player) -> bool:
 
         if (
             self.enforce_rating_range
-            and player.get_displayed_rating(self.rating_type)
+            and player.ratings[self.rating_type].displayed()
             not in self.displayed_rating_range
         ):
             return False

diff --git a/server/ladder_service.py b/server/ladder_service.py
@@ -400,7 +400,7 @@ async def start_game(
                 limit=config.LADDER_ANTI_REPETITION_LIMIT
             )
             rating = min(
-                player.get_displayed_rating(queue.rating_type)
+                player.ratings[queue.rating_type].displayed()
                 for player in all_players
             )
             pool = queue.get_map_pool_for_rating(rating)

diff --git a/server/matchmaker/search.py b/server/matchmaker/search.py
@@ -5,9 +5,9 @@
 import time
 from typing import Any, Callable, List, Optional, Tuple
 
-from trueskill import Rating, quality
+import trueskill
 
-from server.rating import RatingType
+from server.rating import Rating, RatingType
 
 from ..config import config
 from ..decorators import with_logger
@@ -48,15 +48,15 @@ def __init__(
         # Precompute this
         self.quality_against_self = self.quality_with(self)
 
-    def adjusted_rating(self, player: Player):
+    def adjusted_rating(self, player: Player) -> Rating:
         """
         Returns an adjusted mean with a simple linear interpolation between current mean and a specified base mean
         """
         mean, dev = player.ratings[self.rating_type]
         game_count = player.game_count[self.rating_type]
         adjusted_mean = ((config.NEWBIE_MIN_GAMES - game_count) * config.NEWBIE_BASE_MEAN
                          + game_count * mean) / config.NEWBIE_MIN_GAMES
-        return adjusted_mean, dev
+        return Rating(adjusted_mean, dev)
 
     def is_newbie(self, player: Player) -> bool:
         return player.game_count[self.rating_type] <= config.NEWBIE_MIN_GAMES
@@ -83,7 +83,7 @@ def has_top_player(self) -> bool:
         return max_rating >= config.TOP_PLAYER_MIN_RATING
 
     @property
-    def ratings(self):
+    def ratings(self) -> List[Rating]:
         ratings = []
         for player, rating in zip(self.players, self.raw_ratings):
             # New players (less than config.NEWBIE_MIN_GAMES games) match against less skilled opponents
@@ -93,26 +93,26 @@ def ratings(self):
         return ratings
 
     @property
-    def cumulative_rating(self):
+    def cumulative_rating(self) -> float:
         return sum(self.displayed_ratings)
 
     @property
-    def average_rating(self):
+    def average_rating(self) -> float:
         return statistics.mean(self.displayed_ratings)
 
     @property
-    def raw_ratings(self):
+    def raw_ratings(self) -> List[Rating]:
         return [player.ratings[self.rating_type] for player in self.players]
 
     @property
-    def displayed_ratings(self):
+    def displayed_ratings(self) -> List[float]:
         """
         The client always displays mean - 3 * dev as a player's rating.
         So generally this is perceived as a player's true rating.
         """
-        return [mean - 3 * dev for mean, dev in self.raw_ratings]
+        return [rating.displayed() for rating in self.raw_ratings]
 
-    def _nearby_rating_range(self, delta):
+    def _nearby_rating_range(self, delta: int) -> Tuple[int, int]:
         """
         Returns 'boundary' mu values for player matching. Adjust delta for
         different game qualities.
@@ -122,12 +122,12 @@ def _nearby_rating_range(self, delta):
         return rounded_mu - delta, rounded_mu + delta
 
     @property
-    def boundary_80(self):
+    def boundary_80(self) -> Tuple[int, int]:
         """ Achieves roughly 80% quality. """
         return self._nearby_rating_range(200)
 
     @property
-    def boundary_75(self):
+    def boundary_75(self) -> Tuple[int, int]:
         """ Achieves roughly 75% quality. FIXME - why is it MORE restrictive??? """
         return self._nearby_rating_range(100)
 
@@ -181,23 +181,23 @@ def quality_with(self, other: "Search") -> float:
         assert all(other.raw_ratings)
         assert other.players
 
-        team1 = [Rating(*rating) for rating in self.ratings]
-        team2 = [Rating(*rating) for rating in other.ratings]
+        team1 = [trueskill.Rating(*rating) for rating in self.ratings]
+        team2 = [trueskill.Rating(*rating) for rating in other.ratings]
 
-        return quality([team1, team2])
+        return trueskill.quality([team1, team2])
 
     @property
-    def is_matched(self):
+    def is_matched(self) -> bool:
         return self._match.done() and not self._match.cancelled()
 
-    def done(self):
+    def done(self) -> bool:
         return self._match.done()
 
     @property
-    def is_cancelled(self):
+    def is_cancelled(self) -> bool:
         return self._match.cancelled()
 
-    def matches_with(self, other: "Search"):
+    def matches_with(self, other: "Search") -> bool:
         """
         Determine if this search is compatible with other given search according
         to both wishes.
@@ -297,23 +297,23 @@ def players(self) -> List[Player]:
         return list(itertools.chain(*[s.players for s in self.searches]))
 
     @property
-    def ratings(self):
+    def ratings(self) -> List[Rating]:
         return list(itertools.chain(*[s.ratings for s in self.searches]))
 
     @property
-    def cumulative_rating(self):
+    def cumulative_rating(self) -> float:
         return sum(s.cumulative_rating for s in self.searches)
 
     @property
-    def average_rating(self):
+    def average_rating(self) -> float:
         return get_average_rating(self.searches)
 
     @property
-    def raw_ratings(self):
+    def raw_ratings(self) -> List[Rating]:
         return list(itertools.chain(*[s.raw_ratings for s in self.searches]))
 
     @property
-    def displayed_ratings(self):
+    def displayed_ratings(self) -> List[float]:
         return list(itertools.chain(*[s.displayed_ratings for s in self.searches]))
 
     @property

diff --git a/server/players.py b/server/players.py
@@ -88,10 +88,6 @@ def faction(self, value: Union[str, int, Faction]) -> None:
         else:
             self._faction = Faction.from_value(value)
 
-    def get_displayed_rating(self, rating_type: str) -> float:
-        mean, dev = self.ratings[rating_type]
-        return mean - 3 * dev
-
     def power(self) -> int:
         """An artifact of the old permission system. The client still uses this
         number to determine if a player gets a special category in the user list

diff --git a/server/rating.py b/server/rating.py
@@ -3,15 +3,37 @@
 """
 
 from dataclasses import dataclass
-from typing import Dict, Optional, Set, Tuple, TypeVar, Union
+from typing import Dict, NamedTuple, Optional, Set, Tuple, Union
 
 import trueskill
 
 from server.config import config
 from server.weakattr import WeakAttribute
 
-Rating = Tuple[float, float]
-V = TypeVar("V")
+AnyRating = Union["Rating", trueskill.Rating, Tuple[float, float]]
+
+
+class Rating(NamedTuple):
+    """
+    A container for holding a mean, deviation pair and computing the displayed
+    rating.
+
+    Uses mean, dev to differentiate from the trueskill.Rating type which uses
+    mu, sigma.
+    """
+    mean: float
+    dev: float
+
+    def of(value: AnyRating) -> "Rating":
+        if isinstance(value, trueskill.Rating):
+            return Rating(value.mu, value.sigma)
+        elif isinstance(value, Rating):
+            return value
+
+        return Rating(*value)
+
+    def displayed(self) -> float:
+        return self.mean - 3 * self.dev
 
 
 @dataclass(init=False)
@@ -64,23 +86,14 @@ def __init__(self, leaderboards: Dict[str, Leaderboard], init: bool = True):
             _ = self[RatingType.GLOBAL]
             _ = self[RatingType.LADDER_1V1]
 
-    def __setitem__(
-        self,
-        rating_type: str,
-        value: Union[Rating, trueskill.Rating],
-    ) -> None:
-        if isinstance(value, trueskill.Rating):
-            rating = (value.mu, value.sigma)
-        else:
-            rating = value
-
+    def __setitem__(self, rating_type: str, value: AnyRating) -> None:
         self.transient.discard(rating_type)
         # This could be optimized further by walking backwards along the
         # initialization chain and only unmarking the ratings we come accross,
         # but this adds complexity so we won't bother unless it really becomes
         # a performance bottleneck, which is unlikely.
         self.clean.clear()
-        super().__setitem__(rating_type, rating)
+        super().__setitem__(rating_type, Rating.of(value))
 
     def __getitem__(
         self,
@@ -119,23 +132,25 @@ def _get_initial_rating(
 
         history.add(rating_type)
         init_rating_type = leaderboard.initializer.technical_name
-        mean, dev = self.__getitem__(init_rating_type, history=history)
+        rating = self.__getitem__(init_rating_type, history=history)
 
-        if dev > 250 or init_rating_type in self.transient:
-            return (mean, dev)
+        if rating.dev > 250 or init_rating_type in self.transient:
+            return rating
 
-        return (mean, min(dev + 150, 250))
+        return Rating(rating.mean, min(rating.dev + 150, 250))
 
     def update(self, other: Dict[str, Rating]):
         self.transient -= set(other)
         self.clean.clear()
         if isinstance(other, PlayerRatings):
             self.transient |= other.transient
+        else:
+            other = {key: Rating.of(value) for key, value in other.items()}
         super().update(other)
 
 
 def default_rating() -> Rating:
-    return (config.START_RATING_MEAN, config.START_RATING_DEV)
+    return Rating(config.START_RATING_MEAN, config.START_RATING_DEV)
 
 
 class InclusiveRange():