From 0470e9b583bf6aefb6f53b9b953bca96bcf64ecf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?St=C3=A9phane=20Goetz?= <onigoetz@onigoetz.ch>
Date: Sat, 19 Oct 2024 13:43:51 +0200
Subject: [PATCH] Optimize performance by replacing regex by char code checks
 (#326)

* Optimize performance by replacing regex by char code checks

* Fix formatting

* SonarQube recommended fix

* Add one more test

* Document character codes
---
 packages/benchmark-messageformat/README.md |  90 ++++----
 packages/messageformat/src/chars.ts        |   9 +
 packages/messageformat/src/matcher.ts      |  46 ----
 packages/messageformat/src/parser.test.ts  |   8 +-
 packages/messageformat/src/parser.ts       | 236 ++++++++++++++-------
 5 files changed, 217 insertions(+), 172 deletions(-)
 create mode 100644 packages/messageformat/src/chars.ts
 delete mode 100644 packages/messageformat/src/matcher.ts

diff --git a/packages/benchmark-messageformat/README.md b/packages/benchmark-messageformat/README.md
index 3e67cfb..f76c67c 100644
--- a/packages/benchmark-messageformat/README.md
+++ b/packages/benchmark-messageformat/README.md
@@ -16,11 +16,11 @@ Sources can be found in `src`, measure taken on 07/12/2023 with latest available
 | Npm Package                                           | Version    | Size | Comment     |
 | ----------------------------------------------------- | ---------- | ---- | ----------- |
 | @ffz/icu-msgparser (+ custom renderer)                | 2.0.0      | 9.4K |             |
-| @onigoetz/messageformat (+ @onigoetz/intl-formatters) | 1.0.0-rc.2 | 7.9K |             |
+| @onigoetz/messageformat (+ @onigoetz/intl-formatters) | 1.0.0-rc.2 | 8.1K |             |
 | @onigoetz/messageformat (+ @onigoetz/make-plural)     | 1.0.0-rc.2 | 11K  |             |
 | format-message-parse                                  | 6.2.4      | 22K  | Uses peg.js |
 | @onigoetz/messageformat (+ make-plural)               | 1.0.0-rc.2 | 23K  |             |
-| @onigoetz/messageformat (+ @phensley/plurals)         | 1.0.0-rc.2 | 40K  |             |
+| @onigoetz/messageformat (+ @phensley/plurals)         | 1.0.0-rc.2 | 41K  |             |
 | intl-messageformat                                    | 10.5.14    | 55K  | Uses peg.js |
 | @phensley/messageformat                               | 1.9.0      | 54K  |             |
 | @messageformat/core                                   | 3.4.0      | 74K  | Uses peg.js |
@@ -50,7 +50,7 @@ The benchmark is applied to 4 different strings, which for the simple cases shou
 >
 > - Node.js v20.9.0
 > - Apple M2 CPU
-> - October 17, 2024
+> - October 18, 2024
 
 ## Simple String
 
@@ -62,16 +62,16 @@ const input = [`Hello, world!`, {}];
 
 | Name                                                  |   ops/sec | MoE     | Runs sampled |
 | ----------------------------------------------------- | --------: | ------- | ------------ |
-| **format-message-parse**                              | 8,586,016 | ± 0.16% | 97           |
-| @onigoetz/messageformat (+ @onigoetz/make-plural)     | 8,321,701 | ± 0.78% | 95           |
-| @phensley/messageformat                               | 7,906,469 | ± 0.25% | 98           |
-| @onigoetz/messageformat (+ @onigoetz/intl-formatters) | 8,041,469 | ± 2.71% | 90           |
-| @onigoetz/messageformat (+ @phensley/plurals)         | 8,137,500 | ± 5.18% | 97           |
-| @onigoetz/messageformat (+ make-plural)               | 7,854,733 | ± 1.93% | 97           |
-| @ffz/icu-msgparser (+ custom renderer)                | 5,668,605 | ± 0.34% | 97           |
-| @messageformat/core                                   | 1,684,757 | ± 0.25% | 99           |
-| intl-messageformat                                    |   235,223 | ± 0.63% | 93           |
-| globalize                                             |    36,694 | ± 0.81% | 98           |
+| **format-message-parse**                              | 8,895,545 | ± 0.23% | 94           |
+| @onigoetz/messageformat (+ @phensley/plurals)         | 8,596,907 | ± 0.16% | 99           |
+| @onigoetz/messageformat (+ @onigoetz/make-plural)     | 8,583,838 | ± 0.14% | 101          |
+| @onigoetz/messageformat (+ @onigoetz/intl-formatters) | 8,396,261 | ± 2.03% | 94           |
+| @onigoetz/messageformat (+ make-plural)               | 8,222,727 | ± 1.48% | 97           |
+| @phensley/messageformat                               | 8,079,695 | ± 0.22% | 99           |
+| @ffz/icu-msgparser (+ custom renderer)                | 5,662,192 | ± 0.16% | 97           |
+| @messageformat/core                                   | 1,715,496 | ± 0.13% | 97           |
+| intl-messageformat                                    |   240,408 | ± 0.63% | 93           |
+| globalize                                             |    37,391 | ± 0.31% | 96           |
 
 ## With one variable
 
@@ -88,16 +88,16 @@ const input = [
 
 | Name                                                      |   ops/sec | MoE     | Runs sampled |
 | --------------------------------------------------------- | --------: | ------- | ------------ |
-| **@onigoetz/messageformat (+ @phensley/plurals)**         | 4,339,447 | ± 0.72% | 92           |
-| **@onigoetz/messageformat (+ @onigoetz/intl-formatters)** | 4,307,751 | ± 1.23% | 96           |
-| @onigoetz/messageformat (+ @onigoetz/make-plural)         | 4,277,114 | ± 0.64% | 100          |
-| @onigoetz/messageformat (+ make-plural)                   | 4,258,599 | ± 0.54% | 99           |
-| format-message-parse                                      | 3,828,913 | ± 1.50% | 98           |
-| @ffz/icu-msgparser (+ custom renderer)                    | 3,265,017 | ± 0.34% | 101          |
-| @phensley/messageformat                                   | 3,182,662 | ± 0.75% | 99           |
-| @messageformat/core                                       |   845,946 | ± 0.17% | 100          |
-| intl-messageformat                                        |   206,447 | ± 1.45% | 95           |
-| globalize                                                 |    36,014 | ± 0.23% | 95           |
+| **@onigoetz/messageformat (+ @onigoetz/intl-formatters)** | 6,414,862 | ± 0.11% | 98           |
+| @onigoetz/messageformat (+ make-plural)                   | 6,380,084 | ± 0.31% | 98           |
+| @onigoetz/messageformat (+ @phensley/plurals)             | 6,233,602 | ± 0.22% | 98           |
+| @onigoetz/messageformat (+ @onigoetz/make-plural)         | 6,228,841 | ± 0.20% | 100          |
+| format-message-parse                                      | 3,978,459 | ± 0.19% | 99           |
+| @phensley/messageformat                                   | 3,399,981 | ± 0.13% | 99           |
+| @ffz/icu-msgparser (+ custom renderer)                    | 3,361,547 | ± 0.27% | 100          |
+| @messageformat/core                                       |   880,358 | ± 0.26% | 100          |
+| intl-messageformat                                        |   216,110 | ± 0.54% | 95           |
+| globalize                                                 |    36,506 | ± 0.32% | 97           |
 
 ## With plurals
 
@@ -116,16 +116,16 @@ const input = [
 
 | Name                                                  | ops/sec | MoE     | Runs sampled |
 | ----------------------------------------------------- | ------: | ------- | ------------ |
-| **@onigoetz/messageformat (+ make-plural)**           | 618,628 | ± 0.20% | 100          |
-| @onigoetz/messageformat (+ @phensley/plurals)         | 614,416 | ± 0.56% | 100          |
-| @onigoetz/messageformat (+ @onigoetz/intl-formatters) | 546,253 | ± 0.88% | 99           |
-| @phensley/messageformat                               | 540,674 | ± 0.22% | 98           |
-| @messageformat/core                                   | 180,091 | ± 1.23% | 98           |
-| @onigoetz/messageformat (+ @onigoetz/make-plural)     | 139,554 | ± 0.62% | 96           |
-| @ffz/icu-msgparser (+ custom renderer)                | 127,049 | ± 0.83% | 99           |
-| format-message-parse                                  |  81,202 | ± 0.25% | 98           |
-| intl-messageformat                                    |  47,462 | ± 2.53% | 89           |
-| globalize                                             |  26,501 | ± 1.10% | 97           |
+| **@onigoetz/messageformat (+ @phensley/plurals)**     | 940,533 | ± 0.19% | 98           |
+| @onigoetz/messageformat (+ make-plural)               | 919,852 | ± 0.14% | 98           |
+| @onigoetz/messageformat (+ @onigoetz/intl-formatters) | 772,695 | ± 0.25% | 98           |
+| @phensley/messageformat                               | 551,128 | ± 0.13% | 96           |
+| @messageformat/core                                   | 185,781 | ± 0.11% | 98           |
+| @onigoetz/messageformat (+ @onigoetz/make-plural)     | 152,541 | ± 0.10% | 100          |
+| @ffz/icu-msgparser (+ custom renderer)                | 130,129 | ± 0.10% | 97           |
+| format-message-parse                                  |  81,703 | ± 0.28% | 97           |
+| intl-messageformat                                    |  48,687 | ± 2.22% | 92           |
+| globalize                                             |  27,151 | ± 0.17% | 97           |
 
 ## With select and plurals
 
@@ -171,15 +171,15 @@ const input = [`
   `
 ```
 
-| Name                                                  | ops/sec | MoE     | Runs sampled |
-| ----------------------------------------------------- | ------: | ------- | ------------ |
-| **@onigoetz/messageformat (+ @phensley/plurals)**     |  96,722 | ± 0.80% | 93           |
-| @onigoetz/messageformat (+ make-plural)               |  95,344 | ± 2.71% | 100          |
-| @onigoetz/messageformat (+ @onigoetz/intl-formatters) |  92,448 | ± 1.42% | 93           |
-| @onigoetz/messageformat (+ @onigoetz/make-plural)     |  62,556 | ± 0.74% | 96           |
-| @phensley/messageformat                               |  52,335 | ± 0.63% | 102          |
-| @messageformat/core                                   |  30,852 | ± 0.13% | 97           |
-| @ffz/icu-msgparser (+ custom renderer)                |  29,109 | ± 0.80% | 95           |
-| intl-messageformat                                    |  16,495 | ± 1.59% | 95           |
-| format-message-parse                                  |  16,293 | ± 3.06% | 93           |
-| globalize                                             |   8,606 | ± 0.15% | 99           |
+| Name                                                  | ops/sec | MoE      | Runs sampled |
+| ----------------------------------------------------- | ------: | -------- | ------------ |
+| **@onigoetz/messageformat (+ @phensley/plurals)**     | 140,415 | ± 0.07%  | 99           |
+| @onigoetz/messageformat (+ make-plural)               | 136,452 | ± 0.11%  | 98           |
+| @onigoetz/messageformat (+ @onigoetz/intl-formatters) | 135,955 | ± 0.11%  | 99           |
+| @onigoetz/messageformat (+ @onigoetz/make-plural)     |  79,580 | ± 0.07%  | 98           |
+| @phensley/messageformat                               |  53,396 | ± 0.16%  | 101          |
+| @ffz/icu-msgparser (+ custom renderer)                |  29,484 | ± 0.72%  | 95           |
+| @messageformat/core                                   |  29,510 | ± 0.93%  | 95           |
+| intl-messageformat                                    |  15,706 | ± 1.99%  | 89           |
+| format-message-parse                                  |  15,356 | ± 12.31% | 85           |
+| globalize                                             |   8,250 | ± 1.98%  | 91           |
diff --git a/packages/messageformat/src/chars.ts b/packages/messageformat/src/chars.ts
new file mode 100644
index 0000000..7fd6387
--- /dev/null
+++ b/packages/messageformat/src/chars.ts
@@ -0,0 +1,9 @@
+export const CHAR_OPEN = 123; // {
+export const CHAR_CLOSE = 125; // }
+export const CHAR_SEP = 44; // ,
+export const CHAR_SUB_VAR = 35; // #
+export const CHAR_ESCAPE = 39; // '
+
+export const CHAR_0 = 48; // 0
+export const CHAR_9 = 57; // 9
+export const CHAR_MINUS = 45; // -
diff --git a/packages/messageformat/src/matcher.ts b/packages/messageformat/src/matcher.ts
deleted file mode 100644
index c826148..0000000
--- a/packages/messageformat/src/matcher.ts
+++ /dev/null
@@ -1,46 +0,0 @@
-import { Context } from "./types.js";
-
-const hasStickyRegexp = (() => {
-  try {
-    const r = new RegExp(".", "y");
-    return !!r;
-  } catch (err) {
-    /* istanbul ignore next */
-    return false;
-  }
-})();
-
-/**
- * Construct a regular expression for use in a StickyMatcher.
- * Construct a regular expression for use in a SubstringMatcher.
- * @param str
- */
-export function compile(pattern: string): RegExp {
-  return hasStickyRegexp
-    ? new RegExp(pattern, "y")
-    : new RegExp(`^${pattern}`, "g");
-}
-
-export const match: (pattern: RegExp, r: Context) => string | undefined =
-  hasStickyRegexp
-    ? function match(pattern, r) {
-        pattern.lastIndex = r.i;
-        const raw = pattern.exec(r.msg);
-        if (raw) {
-          // set the start of range to the sticky index
-          r.i = pattern.lastIndex;
-          return raw[0];
-        }
-        return undefined;
-      }
-    : function match(pattern, r) {
-        pattern.lastIndex = 0;
-        const s = r.msg.substring(r.i, r.l);
-        const raw = pattern.exec(s);
-        if (raw) {
-          // skip ahead by the number of characters matched
-          r.i += pattern.lastIndex;
-          return raw[0];
-        }
-        return undefined;
-      };
diff --git a/packages/messageformat/src/parser.test.ts b/packages/messageformat/src/parser.test.ts
index 3c3ac87..0ca5217 100644
--- a/packages/messageformat/src/parser.test.ts
+++ b/packages/messageformat/src/parser.test.ts
@@ -285,6 +285,12 @@ test.group("parse()", () => {
     expect(() => parse("{n,plural}")).toThrow("expected sub-messages");
   });
 
+  test("throws on incorrect sub-messages", ({ expect }) => {
+    expect(() => parse("{n,plural,zeroo {test} other {test}}")).toThrow(
+      "expected selector to be one of 'zero', 'one', 'two', 'few', 'many', 'other' or '=' followed by a digit at position 15 but found  . \"…zeroo[ ]{test…\"",
+    );
+  });
+
   test("throws on bad sub-messages", ({ expect }) => {
     expect(() => parse("{n,select,this thing}")).toThrow("expected {");
     expect(() => parse("{n,select,this {thing")).toThrow("expected }");
@@ -316,7 +322,7 @@ test.group("parse()", () => {
 
   test("throws on missing offset number", ({ expect }) => {
     expect(() => parse("{n,plural,offset: other{n}")).toThrow(
-      "expected sub-message selector at position 10 but found o.",
+      'expected offset number at position 17 but found  . "…fset:[ ]other…"',
     );
   });
 });
diff --git a/packages/messageformat/src/parser.ts b/packages/messageformat/src/parser.ts
index b87311a..0bac3c4 100644
--- a/packages/messageformat/src/parser.ts
+++ b/packages/messageformat/src/parser.ts
@@ -1,4 +1,13 @@
-import { compile, match } from "./matcher.js";
+import {
+  CHAR_0,
+  CHAR_9,
+  CHAR_CLOSE,
+  CHAR_ESCAPE,
+  CHAR_MINUS,
+  CHAR_OPEN,
+  CHAR_SEP,
+  CHAR_SUB_VAR,
+} from "./chars.js";
 import {
   Context,
   MessageOpType,
@@ -11,22 +20,7 @@ import {
   VariableToken,
 } from "./types.js";
 
-const OPEN = "{";
-const CLOSE = "}";
-const SEP = ",";
-const SUB_VAR = "#";
-const ESCAPE = "'";
-const OFFSET = compile(/offset:-?\d+/.source);
-const MULTI_SPACE = compile("\\s+");
-
-// Matches [^[[:Pattern_Syntax:][:Pattern_White_Space:]]]+
-// could be replaced in the future with [^\p{Pattern_Syntax}\p{Pattern_White_Space}]+ once crafty includes babel when compiling TypeScript
-const IDENTIFIER = compile(
-  // eslint-disable-next-line no-control-regex
-  /[^\u0009-\u000d \u0085\u200e\u200f\u2028\u2029\u0021-\u002f\u003a-\u0040\u005b-\u005e\u0060\u007b-\u007e\u00a1-\u00a7\u00a9\u00ab\u00ac\u00ae\u00b0\u00b1\u00b6\u00bb\u00bf\u00d7\u00f7\u2010-\u2027\u2030-\u203e\u2041-\u2053\u2055-\u205e\u2190-\u245f\u2500-\u2775\u2794-\u2bff\u2e00-\u2e7f\u3001-\u3003\u3008-\u3020\u3030\ufd3e\ufd3f\ufe45\ufe46]+/
-    .source,
-);
-const PLURAL = compile(/(=\d+(\.\d+)?)|zero|one|two|few|many|other/.source);
+const PLURAL = /^(?:(=\d+(\.\d+)?)|zero|one|two|few|many|other)$/;
 
 /**
  * Writes a nice code frame to show where an error happened.
@@ -36,12 +30,9 @@ const PLURAL = compile(/(=\d+(\.\d+)?)|zero|one|two|few|many|other/.source);
 function codeFrame(context: Context) {
   const beforeLength = Math.max(0, context.i - 5);
 
-  const before = context.msg.substr(
-    beforeLength,
-    Math.min(context.i - beforeLength, 5),
-  );
+  const before = context.msg.substring(beforeLength, context.i);
   const current = context.msg[context.i];
-  const after = context.msg.substr(context.i + 1, 5);
+  const after = context.msg.substring(context.i + 1, context.i + 6);
 
   return `"…${before}[${current}]${after}…"`;
 }
@@ -73,12 +64,67 @@ function expected(char: string, context: Context): SyntaxError {
   );
 }
 
+function peek(context: Context): number {
+  return context.msg.charCodeAt(context.i + 1);
+}
+
+function get(context: Context): number {
+  return context.msg.charCodeAt(context.i);
+}
+
+function isIdentifierChar(char: number): boolean {
+  return (
+    char !== CHAR_OPEN &&
+    char !== CHAR_CLOSE &&
+    char !== CHAR_SEP &&
+    char !== CHAR_SUB_VAR &&
+    char !== CHAR_ESCAPE &&
+    !isWhitespaceChar(char)
+  );
+}
+
+function readIdentifier(context: Context): string {
+  const start = context.i;
+
+  while (
+    context.i < context.l &&
+    isIdentifierChar(context.msg.charCodeAt(context.i))
+  ) {
+    ++context.i;
+  }
+
+  return context.msg.substring(start, context.i);
+}
+
+export function isWhitespaceChar(code: number): boolean {
+  return (
+    (code >= 0x09 && code <= 0x0d) ||
+    code === 0x20 || // space
+    code === 0x85 || // Next Line (NEL)
+    code === 0xa0 || // NBSP
+    code === 0x180e || // MONGOLIAN VOWEL SEPARATOR
+    (code >= 0x2000 && code <= 0x200d) || // En Quad, Em Quad, En Space, Em Space, Three-Per-Em Space, Four-Per-Em Space, Six-Per-Em Space, Figure Space, Punctuation Space, Thin Space, Hair Space, Zero Width Space, Zero Width Non-Joiner, Zero Width Joiner
+    code === 0x2028 || // Line Separator
+    code === 0x2029 || // Paragraph Separator
+    code === 0x202f || // Narrow No-Break Space
+    code === 0x205f || // Medium Mathematical Space
+    code === 0x2060 || // Word Joiner
+    code === 0x3000 || // Ideographic Space
+    code === 0xfeff // Zero Width No-Break Space
+  );
+}
+
 /**
  * Eat all available spaces and advance the context
  * @param context
  */
-function skipSpace(context: Context): boolean {
-  return match(MULTI_SPACE, context) !== undefined;
+function skipSpace(context: Context): void {
+  while (
+    context.i < context.l &&
+    isWhitespaceChar(context.msg.charCodeAt(context.i))
+  ) {
+    ++context.i;
+  }
 }
 
 /**
@@ -91,9 +137,12 @@ function skipSpace(context: Context): boolean {
  * @param char
  * @param context
  */
-function skipSeparator(char: string, context: Context) {
-  if (char !== SEP) {
-    throw expected(`${SEP} or ${CLOSE}`, context);
+function skipSeparator(char: number, context: Context) {
+  if (char !== CHAR_SEP) {
+    throw expected(
+      `${String.fromCharCode(CHAR_SEP)} or ${String.fromCharCode(CHAR_CLOSE)}`,
+      context,
+    );
   }
   ++context.i;
   skipSpace(context);
@@ -121,57 +170,58 @@ function parseText(context: Context, specialHash = false): string {
   let out = "";
 
   while (context.i < context.l) {
-    const char = context.msg[context.i];
-    if (char === OPEN || char === CLOSE || (specialHash && char === SUB_VAR)) {
+    const char = get(context);
+    if (
+      char === CHAR_OPEN ||
+      char === CHAR_CLOSE ||
+      (specialHash && char === CHAR_SUB_VAR)
+    ) {
       break;
     }
 
-    if (char === ESCAPE) {
-      let next = context.msg[++context.i];
-      if (next === ESCAPE) {
+    if (char === CHAR_ESCAPE) {
+      ++context.i;
+      let next = get(context);
+      if (next === CHAR_ESCAPE) {
         // Escaped Escape Character
-        out += next;
+        out += String.fromCharCode(next);
         ++context.i;
       } else if (
-        next === OPEN ||
-        next === CLOSE ||
-        (specialHash && next === SUB_VAR)
+        next === CHAR_OPEN ||
+        next === CHAR_CLOSE ||
+        (specialHash && next === CHAR_SUB_VAR)
       ) {
         // Special Character
-        out += next;
+        out += String.fromCharCode(next);
         while (++context.i < context.l) {
-          next = context.msg[context.i];
-          if (next === ESCAPE) {
+          next = get(context);
+          if (next === CHAR_ESCAPE) {
             // Check for an escaped escape character, and don't
             // stop if we encounter one.
-            next = context.msg[context.i + 1];
-            if (next === ESCAPE) {
-              out += next;
+            next = peek(context);
+            if (next === CHAR_ESCAPE) {
+              out += String.fromCharCode(next);
               ++context.i;
             } else {
               ++context.i;
               break;
             }
           } else {
-            out += next;
+            out += String.fromCharCode(next);
           }
         }
       } else {
-        out += char;
+        out += String.fromCharCode(char);
       }
     } else {
       ++context.i;
-      out += char;
+      out += String.fromCharCode(char);
     }
   }
 
   return out;
 }
 
-function isNot(context: Context, char: string) {
-  return context.msg[context.i] !== char;
-}
-
 /**
  * Parses a single sub-message block, including any trailing space
  *
@@ -187,8 +237,8 @@ function parseSubmessage(
 ): number {
   const startAt = context.nextIndex;
   skipSpace(context);
-  if (isNot(context, OPEN)) {
-    throw expected(OPEN, context);
+  if (get(context) !== CHAR_OPEN) {
+    throw expected(String.fromCharCode(CHAR_OPEN), context);
   }
 
   ++context.i;
@@ -197,8 +247,8 @@ function parseSubmessage(
   // eslint-disable-next-line @swissquote/swissquote/@typescript-eslint/no-use-before-define
   parseAST(context, parent, specialHash);
 
-  if (isNot(context, CLOSE)) {
-    throw expected(CLOSE, context);
+  if (get(context) !== CHAR_CLOSE) {
+    throw expected(String.fromCharCode(CHAR_CLOSE), context);
   }
 
   ++context.i;
@@ -226,17 +276,24 @@ function parseSubmessages(
   context: Context,
   parent: VariableToken,
   specialHash: boolean,
-  matcher: RegExp,
+  isPlural: boolean,
 ): Submessages {
   const submessages: Submessages = {} as Submessages;
 
   // Continue until we reach the end of the string or a block closing
-  while (context.i < context.l && context.msg[context.i] !== CLOSE) {
-    const selector = match(matcher, context);
+  while (context.i < context.l && get(context) !== CHAR_CLOSE) {
+    const selector = readIdentifier(context);
     if (!selector) {
       throw expected("sub-message selector", context);
     }
 
+    if (isPlural && !PLURAL.exec(selector)) {
+      throw expected(
+        "selector to be one of 'zero', 'one', 'two', 'few', 'many', 'other' or '=' followed by a digit",
+        context,
+      );
+    }
+
     submessages[selector] = parseSubmessage(context, parent, specialHash);
   }
 
@@ -247,6 +304,10 @@ function parseSubmessages(
   return submessages;
 }
 
+function isDigit(char: number): boolean {
+  return char >= CHAR_0 && char <= CHAR_9;
+}
+
 /**
  * Parse the offset part of a plural, if it is present
  *
@@ -259,11 +320,26 @@ function parseSubmessages(
  */
 function parseOffset(context: Context): number {
   let n = 0;
-  const m = match(OFFSET, context);
-  if (m) {
-    // This must parse successfully since it is constrained by the regexp match
-    n = parseInt(m.split(":")[1], 10);
+
+  if (context.msg.substring(context.i, context.i + 7) === "offset:") {
+    context.i += 7;
+
+    const start = context.i;
+    // Only a single leading minus sign is part of the number
+    if (get(context) === CHAR_MINUS) {
+      ++context.i;
+    }
+
+    const digitsStart = context.i;
+    while (context.i < context.l && isDigit(get(context))) {
+      ++context.i;
+    }
+    if (context.i === digitsStart) {
+      throw expected("offset number", context);
+    }
+    n = parseInt(context.msg.substring(start, context.i), 10);
   }
+
   return n;
 }
 
@@ -284,8 +360,8 @@ function parseOffset(context: Context): number {
  * @param current The token we're preparing
  */
 function parsePlural(context: Context, current: PluralToken) {
-  const char = context.msg[context.i];
-  if (char === CLOSE) {
+  const char = get(context);
+  if (char === CHAR_CLOSE) {
     throw expected("sub-messages", context);
   }
 
@@ -299,7 +375,7 @@ function parsePlural(context: Context, current: PluralToken) {
   }
 
   // Parse available options
-  current.m = parseSubmessages(context, current, true, PLURAL);
+  current.m = parseSubmessages(context, current, true, true);
   return current;
 }
 
@@ -314,21 +390,21 @@ function parsePlural(context: Context, current: PluralToken) {
  * @param current The token we're preparing
  */
 function parseSelect(context: Context, current: SelectToken) {
-  const char = context.msg[context.i];
-  if (char === CLOSE) {
+  const char = get(context);
+  if (char === CHAR_CLOSE) {
     throw expected("sub-messages", context);
   }
 
   skipSeparator(char, context);
 
   // Parse available options
-  current.m = parseSubmessages(context, current, false, IDENTIFIER);
+  current.m = parseSubmessages(context, current, false, false);
   return current;
 }
 
 function parseSimple(context: Context, current: SimpleToken) {
-  const char = context.msg[context.i];
-  if (char === CLOSE) {
+  const char = get(context);
+  if (char === CHAR_CLOSE) {
     return current;
   }
 
@@ -357,7 +433,7 @@ function parseElement(context: Context) {
   skipSpace(context);
 
   // Get the variable this block refers to
-  const id = match(IDENTIFIER, context);
+  const id = readIdentifier(context);
   if (!id) {
     throw expected("placeholder id", context);
   }
@@ -368,8 +444,8 @@ function parseElement(context: Context) {
   skipSpace(context);
 
   // If we're at the end of the block this is a single argument
-  const char = context.msg[context.i];
-  if (char === CLOSE) {
+  const char = get(context);
+  if (char === CHAR_CLOSE) {
     ++context.i;
     add(context, token);
     return;
@@ -378,7 +454,7 @@ function parseElement(context: Context) {
   skipSeparator(char, context);
 
   // Since we're still in the block, it must have a type
-  const type = match(IDENTIFIER, context);
+  const type = readIdentifier(context);
   if (!type) {
     throw expected("type", context);
   }
@@ -415,8 +491,8 @@ function parseElement(context: Context) {
   skipSpace(context);
 
   // At this stage, we have to be at the end of the current block
-  if (isNot(context, CLOSE)) {
-    throw expected(CLOSE, context);
+  if (get(context) !== CHAR_CLOSE) {
+    throw expected(String.fromCharCode(CHAR_CLOSE), context);
   }
 
   ++context.i;
@@ -435,17 +511,17 @@ function parseAST(
 ): Token[] {
   while (context.i < context.l) {
     const start = context.i;
-    const char = context.msg[start];
+    const char = get(context);
 
-    if (char === CLOSE) {
+    if (char === CHAR_CLOSE) {
       if (!parent) {
-        throw unexpected(char, context.i);
+        throw unexpected(String.fromCharCode(char), context.i);
       }
       break;
     }
 
     // If we're in a 'plural' or 'selectordinal', '#' refers to the parent variable, (plus or minus its offset)
-    if (specialHash && char === SUB_VAR) {
+    if (specialHash && char === CHAR_SUB_VAR) {
       ++context.i;
       // We can safely cast here as `specialHash` is only true if we are in a Plural or SelectOrdinal
       // and both have an offset
@@ -454,7 +530,7 @@ function parseAST(
         v: parent.v,
         o: (parent as PluralToken).o,
       });
-    } else if (char === OPEN) {
+    } else if (char === CHAR_OPEN) {
       // If we see a block start, we send it to `parseElement` and add it to the array if an element was found
       parseElement(context);
     } else {
@@ -467,7 +543,7 @@ function parseAST(
 
     // Infinite Loop Protection
     if (context.i === start) {
-      throw unexpected(char, context.i);
+      throw unexpected(String.fromCharCode(char), context.i);
     }
   }