Skip to content

Commit

Permalink
Merge pull request #140 from martindevans/grammar_exception_types
Browse files Browse the repository at this point in the history
grammar_exception_types
  • Loading branch information
martindevans authored Sep 2, 2023
2 parents 1533ee7 + af680ac commit 4d079bf
Show file tree
Hide file tree
Showing 5 changed files with 150 additions and 55 deletions.
21 changes: 0 additions & 21 deletions LLama/Exceptions/GrammarFormatException.cs

This file was deleted.

125 changes: 125 additions & 0 deletions LLama/Exceptions/GrammarFormatExceptions.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
using System;

namespace LLama.Exceptions;

/// <summary>
/// Common base type for every exception that reports a malformed grammar.
/// Catch this type to handle all grammar format errors in one place.
/// </summary>
/// <remarks>
/// The only constructor is <c>internal</c>, so concrete exception types can
/// only be declared by code that has access to this assembly's internals.
/// </remarks>
public abstract class GrammarFormatException : Exception
{
    /// <summary>
    /// Initialise the exception with a human readable description of the problem
    /// </summary>
    /// <param name="message">Text exposed through <see cref="Exception.Message"/></param>
    internal GrammarFormatException(string message)
        : base(message)
    {
    }
}


/// <summary>
/// A hex literal did not contain the number of hex characters that was required
/// </summary>
public class GrammarUnexpectedHexCharsCount : GrammarFormatException
{
    /// <summary>
    /// Build the exception message from the expected count and the offending input
    /// </summary>
    /// <param name="size">Number of hex characters that were expected</param>
    /// <param name="source">Grammar text at the position where the literal was being read</param>
    internal GrammarUnexpectedHexCharsCount(int size, string source)
        : base($"Expecting {size} hex chars at {source}")
    {
    }
}

/// <summary>
/// A "name" element was required at the current position but could not be parsed
/// </summary>
public class GrammarExpectedName : GrammarFormatException
{
    /// <summary>
    /// Build the exception message from the offending input
    /// </summary>
    /// <param name="source">Grammar text at the position where a name was expected</param>
    internal GrammarExpectedName(string source)
        : base($"Expecting name at {source}")
    {
    }
}

/// <summary>
/// An escape sequence was followed by a character that is not a recognised escape
/// </summary>
public class GrammarUnknownEscapeCharacter : GrammarFormatException
{
    /// <summary>
    /// Build the exception message from the offending input
    /// </summary>
    /// <param name="source">Grammar text at the position of the unrecognised escape</param>
    internal GrammarUnknownEscapeCharacter(string source)
        : base($"Unknown escape at {source}")
    {
    }
}

/// <summary>
/// The end of the input was reached while the parser still expected more content
/// </summary>
public class GrammarUnexpectedEndOfInput
    : GrammarFormatException
{
    /// <summary>
    /// Create the exception with its fixed message; there is no remaining input to report
    /// </summary>
    internal GrammarUnexpectedEndOfInput()
        // Plain literal: the message has no placeholders, so string interpolation is unnecessary
        : base("Unexpected end of input")
    {
    }
}

/// <summary>
/// The parser required a specific piece of text next, but found something else
/// </summary>
public class GrammarExpectedNext : GrammarFormatException
{
    /// <summary>
    /// Build the exception message from the expectation and the offending input
    /// </summary>
    /// <param name="expected">What was expected at this position; quoted verbatim in the message</param>
    /// <param name="source">Grammar text at the position where the expectation failed</param>
    internal GrammarExpectedNext(string expected, string source)
        : base($"Expected '{expected}' at {source}")
    {
    }
}

/// <summary>
/// A specified character was expected to precede another when parsing
/// </summary>
/// <remarks>
/// NOTE(review): the message reads "Expecting preceding item to be '...'".
/// Confirm that callers pass the item that should have come first as
/// <c>expected</c>, rather than the element that requires a predecessor.
/// </remarks>
public class GrammarExpectedPrevious : GrammarFormatException
{
    /// <summary>
    /// Build the exception message from the expectation and the offending input
    /// </summary>
    /// <param name="expected">Text quoted verbatim in the message as the expected preceding item</param>
    /// <param name="source">Grammar text at the position where the expectation failed</param>
    internal GrammarExpectedPrevious(string expected, string source)
        : base($"Expecting preceding item to be '{expected}' at {source}")
    {
    }
}


/// <summary>
/// A CHAR_ALT element appeared in a rule without a CHAR element directly before it
/// </summary>
public class GrammarUnexpectedCharAltElement : GrammarFormatException
{
    /// <summary>
    /// Build the exception message from the location of the offending element
    /// </summary>
    /// <param name="ruleId">Identifier of the rule containing the element</param>
    /// <param name="index">Position of the offending element within the rule</param>
    internal GrammarUnexpectedCharAltElement(string ruleId, int index)
        : base($"LLamaGrammarElementType.CHAR_ALT without preceding char: {ruleId},{index}")
    {
    }
}

/// <summary>
/// A CHAR_RNG_UPPER element appeared in a rule without a CHAR element directly before it
/// </summary>
public class GrammarUnexpectedCharRngElement : GrammarFormatException
{
    /// <summary>
    /// Build the exception message from the location of the offending element
    /// </summary>
    /// <param name="ruleId">Identifier of the rule containing the element</param>
    /// <param name="index">Position of the offending element within the rule</param>
    internal GrammarUnexpectedCharRngElement(string ruleId, int index)
        : base($"LLamaGrammarElementType.CHAR_RNG_UPPER without preceding char: {ruleId},{index}")
    {
    }
}

/// <summary>
/// An END element appeared in a rule somewhere other than the final position
/// </summary>
public class GrammarUnexpectedEndElement : GrammarFormatException
{
    /// <summary>
    /// Build the exception message from the location of the offending element
    /// </summary>
    /// <param name="ruleId">Identifier of the rule containing the element</param>
    /// <param name="index">Position of the offending END element within the rule</param>
    internal GrammarUnexpectedEndElement(string ruleId, int index)
        : base($"Unexpected LLamaGrammarElementType.END: {ruleId},{index}")
    {
    }
}
23 changes: 9 additions & 14 deletions LLama/Grammars/GBNFGrammarParser.cs
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ private uint ParseHex(ref ReadOnlySpan<byte> src, int size)

if (pos != end)
{
throw new GrammarFormatException($"Expecting {size} hex chars at {Encoding.UTF8.GetString(src.ToArray())}");
throw new GrammarUnexpectedHexCharsCount(size, Encoding.UTF8.GetString(src.ToArray()));
}
src = src.Slice(pos);
return value;
Expand Down Expand Up @@ -146,7 +146,7 @@ private ReadOnlySpan<byte> ParseName(ReadOnlySpan<byte> src)
}
if (pos == 0)
{
throw new GrammarFormatException($"Expecting name at {Encoding.UTF8.GetString(src.ToArray())}");
throw new GrammarExpectedName(Encoding.UTF8.GetString(src.ToArray()));
}
return src.Slice(pos);
}
Expand Down Expand Up @@ -177,15 +177,15 @@ private uint ParseChar(ref ReadOnlySpan<byte> src)
case (byte)']':
return chr;
default:
throw new GrammarFormatException("Unknown escape at " + Encoding.UTF8.GetString(src.ToArray()));
throw new GrammarUnknownEscapeCharacter(Encoding.UTF8.GetString(src.ToArray()));
}
}
else if (!src.IsEmpty)
{
return DecodeUTF8(ref src);
}

throw new GrammarFormatException("Unexpected end of input");
throw new GrammarUnexpectedEndOfInput();
}

private ReadOnlySpan<byte> ParseSequence(
Expand Down Expand Up @@ -258,17 +258,13 @@ private ReadOnlySpan<byte> ParseSequence(
// output reference to synthesized rule
outElements.Add(new LLamaGrammarElement(LLamaGrammarElementType.RULE_REF, subRuleId));
if (pos[0] != ')')
{
throw new GrammarFormatException($"Expecting ')' at {Encoding.UTF8.GetString(pos.ToArray())}");
}
throw new GrammarExpectedNext(")", Encoding.UTF8.GetString(pos.ToArray()));
pos = ParseSpace(pos.Slice(1), isNested);
}
else if (pos[0] == '*' || pos[0] == '+' || pos[0] == '?') // repetition operator
{
if (lastSymStart == outElements.Count)
{
throw new GrammarFormatException($"Expecting preceding item to */+/? at {Encoding.UTF8.GetString(pos.ToArray())}");
}
throw new GrammarExpectedPrevious("*/+/?", Encoding.UTF8.GetString(pos.ToArray()));

// apply transformation to previous symbol (lastSymStart to end) according to
// rewrite rules:
Expand Down Expand Up @@ -349,9 +345,8 @@ private ReadOnlySpan<byte> ParseRule(ParseState state, ReadOnlySpan<byte> src)
string name = Encoding.UTF8.GetString(src.Slice(0, nameLen).ToArray());

if (!(pos[0] == ':' && pos[1] == ':' && pos[2] == '='))
{
throw new GrammarFormatException($"Expecting ::= at {Encoding.UTF8.GetString(pos.ToArray())}");
}
throw new GrammarExpectedNext("::=", Encoding.UTF8.GetString(pos.ToArray()));

pos = ParseSpace(pos.Slice(3), true);

pos = ParseAlternates(state, pos, name, ruleId, false);
Expand All @@ -366,7 +361,7 @@ private ReadOnlySpan<byte> ParseRule(ParseState state, ReadOnlySpan<byte> src)
}
else if (!pos.IsEmpty)
{
throw new GrammarFormatException($"Expecting newline or end at {Encoding.UTF8.GetString(pos.ToArray())}");
throw new GrammarExpectedNext("newline or EOF", Encoding.UTF8.GetString(pos.ToArray()));
}
return ParseSpace(pos, true);
}
Expand Down
23 changes: 10 additions & 13 deletions LLama/Grammars/Grammar.cs
Original file line number Diff line number Diff line change
Expand Up @@ -70,10 +70,10 @@ public override string ToString()
private void PrintGrammar(StringBuilder output)
{
for (var i = 0; i < Rules.Count; i++)
PrintRule(output, (uint)i, Rules[i]);
PrintRule(output, Rules[i]);
}

private void PrintRule(StringBuilder output, uint ruleId, GrammarRule rule)
private void PrintRule(StringBuilder output, GrammarRule rule)
{
output.Append($"{rule.Name} ::= ");

Expand All @@ -82,37 +82,34 @@ private void PrintRule(StringBuilder output, uint ruleId, GrammarRule rule)
var elem = rule.Elements[i];
switch (elem.Type)
{
// GrammarRule has already verified that END is not being misused, no need to check again
case LLamaGrammarElementType.END:
throw new GrammarFormatException($"Unexpected end of rule: {ruleId}, {i}");
break;

case LLamaGrammarElementType.ALT:
output.Append("| ");
break;

case LLamaGrammarElementType.RULE_REF:
output.Append($"{Rules[(int)elem.Value].Name} ");
break;

case LLamaGrammarElementType.CHAR:
output.Append('[');
PrintGrammarChar(output, elem.Value);
break;

case LLamaGrammarElementType.CHAR_NOT:
output.Append("[^");
PrintGrammarChar(output, elem.Value);
break;

case LLamaGrammarElementType.CHAR_RNG_UPPER:
if (i == 0 || !rule.Elements[i - 1].IsCharElement())
{
throw new GrammarFormatException(
$"LLamaGrammarElementType.CHAR_RNG_UPPER without preceding char: {ruleId},{i}");
}
output.Append('-');
PrintGrammarChar(output, elem.Value);
break;

case LLamaGrammarElementType.CHAR_ALT:
if (i == 0 || !rule.Elements[i - 1].IsCharElement())
{
throw new GrammarFormatException(
$"LLamaGrammarElementType.CHAR_ALT without preceding char: {ruleId},{i}");
}
PrintGrammarChar(output, elem.Value);
break;

Expand Down
13 changes: 6 additions & 7 deletions LLama/Grammars/GrammarRule.cs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
using System;
using System.Collections.Generic;
using LLama.Exceptions;
using LLama.Native;

namespace LLama.Grammars
Expand Down Expand Up @@ -36,7 +37,7 @@ public GrammarRule(string name, IReadOnlyList<LLamaGrammarElement> elements)
private static void Validate(IReadOnlyList<LLamaGrammarElement> elements, string name)
{
if (elements.Count == 0)
throw new ArgumentException("Cannot create a GrammaRule with zero elements", nameof(elements));
throw new ArgumentException("Cannot create a GrammarRule with zero elements", nameof(elements));
if (elements[elements.Count - 1].Type != LLamaGrammarElementType.END)
throw new ArgumentException("Last grammar element must be END", nameof(elements));

Expand All @@ -46,18 +47,16 @@ private static void Validate(IReadOnlyList<LLamaGrammarElement> elements, string
{
case LLamaGrammarElementType.END:
if (i != elements.Count - 1)
throw new ArgumentException("Found more than one END grammar element", nameof(elements));
throw new GrammarUnexpectedEndElement(name, i);
continue;

case LLamaGrammarElementType.CHAR_RNG_UPPER:
if (i == 0 || !elements[i - 1].IsCharElement())
throw new ArgumentException($"LLamaGrammarElementType.CHAR_RNG_UPPER without preceding char: {name},{i}", nameof(elements));
throw new GrammarUnexpectedCharRngElement(name, i);
break;
case LLamaGrammarElementType.CHAR_ALT:
if (i == 0 || !elements[i - 1].IsCharElement())
{
throw new ArgumentException($"LLamaGrammarElementType.CHAR_ALT without preceding char: {name},{i}", nameof(elements));
}
throw new GrammarUnexpectedCharAltElement(name, i);
break;

case LLamaGrammarElementType.ALT:
Expand All @@ -67,7 +66,7 @@ private static void Validate(IReadOnlyList<LLamaGrammarElement> elements, string
break;

default:
throw new ArgumentException($"Unknown grammar element type: '{elements[i].Type}'");
throw new ArgumentException($"Unknown grammar element type: '{elements[i].Type}'", nameof(elements));
}
}
}
Expand Down

0 comments on commit 4d079bf

Please sign in to comment.