From 108116bfb404462d09fdca0aa12a7054e073bd7f Mon Sep 17 00:00:00 2001 From: Steven Bethard Date: Fri, 9 Aug 2019 12:02:53 -0700 Subject: [PATCH] Handles cases where non-numbers (unparseable inputs) are passed to WordsToNumber. --- .../org/clulab/timenorm/scate/WordsToNumber.scala | 14 ++++++++------ .../clulab/timenorm/scfg/SynchronousParser.scala | 11 +++++++++++ .../clulab/timenorm/scate/WordsToNumberTest.scala | 2 ++ 3 files changed, 21 insertions(+), 6 deletions(-) diff --git a/src/main/scala/org/clulab/timenorm/scate/WordsToNumber.scala b/src/main/scala/org/clulab/timenorm/scate/WordsToNumber.scala index 74cc9ba7..94cdc51c 100644 --- a/src/main/scala/org/clulab/timenorm/scate/WordsToNumber.scala +++ b/src/main/scala/org/clulab/timenorm/scate/WordsToNumber.scala @@ -7,6 +7,8 @@ import org.apache.commons.io.IOUtils import org.clulab.timenorm.scfg.SynchronousParser.Tree import org.clulab.timenorm.scfg.{SynchronousGrammar, SynchronousParser} +import scala.util.{Failure, Success} + object WordsToNumber { def apply(languageCode: String): WordsToNumber = languageCode match { @@ -16,14 +18,14 @@ object WordsToNumber { class WordsToNumber(grammarStream: InputStream) extends Function[Array[String], Option[Long]] { - private val grammar = SynchronousGrammar.fromString(IOUtils.toString(grammarStream, Charset.forName("ascii"))) - private val sourceWords = grammar.sourceSymbols() - private val parser = new SynchronousParser(grammar) + private val parser = new SynchronousParser(SynchronousGrammar.fromString( + IOUtils.toString(grammarStream, Charset.forName("ascii")))) override def apply(words: Array[String]): Option[Long] = { - if (!words.forall(sourceWords)) None else parser.parseAll(words) match { - case Array(tree) => Some(this.toDigits(tree).foldLeft(0L) { case (sum, digit) => 10L * sum + digit }) - case trees => throw new UnsupportedOperationException( + parser.tryParseAll(words.toIndexedSeq) match { + case Failure(_) => None + case Success(IndexedSeq(tree)) => Some(this.toDigits(tree).foldLeft(0L) { case (sum, digit) => 10L * sum + digit }) + case Success(trees) => throw new UnsupportedOperationException( s"Ambiguous grammar for ${words.toList}. Parses:\n${trees.mkString("\n")}") } } diff --git a/src/main/scala/org/clulab/timenorm/scfg/SynchronousParser.scala b/src/main/scala/org/clulab/timenorm/scfg/SynchronousParser.scala index be1d3d8f..7ea091bc 100644 --- a/src/main/scala/org/clulab/timenorm/scfg/SynchronousParser.scala +++ b/src/main/scala/org/clulab/timenorm/scfg/SynchronousParser.scala @@ -3,6 +3,7 @@ package org.clulab.timenorm.scfg import scala.collection.immutable.IndexedSeq import scala.collection.mutable import scala.collection.mutable.ListBuffer +import scala.util.{Try, control} /** * A parser for synchronous grammars. @@ -54,6 +55,16 @@ class SynchronousParser(grammar: SynchronousGrammar) { trees } + /** + * Attempt to parse the source tokens into a tree of non-terminals and target tokens. + * + * @param sourceTokens The source tokens to be parsed. + * @return Success(trees) if the source tokens could be parsed, Failure otherwise. + */ + def tryParseAll(sourceTokens: IndexedSeq[String]): Try[IndexedSeq[Tree.NonTerminal]] = { + control.Exception.catching(classOf[UnsupportedOperationException]).withTry(parseAll(sourceTokens)) + } + private def parseChart(sourceTokens: IndexedSeq[String]): Array[Array[ChartEntry]] = { val nTokens = sourceTokens.size val chart = Array.tabulate(nTokens + 1, nTokens) { diff --git a/src/test/scala/org/clulab/timenorm/scate/WordsToNumberTest.scala b/src/test/scala/org/clulab/timenorm/scate/WordsToNumberTest.scala index c1528d2f..2d198bd2 100644 --- a/src/test/scala/org/clulab/timenorm/scate/WordsToNumberTest.scala +++ b/src/test/scala/org/clulab/timenorm/scate/WordsToNumberTest.scala @@ -36,5 +36,7 @@ class WordsToNumberTest extends FunSuite with TypesSuite { test("English invalid numbers") { assert(enTextToNumber(Array("several")) === None) + assert(enTextToNumber(Array("and")) === None) + assert(enTextToNumber(Array.empty) === None) } }