Skip to content

Commit

Permalink
Complete the annotation backwards, starting from the (start - 1) character (
Browse files Browse the repository at this point in the history
…#44)

* start from the (start - 1) character for backward annotation completion

* add a test for completing a span backwards
  • Loading branch information
EgoLaparra authored Jul 25, 2019
1 parent b73a6d2 commit b478afd
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -229,7 +229,7 @@ class TemporalNeuralParser(modelStream: Option[InputStream] = None) {

// Complete the annotation if the span does not cover the whole token (but only expand over "O" labels)
val doExpand = (i: Int) => wordCharIndices.contains(i) && predictedLabels.lift(i - snippetStart).contains("O")
val wordStart = Iterator.from(start, -1).takeWhile(doExpand).toSeq.lastOption.getOrElse(start)
val wordStart = Iterator.from(start - 1, -1).takeWhile(doExpand).toSeq.lastOption.getOrElse(start)
val wordEnd = Iterator.from(end, +1).takeWhile(doExpand).toSeq.lastOption.map(_ + 1).getOrElse(end)
(wordStart, wordEnd, label)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ class TemporalNeuralParserTest extends FunSuite with TypesSuite {
|refugees arrived in Gambella, Ethiopia, bringing the
|total number of new arrivals since September 2016
|to 77,874.
|FOOD PROGRAMME Rome, 2016 The designations employed and the presentation of material in this
""".stripMargin.trim,
Array(
(0, 10), // 2018-10-10
Expand All @@ -40,6 +41,7 @@ class TemporalNeuralParserTest extends FunSuite with TypesSuite {
(181, 197), // since last March
(198, 354), // A substantial ... pound.
(355, 519), // Between 1 ... 77,874
(520, 614), // FOOD PROGRAMME ... in this
),
// use a SimpleInterval here so that there's no associated character span
SimpleInterval(dct.start, dct.end),
Expand Down Expand Up @@ -94,6 +96,12 @@ class TemporalNeuralParserTest extends FunSuite with TypesSuite {
assert(sinceSep2016.end === dct.end)
}

test("fill-incomplete-span-backwards") {
  // Regression check for backward completion of a partially predicted span.
  // The snippet declared at offset 520 reads "FOOD PROGRAMME Rome, 2016 ...", and
  // "FOOD PROGRAMME Rome, " is 21 characters long, so "2016" occupies 541 until 545.
  // NOTE(review): batch(7) is presumably the parse of that (520, 614) snippet -- confirm
  // against the full span array, which is partly collapsed in this view.
  val Some(filledYear: Interval) = batch(7).headOption

  // The reported character span must cover the whole "2016" token.
  val expectedSpan = Some((541, 545))
  assert(filledYear.charSpan === expectedSpan)

  // The interval itself must be the calendar year 2016.
  val expectedInterval = SimpleInterval.of(2016)
  assert(filledYear === expectedInterval)
}

test("no-duplicate-ids") {
// In July 2019, for the text below, the parser generated [After even][Next tual][After ly], and the code for
// expanding to word boundaries expanded these all to have the same span, resulting in <entity> nodes with identical IDs
Expand Down

0 comments on commit b478afd

Please sign in to comment.