From 9ca048ef8ded3074bc4004dae28af95971e8a2fd Mon Sep 17 00:00:00 2001 From: mark-baas Date: Fri, 27 Oct 2023 17:00:59 +0700 Subject: [PATCH] [nl] improve CompoundAcceptor --- .../languagetool/rules/nl/CompoundAcceptor.java | 15 +++++++-------- .../rules/nl/CompoundAcceptorTest.java | 17 ++++++++--------- 2 files changed, 15 insertions(+), 17 deletions(-) diff --git a/languagetool-language-modules/nl/src/main/java/org/languagetool/rules/nl/CompoundAcceptor.java b/languagetool-language-modules/nl/src/main/java/org/languagetool/rules/nl/CompoundAcceptor.java index e80162a1bc95..89c689e92f14 100644 --- a/languagetool-language-modules/nl/src/main/java/org/languagetool/rules/nl/CompoundAcceptor.java +++ b/languagetool-language-modules/nl/src/main/java/org/languagetool/rules/nl/CompoundAcceptor.java @@ -38,13 +38,12 @@ public class CompoundAcceptor { // compound parts that need an 's' appended to be used as first part of the compound: - // "teit", "ing", "heid", "schap", "ker" - private final Set needsS1 = new HashSet<>(Arrays.asList( - "bedrijfs", "passagiers" //, "dorps", "gezichts", "lijdens", "besturings", "verbrandings", "bestemmings", "schoonheids" + private final Set needsS = new HashSet<>(Arrays.asList( + "bedrijfs", "passagiers", "dorps", "gezichts", "lijdens", "besturings", "verbrandings", "bestemmings", "schoonheids" )); // compound parts that must not have an 's' appended to be used as first part of the compound: - private final Set noS1 = new HashSet<>(Arrays.asList( - "sport", "woning" //, "kinder", "fractie", "zout", "schade", "energie", "gemeente", "dienst", "wereld", "telefoon", "winkel", "aandeel", "zwanger", "papier" + private final Set noS = new HashSet<>(Arrays.asList( + "woning", "kinder", "fractie", "schade", "energie", "gemeente", "dienst", "wereld", "telefoon", "aandeel", "zwanger", "papier" )); private final MorfologikDutchSpellerRule speller; @@ -79,11 +78,11 @@ boolean acceptCompound(String part1, String part2) throws IOException { boolean okWithDash = false; boolean okWithoutS = false; if (part1.endsWith("s")) { - okWithS = spellingOk(part1.replaceFirst("s$", "")) && spellingOk(part2) && needsS1.contains(part1); + okWithS = spellingOk(part1.replaceFirst("s$", "")) && spellingOk(part2) && needsS.contains(part1.toLowerCase()); } else if ( part1.endsWith("-")) { okWithDash = abbrevOk(part1) && spellingOk(part2); } else { - okWithoutS = spellingOk(part1) && spellingOk(part2) && noS1.contains(part1); + okWithoutS = spellingOk(part1) && spellingOk(part2) && noS.contains(part1.toLowerCase()); } //System.out.println(" okWithS: " + okWithS + ", okWithoutS " + okWithoutS); return okWithS || okWithDash || okWithoutS; @@ -96,7 +95,7 @@ private boolean abbrevOk(String nonCompound) { private boolean spellingOk(String nonCompound) throws IOException { AnalyzedSentence as = new AnalyzedSentence(new AnalyzedTokenReadings[] { - new AnalyzedTokenReadings(new AnalyzedToken(nonCompound, "FAKE_POS", "fakeLemma")) + new AnalyzedTokenReadings(new AnalyzedToken(nonCompound.toLowerCase(), "FAKE_POS", "fakeLemma")) }); RuleMatch[] matches = speller.match(as); return matches.length == 0; diff --git a/languagetool-language-modules/nl/src/test/java/org/languagetool/rules/nl/CompoundAcceptorTest.java b/languagetool-language-modules/nl/src/test/java/org/languagetool/rules/nl/CompoundAcceptorTest.java index 401b2e1aabc6..94ed8f2fe72b 100644 --- a/languagetool-language-modules/nl/src/test/java/org/languagetool/rules/nl/CompoundAcceptorTest.java +++ b/languagetool-language-modules/nl/src/test/java/org/languagetool/rules/nl/CompoundAcceptorTest.java @@ -37,25 +37,24 @@ public void testAcceptCompound() throws IOException { assertTrue(acceptor.acceptCompound("VRF-regels")); assertFalse(acceptor.acceptCompound("VRFregels")); - //assertTrue(acceptor.acceptCompound("winkeldiefstal")); - //assertFalse(acceptor.acceptCompound("winkelsdiefstal")); - assertTrue(acceptor.acceptCompound("bedrijfsregels")); assertFalse(acceptor.acceptCompound("bedrijfregels")); - //assertTrue(acceptor.acceptCompound("zwangerschap")); - //assertFalse(acceptor.acceptCompound("zwangersschap")); + assertTrue(acceptor.acceptCompound("Bedrijfsbrommer")); + assertFalse(acceptor.acceptCompound("Bedrijfbrommer")); + + assertTrue(acceptor.acceptCompound("zwangerschap")); + assertFalse(acceptor.acceptCompound("zwangersschap")); - //assertTrue(acceptor.acceptCompound("papierversnipperaar")); - //assertFalse(acceptor.acceptCompound("papiersversnipperaar")); + assertTrue(acceptor.acceptCompound("Papierversnipperaar")); + assertFalse(acceptor.acceptCompound("Papiersversnipperaar")); } @Test public void testAcceptCompoundInternal() throws IOException { CompoundAcceptor acceptor = new CompoundAcceptor(); assertTrue(acceptor.acceptCompound("passagiers", "schip")); - //assertTrue(acceptor.acceptCompound("papier", "versnipperaar")); - //assertTrue(acceptor.acceptCompound("winkel", "diefstal")); + assertTrue(acceptor.acceptCompound("papier", "versnipperaar")); } } \ No newline at end of file