diff --git a/src/attributes/style.tsv b/src/attributes/style.tsv index 47cddf28..d2053b4e 100644 --- a/src/attributes/style.tsv +++ b/src/attributes/style.tsv @@ -5,6 +5,7 @@ Azerbaidzan PROPN nonstandard Herzegovina PROPN nonstandard Tadzikistan PROPN nonstandard Tanzania PROPN nonstandard +Te PRON formal Tsekkoslovakia PROPN nonstandard ` PUNCT nonstandard agregaatti NOUN nonstandard diff --git a/src/python/omorfi/formats/apertium_formatter.py b/src/python/omorfi/formats/apertium_formatter.py index 0010df82..0abe0ce3 100755 --- a/src/python/omorfi/formats/apertium_formatter.py +++ b/src/python/omorfi/formats/apertium_formatter.py @@ -69,6 +69,7 @@ class ApertiumFormatter(Formatter): "enc", "ess", "f", + "frm", "gen", "ij", "ill", @@ -383,6 +384,10 @@ class ApertiumFormatter(Formatter): "LOCATIVE": "loc", "FTB3MAN": "", "FTB3man": "", + "NONSTANDARD": "use_nonstd", + "ARCHAIC": "use_archaic", + "DIALECTAL": "use_dialect", + "FORMAL": "frm", ".": "", "XForeign": "use_foreign", "X": "x", @@ -532,6 +537,8 @@ def wordmap2lexc(self, wordmap): wordmap["analysis"] += self.stuff2lexc(subcat) if wordmap["stub"] in ";:": wordmap["analysis"] += self.stuff2lexc("SENTENCE-BOUNDARY") + if wordmap["style"]: + wordmap["analysis"] += self.stuff2lexc(wordmap["style"]) # XXX: for now if wordmap["lemma"] in "¹²³½¼=≥µ#/%": wordmap["analysis"] += self.stuff2lexc("NOUN")