diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml new file mode 100644 index 0000000..0fcf39e --- /dev/null +++ b/.github/workflows/docker-publish.yml @@ -0,0 +1,96 @@ +name: Docker + +# This workflow uses actions that are not certified by GitHub. +# They are provided by a third-party and are governed by +# separate terms of service, privacy policy, and support +# documentation. + +on: + schedule: + - cron: '19 18 * * *' + push: + branches: [ "uber" ] + # Publish semver tags as releases. + tags: [ 'v*.*.*' ] + pull_request: + branches: [ "uber" ] + +env: + # Use docker.io for Docker Hub if empty + REGISTRY: ghcr.io + # github.repository as <account>/<repo> + IMAGE_NAME: ${{ github.repository }} + + +jobs: + build: + + runs-on: ubuntu-latest + permissions: + contents: read + packages: write + # This is used to complete the identity challenge + # with sigstore/fulcio when running outside of PRs. + id-token: write + + steps: + - name: Checkout repository + uses: actions/checkout@v3 + + # Install the cosign tool except on PR + # https://github.com/sigstore/cosign-installer + - name: Install cosign + if: github.event_name != 'pull_request' + uses: sigstore/cosign-installer@f3c664df7af409cb4873aa5068053ba9d61a57b6 #v2.6.0 + with: + cosign-release: 'v1.11.0' + + + # Workaround: https://github.com/docker/build-push-action/issues/461 + - name: Setup Docker buildx + uses: docker/setup-buildx-action@79abd3f86f79a9d68a23c75a09a9a85889262adf + + # Login against a Docker registry except on PR + # https://github.com/docker/login-action + - name: Log into registry ${{ env.REGISTRY }} + if: github.event_name != 'pull_request' + uses: docker/login-action@28218f9b04b4f3f62068d7b6ce6ca5b26e35336c + with: + registry: ${{ env.REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + # Extract metadata (tags, labels) for Docker + # https://github.com/docker/metadata-action + - name: Extract Docker metadata + id: meta + uses: 
docker/metadata-action@98669ae865ea3cffbcbaa878cf57c20bbf1c6c38 + with: + images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} + + # Build and push Docker image with Buildx (don't push on PR) + # https://github.com/docker/build-push-action + - name: Build and push Docker image + id: build-and-push + uses: docker/build-push-action@ac9327eae2b366085ac7f6a2d02df8aa8ead720a + with: + context: . + push: ${{ github.event_name != 'pull_request' }} + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} + cache-from: type=gha + cache-to: type=gha,mode=max + + +# # Sign the resulting Docker image digest except on PRs. +# # This will only write to the public Rekor transparency log when the Docker +# # repository is public to avoid leaking data. If you would like to publish +# # transparency data even for private images, pass --force to cosign below. +# # https://github.com/sigstore/cosign +# - name: Sign the published Docker image +# if: ${{ github.event_name != 'pull_request' }} +# env: +# COSIGN_EXPERIMENTAL: "true" +# # This step uses the identity token to provision an ephemeral certificate +# # against the sigstore community Fulcio instance. +# run: echo "${{ steps.meta.outputs.tags }}" | xargs -I {} cosign sign {}@${{ steps.build-and-push.outputs.digest }} diff --git a/Dockerfile b/Dockerfile index 851b411..91f118c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,14 +1,9 @@ -FROM java:8-jdk - -RUN set -ex \ - && apt-get update \ - && apt-get install flip +FROM openjdk:11 ADD . /app RUN set -ex \ && cd /app \ - && flip -u ./gradlew \ - && ./gradlew build + && ./gradlew build --no-daemon EXPOSE 8080 WORKDIR /app diff --git a/README.md b/README.md index 01a8e49..5f74eb1 100644 --- a/README.md +++ b/README.md @@ -1,27 +1,27 @@ -## Це — веб-служба (REST API) для аналізу українських текстів (NLP) за допомогою LanguageTool. 
## +## REST API для аналізу українських текстів (NLP) за допомогою LanguageTool -## This project provides REST API for analyzing Ukrainian texts with LanguageTool. ## +This project provides a REST API for analyzing Ukrainian texts with LanguageTool. +### Quick Start -### Як встановити ### -* Встановити java (JDK 8 або новішу) -* Клонувати проект з github -* Запустити `./gradlew bootRun` -* Документація в JSON: http://localhost:8080/v2/api-docs -* Документація з UI: http://localhost:8080/swagger-ui.html +The `/uber` endpoint accepts a batch of texts, processes all of them in parallel and returns [sentences, tokens, lemmas] for each text. -### Як використовувати через Docker ### +Start the web server on port 8080: ``` -docker build -t api_nlp_uk:latest . -docker run -it -p 8080:8080 api_nlp_uk:latest -curl -X POST -H "Content-Type: application/json" -d "{'text': 'Сьогодні у продажі. 12-те зібрання творів 1969 р. І. П. Котляревського.'}" http://localhost:8080/lemmatize/ +docker run -p 8080:8080 ghcr.io/proger/api_nlp_uk:uber ``` -Або можна викоритсовувати Docker image який підтримується в іншому репозиторії. +Run example query: ``` -docker run -it -p 5000:5000 chaliy/api_nlp_uk:latest +curl -s -X POST -H "Content-Type: application/json" -d "['Привіт, котанче. Як справи?', 'Ну шо, приїхали?']" http://localhost:8080/uber \ + | jq -c '.[]' +``` + +This will respond like: +``` +[[["Привіт, котанче. ",["Привіт",","," ","котанче","."," "],["привіт",","," ","котанче","."," "]],["Як справи?",["Як"," ","справи","?"],["як"," ","справа","?"]]],[["Ну шо, приїхали?",["Ну"," ","шо",","," ","приїхали","?"],["ну"," ","шо",","," ","приїхати","?"]]]] ``` Вільно розповсюджується за умов ліцензії GPL версії 3. 
diff --git a/build.gradle b/build.gradle index ba8c208..5254cb1 100644 --- a/build.gradle +++ b/build.gradle @@ -2,17 +2,17 @@ buildscript { repositories { mavenLocal() maven { url "https://repo.grails.org/grails/core" } + mavenCentral() } dependencies { classpath "org.grails:grails-gradle-plugin:$grailsVersion" // classpath "org.grails.plugins:hibernate5:6.0.0" - classpath "org.grails.plugins:views-gradle:1.1.2" + classpath "org.grails.plugins:views-gradle:2.3.2" } } version "1.0" group "languagetool_nlp" - apply plugin:"eclipse" apply plugin:"idea" apply plugin:"war" @@ -25,6 +25,7 @@ war.archiveName='languagetool.war' repositories { mavenLocal() maven { url "https://repo.grails.org/grails/core" } + mavenCentral() } dependencyManagement { @@ -35,44 +36,44 @@ dependencyManagement { } dependencies { - compile "org.languagetool:language-uk:3.7" + implementation "org.languagetool:language-uk:6.0" -// compile "org.grails.plugins:swaggydoc-grails3:0.28.0" - compile "io.springfox:springfox-swagger2:2.6.1" - compile "io.springfox:springfox-swagger-ui:2.6.1" +// implementation "org.grails.plugins:swaggydoc-grails3:0.28.0" + implementation "io.springfox:springfox-swagger2:3.0.0" + implementation "io.springfox:springfox-swagger-ui:3.0.0" - compile "org.grails:grails-dependencies" + implementation "org.grails:grails-dependencies" - compile "org.springframework.boot:spring-boot-starter-logging" - compile "org.springframework.boot:spring-boot-autoconfigure" - compile "org.grails:grails-core" - compile "org.springframework.boot:spring-boot-starter-actuator" - compile "org.springframework.boot:spring-boot-starter-tomcat" - compile "org.grails:grails-plugin-url-mappings" - compile "org.grails:grails-plugin-rest" - compile "org.grails:grails-plugin-codecs" - compile "org.grails:grails-plugin-interceptors" - compile "org.grails:grails-plugin-services" -// compile "org.grails:grails-plugin-datasource" -// compile "org.grails:grails-plugin-databinding" - compile 
"org.grails:grails-plugin-async" - compile "org.grails:grails-web-boot" - compile "org.grails:grails-logging" - compile "org.grails.plugins:cache" -// compile "org.grails.plugins:hibernate5" -// compile "org.hibernate:hibernate-core:5.1.1.Final" -// compile "org.hibernate:hibernate-ehcache:5.1.1.Final" - compile "org.grails.plugins:views-json" - compile "org.grails.plugins:views-json-templates" + implementation "org.springframework.boot:spring-boot-starter-logging" + implementation "org.springframework.boot:spring-boot-autoconfigure" + implementation "org.grails:grails-core" + implementation "org.springframework.boot:spring-boot-starter-actuator" + implementation "org.springframework.boot:spring-boot-starter-tomcat" + implementation "org.grails:grails-plugin-url-mappings" + implementation "org.grails:grails-plugin-rest" + implementation "org.grails:grails-plugin-codecs" + implementation "org.grails:grails-plugin-interceptors" + implementation "org.grails:grails-plugin-services" +// implementation "org.grails:grails-plugin-datasource" +// implementation "org.grails:grails-plugin-databinding" + // implementation "org.grails:grails-plugin-async" + implementation "org.grails:grails-web-boot" + implementation "org.grails:grails-logging" + implementation "org.grails.plugins:cache" +// implementation "org.grails.plugins:hibernate5" +// implementation "org.hibernate:hibernate-core:5.1.1.Final" +// implementation "org.hibernate:hibernate-ehcache:5.1.1.Final" + implementation "org.grails.plugins:views-json" + implementation "org.grails.plugins:views-json-templates" console "org.grails:grails-console" profile "org.grails.profiles:rest-api" // provided "org.codehaus.groovy:groovy-ant" // runtime "com.h2database:h2" - testCompile "org.grails:grails-plugin-testing" - testCompile "org.grails.plugins:geb" -// testCompile "org.grails:grails-datastore-rest-client" - testRuntime "org.seleniumhq.selenium:selenium-htmlunit-driver:2.47.1" - testRuntime 
"net.sourceforge.htmlunit:htmlunit:2.18" +// testCompile "org.grails:grails-plugin-testing" +// testCompile "org.grails.plugins:geb" +//// testCompile "org.grails:grails-datastore-rest-client" +// testRuntime "org.seleniumhq.selenium:selenium-htmlunit-driver:2.47.1" +// testRuntime "net.sourceforge.htmlunit:htmlunit:2.18" } bootRun { diff --git a/gradle.properties b/gradle.properties index 8df217e..8629881 100644 --- a/gradle.properties +++ b/gradle.properties @@ -1,2 +1,2 @@ -grailsVersion=3.2.0 -gradleWrapperVersion=3.0 +groovyVersion=3.0.7 +grailsVersion=5.2.4 diff --git a/gradle/wrapper/gradle-wrapper.jar b/gradle/wrapper/gradle-wrapper.jar index deedc7f..249e583 100644 Binary files a/gradle/wrapper/gradle-wrapper.jar and b/gradle/wrapper/gradle-wrapper.jar differ diff --git a/gradle/wrapper/gradle-wrapper.properties b/gradle/wrapper/gradle-wrapper.properties index 86185e7..ae04661 100644 --- a/gradle/wrapper/gradle-wrapper.properties +++ b/gradle/wrapper/gradle-wrapper.properties @@ -1,6 +1,5 @@ -#Fri Nov 27 23:09:32 CET 2015 distributionBase=GRADLE_USER_HOME distributionPath=wrapper/dists +distributionUrl=https\://services.gradle.org/distributions/gradle-7.5.1-bin.zip zipStoreBase=GRADLE_USER_HOME zipStorePath=wrapper/dists -distributionUrl=https\://services.gradle.org/distributions/gradle-3.0-bin.zip diff --git a/gradlew b/gradlew index 9d82f78..a69d9cb 100755 --- a/gradlew +++ b/gradlew @@ -1,74 +1,129 @@ -#!/usr/bin/env bash +#!/bin/sh + +# +# Copyright © 2015-2021 the original authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# ############################################################################## -## -## Gradle start up script for UN*X -## +# +# Gradle start up script for POSIX generated by Gradle. +# +# Important for running: +# +# (1) You need a POSIX-compliant shell to run this script. If your /bin/sh is +# noncompliant, but you have some other compliant shell such as ksh or +# bash, then to run this script, type that shell name before the whole +# command line, like: +# +# ksh Gradle +# +# Busybox and similar reduced shells will NOT work, because this script +# requires all of these POSIX shell features: +# * functions; +# * expansions «$var», «${var}», «${var:-default}», «${var+SET}», +# «${var#prefix}», «${var%suffix}», and «$( cmd )»; +# * compound commands having a testable exit status, especially «case»; +# * various built-in commands including «command», «set», and «ulimit». +# +# Important for patching: +# +# (2) This script targets any POSIX shell, so it avoids extensions provided +# by Bash, Ksh, etc; in particular arrays are avoided. +# +# The "traditional" practice of packing multiple parameters into a +# space-separated string is a well documented source of bugs and security +# problems, so this is (mostly) avoided, by progressively accumulating +# options in "$@", and eventually passing that to Java. +# +# Where the inherited environment variables (DEFAULT_JVM_OPTS, JAVA_OPTS, +# and GRADLE_OPTS) rely on word-splitting, this is performed explicitly; +# see the in-line comments for details. +# +# There are tweaks for specific operating systems such as AIX, CygWin, +# Darwin, MinGW, and NonStop. +# +# (3) This script is generated from the Groovy template +# https://github.com/gradle/gradle/blob/master/subprojects/plugins/src/main/resources/org/gradle/api/internal/plugins/unixStartScript.txt +# within the Gradle project. 
+# +# You can find Gradle at https://github.com/gradle/gradle/. +# ############################################################################## -# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. -DEFAULT_JVM_OPTS="" +# Attempt to set APP_HOME + +# Resolve links: $0 may be a link +app_path=$0 + +# Need this for daisy-chained symlinks. +while + APP_HOME=${app_path%"${app_path##*/}"} # leaves a trailing /; empty if no leading path + [ -h "$app_path" ] +do + ls=$( ls -ld "$app_path" ) + link=${ls#*' -> '} + case $link in #( + /*) app_path=$link ;; #( + *) app_path=$APP_HOME$link ;; + esac +done + +APP_HOME=$( cd "${APP_HOME:-./}" && pwd -P ) || exit APP_NAME="Gradle" -APP_BASE_NAME=`basename "$0"` +APP_BASE_NAME=${0##*/} + +# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. +DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"' # Use the maximum available, or set MAX_FD != -1 to use that value. -MAX_FD="maximum" +MAX_FD=maximum -warn ( ) { +warn () { echo "$*" -} +} >&2 -die ( ) { +die () { echo echo "$*" echo exit 1 -} +} >&2 # OS specific support (must be 'true' or 'false'). cygwin=false msys=false darwin=false -case "`uname`" in - CYGWIN* ) - cygwin=true - ;; - Darwin* ) - darwin=true - ;; - MINGW* ) - msys=true - ;; +nonstop=false +case "$( uname )" in #( + CYGWIN* ) cygwin=true ;; #( + Darwin* ) darwin=true ;; #( + MSYS* | MINGW* ) msys=true ;; #( + NONSTOP* ) nonstop=true ;; esac -# Attempt to set APP_HOME -# Resolve links: $0 may be a link -PRG="$0" -# Need this for relative symlinks. 
-while [ -h "$PRG" ] ; do - ls=`ls -ld "$PRG"` - link=`expr "$ls" : '.*-> \(.*\)$'` - if expr "$link" : '/.*' > /dev/null; then - PRG="$link" - else - PRG=`dirname "$PRG"`"/$link" - fi -done -SAVED="`pwd`" -cd "`dirname \"$PRG\"`/" >/dev/null -APP_HOME="`pwd -P`" -cd "$SAVED" >/dev/null - CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar + # Determine the Java command to use to start the JVM. if [ -n "$JAVA_HOME" ] ; then if [ -x "$JAVA_HOME/jre/sh/java" ] ; then # IBM's JDK on AIX uses strange locations for the executables - JAVACMD="$JAVA_HOME/jre/sh/java" + JAVACMD=$JAVA_HOME/jre/sh/java else - JAVACMD="$JAVA_HOME/bin/java" + JAVACMD=$JAVA_HOME/bin/java fi if [ ! -x "$JAVACMD" ] ; then die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME @@ -77,7 +132,7 @@ Please set the JAVA_HOME variable in your environment to match the location of your Java installation." fi else - JAVACMD="java" + JAVACMD=java which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. Please set the JAVA_HOME variable in your environment to match the @@ -85,76 +140,101 @@ location of your Java installation." fi # Increase the maximum file descriptors if we can. -if [ "$cygwin" = "false" -a "$darwin" = "false" ] ; then - MAX_FD_LIMIT=`ulimit -H -n` - if [ $? -eq 0 ] ; then - if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then - MAX_FD="$MAX_FD_LIMIT" - fi - ulimit -n $MAX_FD - if [ $? -ne 0 ] ; then - warn "Could not set maximum file descriptor limit: $MAX_FD" - fi - else - warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT" - fi +if ! "$cygwin" && ! "$darwin" && ! 
"$nonstop" ; then + case $MAX_FD in #( + max*) + MAX_FD=$( ulimit -H -n ) || + warn "Could not query maximum file descriptor limit" + esac + case $MAX_FD in #( + '' | soft) :;; #( + *) + ulimit -n "$MAX_FD" || + warn "Could not set maximum file descriptor limit to $MAX_FD" + esac fi -# For Darwin, add options to specify how the application appears in the dock -if $darwin; then - GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\"" -fi +# Collect all arguments for the java command, stacking in reverse order: +# * args from the command line +# * the main class name +# * -classpath +# * -D...appname settings +# * --module-path (only if needed) +# * DEFAULT_JVM_OPTS, JAVA_OPTS, and GRADLE_OPTS environment variables. + +# For Cygwin or MSYS, switch paths to Windows format before running java +if "$cygwin" || "$msys" ; then + APP_HOME=$( cygpath --path --mixed "$APP_HOME" ) + CLASSPATH=$( cygpath --path --mixed "$CLASSPATH" ) + + JAVACMD=$( cygpath --unix "$JAVACMD" ) -# For Cygwin, switch paths to Windows format before running java -if $cygwin ; then - APP_HOME=`cygpath --path --mixed "$APP_HOME"` - CLASSPATH=`cygpath --path --mixed "$CLASSPATH"` - JAVACMD=`cygpath --unix "$JAVACMD"` - - # We build the pattern for arguments to be converted via cygpath - ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null` - SEP="" - for dir in $ROOTDIRSRAW ; do - ROOTDIRS="$ROOTDIRS$SEP$dir" - SEP="|" - done - OURCYGPATTERN="(^($ROOTDIRS))" - # Add a user-defined pattern to the cygpath arguments - if [ "$GRADLE_CYGPATTERN" != "" ] ; then - OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)" - fi # Now convert the arguments - kludge to limit ourselves to /bin/sh - i=0 - for arg in "$@" ; do - CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -` - CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option - - if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition - eval `echo args$i`=`cygpath --path --ignore --mixed 
"$arg"` - else - eval `echo args$i`="\"$arg\"" + for arg do + if + case $arg in #( + -*) false ;; # don't mess with options #( + /?*) t=${arg#/} t=/${t%%/*} # looks like a POSIX filepath + [ -e "$t" ] ;; #( + *) false ;; + esac + then + arg=$( cygpath --path --ignore --mixed "$arg" ) fi - i=$((i+1)) + # Roll the args list around exactly as many times as the number of + # args, so each arg winds up back in the position where it started, but + # possibly modified. + # + # NB: a `for` loop captures its iteration list before it begins, so + # changing the positional parameters here affects neither the number of + # iterations, nor the values presented in `arg`. + shift # remove old arg + set -- "$@" "$arg" # push replacement arg done - case $i in - (0) set -- ;; - (1) set -- "$args0" ;; - (2) set -- "$args0" "$args1" ;; - (3) set -- "$args0" "$args1" "$args2" ;; - (4) set -- "$args0" "$args1" "$args2" "$args3" ;; - (5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;; - (6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;; - (7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;; - (8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;; - (9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;; - esac fi -# Split up the JVM_OPTS And GRADLE_OPTS values into an array, following the shell quoting and substitution rules -function splitJvmOpts() { - JVM_OPTS=("$@") -} -eval splitJvmOpts $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS -JVM_OPTS[${#JVM_OPTS[*]}]="-Dorg.gradle.appname=$APP_BASE_NAME" +# Collect all arguments for the java command; +# * $DEFAULT_JVM_OPTS, $JAVA_OPTS, and $GRADLE_OPTS can contain fragments of +# shell script including quotes and variable substitutions, so put them in +# double quotes to make sure that they get re-expanded; and +# * put everything else in single quotes, so that it's not re-expanded. 
+ +set -- \ + "-Dorg.gradle.appname=$APP_BASE_NAME" \ + -classpath "$CLASSPATH" \ + org.gradle.wrapper.GradleWrapperMain \ + "$@" + +# Stop when "xargs" is not available. +if ! command -v xargs >/dev/null 2>&1 +then + die "xargs is not available" +fi -exec "$JAVACMD" "${JVM_OPTS[@]}" -classpath "$CLASSPATH" org.gradle.wrapper.GradleWrapperMain "$@" +# Use "xargs" to parse quoted args. +# +# With -n1 it outputs one arg per line, with the quotes and backslashes removed. +# +# In Bash we could simply go: +# +# readarray ARGS < <( xargs -n1 <<<"$var" ) && +# set -- "${ARGS[@]}" "$@" +# +# but POSIX shell has neither arrays nor command substitution, so instead we +# post-process each arg (as a line of input to sed) to backslash-escape any +# character that might be a shell metacharacter, then use eval to reverse +# that process (while maintaining the separation between arguments), and wrap +# the whole thing up as a single "set" statement. +# +# This will of course break if any of these variables contains a newline or +# an unmatched quote. +# + +eval "set -- $( + printf '%s\n' "$DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS" | + xargs -n1 | + sed ' s~[^-[:alnum:]+,./:=@_]~\\&~g; ' | + tr '\n' ' ' + )" '"$@"' + +exec "$JAVACMD" "$@" diff --git a/gradlew.bat b/gradlew.bat index 8a0b282..53a6b23 100644 --- a/gradlew.bat +++ b/gradlew.bat @@ -1,90 +1,91 @@ -@if "%DEBUG%" == "" @echo off -@rem ########################################################################## -@rem -@rem Gradle startup script for Windows -@rem -@rem ########################################################################## - -@rem Set local scope for the variables with windows NT shell -if "%OS%"=="Windows_NT" setlocal - -@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. -set DEFAULT_JVM_OPTS= - -set DIRNAME=%~dp0 -if "%DIRNAME%" == "" set DIRNAME=. 
-set APP_BASE_NAME=%~n0 -set APP_HOME=%DIRNAME% - -@rem Find java.exe -if defined JAVA_HOME goto findJavaFromJavaHome - -set JAVA_EXE=java.exe -%JAVA_EXE% -version >NUL 2>&1 -if "%ERRORLEVEL%" == "0" goto init - -echo. -echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. -echo. -echo Please set the JAVA_HOME variable in your environment to match the -echo location of your Java installation. - -goto fail - -:findJavaFromJavaHome -set JAVA_HOME=%JAVA_HOME:"=% -set JAVA_EXE=%JAVA_HOME%/bin/java.exe - -if exist "%JAVA_EXE%" goto init - -echo. -echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% -echo. -echo Please set the JAVA_HOME variable in your environment to match the -echo location of your Java installation. - -goto fail - -:init -@rem Get command-line arguments, handling Windowz variants - -if not "%OS%" == "Windows_NT" goto win9xME_args -if "%@eval[2+2]" == "4" goto 4NT_args - -:win9xME_args -@rem Slurp the command line arguments. -set CMD_LINE_ARGS= -set _SKIP=2 - -:win9xME_args_slurp -if "x%~1" == "x" goto execute - -set CMD_LINE_ARGS=%* -goto execute - -:4NT_args -@rem Get arguments from the 4NT Shell from JP Software -set CMD_LINE_ARGS=%$ - -:execute -@rem Setup the command line - -set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar - -@rem Execute Gradle -"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS% - -:end -@rem End local scope for the variables with windows NT shell -if "%ERRORLEVEL%"=="0" goto mainEnd - -:fail -rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of -rem the _cmd.exe /c_ return code! -if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1 -exit /b 1 - -:mainEnd -if "%OS%"=="Windows_NT" endlocal - -:omega +@rem +@rem Copyright 2015 the original author or authors. 
+@rem +@rem Licensed under the Apache License, Version 2.0 (the "License"); +@rem you may not use this file except in compliance with the License. +@rem You may obtain a copy of the License at +@rem +@rem https://www.apache.org/licenses/LICENSE-2.0 +@rem +@rem Unless required by applicable law or agreed to in writing, software +@rem distributed under the License is distributed on an "AS IS" BASIS, +@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +@rem See the License for the specific language governing permissions and +@rem limitations under the License. +@rem + +@if "%DEBUG%"=="" @echo off +@rem ########################################################################## +@rem +@rem Gradle startup script for Windows +@rem +@rem ########################################################################## + +@rem Set local scope for the variables with windows NT shell +if "%OS%"=="Windows_NT" setlocal + +set DIRNAME=%~dp0 +if "%DIRNAME%"=="" set DIRNAME=. +set APP_BASE_NAME=%~n0 +set APP_HOME=%DIRNAME% + +@rem Resolve any "." and ".." in APP_HOME to make it shorter. +for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi + +@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. +set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m" + +@rem Find java.exe +if defined JAVA_HOME goto findJavaFromJavaHome + +set JAVA_EXE=java.exe +%JAVA_EXE% -version >NUL 2>&1 +if %ERRORLEVEL% equ 0 goto execute + +echo. +echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. +echo. +echo Please set the JAVA_HOME variable in your environment to match the +echo location of your Java installation. + +goto fail + +:findJavaFromJavaHome +set JAVA_HOME=%JAVA_HOME:"=% +set JAVA_EXE=%JAVA_HOME%/bin/java.exe + +if exist "%JAVA_EXE%" goto execute + +echo. +echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% +echo. 
+echo Please set the JAVA_HOME variable in your environment to match the +echo location of your Java installation. + +goto fail + +:execute +@rem Setup the command line + +set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar + + +@rem Execute Gradle +"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %* + +:end +@rem End local scope for the variables with windows NT shell +if %ERRORLEVEL% equ 0 goto mainEnd + +:fail +rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of +rem the _cmd.exe /c_ return code! +set EXIT_CODE=%ERRORLEVEL% +if %EXIT_CODE% equ 0 set EXIT_CODE=1 +if not ""=="%GRADLE_EXIT_CONSOLE%" exit %EXIT_CODE% +exit /b %EXIT_CODE% + +:mainEnd +if "%OS%"=="Windows_NT" endlocal + +:omega diff --git a/grails-app/conf/logback.groovy b/grails-app/conf/logback.groovy deleted file mode 100644 index 2f7c41c..0000000 --- a/grails-app/conf/logback.groovy +++ /dev/null @@ -1,23 +0,0 @@ -import grails.util.BuildSettings -import grails.util.Environment - -// See http://logback.qos.ch/manual/groovy.html for details on configuration -appender('STDOUT', ConsoleAppender) { - encoder(PatternLayoutEncoder) { - pattern = "%level %logger - %msg%n" - } -} - -root(ERROR, ['STDOUT']) - -def targetDir = BuildSettings.TARGET_DIR -if (Environment.isDevelopmentMode() && targetDir) { - appender("FULL_STACKTRACE", FileAppender) { - file = "${targetDir}/stacktrace.log" - append = true - encoder(PatternLayoutEncoder) { - pattern = "%level %logger - %msg%n" - } - } - logger("StackTrace", ERROR, ['FULL_STACKTRACE'], false) -} diff --git a/grails-app/conf/logback.xml b/grails-app/conf/logback.xml new file mode 100644 index 0000000..d4f4b5e --- /dev/null +++ b/grails-app/conf/logback.xml @@ -0,0 +1,12 @@ + + + + %d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n + + + + + + + + diff --git a/grails-app/conf/spring/resources.groovy 
b/grails-app/conf/spring/resources.groovy deleted file mode 100644 index 8f25469..0000000 --- a/grails-app/conf/spring/resources.groovy +++ /dev/null @@ -1,8 +0,0 @@ -// Place your Spring DSL code here -import languagetool_nlp.ApiDocumentationConfiguration - - // Place your Spring DSL code here -beans = { - apiDocumentationConfiguration(ApiDocumentationConfiguration) { - } -} diff --git a/grails-app/controllers/languagetool_nlp/UberController.groovy b/grails-app/controllers/languagetool_nlp/UberController.groovy new file mode 100644 index 0000000..5416ba1 --- /dev/null +++ b/grails-app/controllers/languagetool_nlp/UberController.groovy @@ -0,0 +1,50 @@ +package languagetool_nlp + +import grails.converters.* +import grails.rest.* + +import org.springframework.stereotype.Controller +import org.springframework.web.bind.annotation.RequestMapping; +import org.springframework.web.bind.annotation.RequestMethod; + +import io.swagger.annotations.* + +@Api(value = "Ubertext services", + description = "Ubertext services for Ukrainian language", + produces = 'application/json', + consumes = 'application/json', + tags=["uber"] +) +@Controller(value="/uber") +class UberController extends ControllerBase { + def uberService + + @ApiOperation( + value = "Ubers the text into sentences, lemmas and tokens", + httpMethod = "POST" + ) + @ApiResponses([ + @ApiResponse(code = 400, message = "Invalid body provided"), + @ApiResponse(code = 400, message = "Text limit exceeded") + ]) + @ApiImplicitParams([ + @ApiImplicitParam(name = 'body', paramType = 'body', required = true, dataType='InputBody', + value='Body text; e.g
["Сьогодні у продажі. 12-те зібрання творів 1969 р. І. П. Котляревського."]') + ]) + + @RequestMapping(value="/uber") + def save() { + + try { + def result = uberService.uber(request.JSON, params) + + render result as JSON + } + catch(Exception e) { + e.printStackTrace() + render(status: 500, text: "Internal error: " + e.getMessage()) + return + } + + } +} diff --git a/grails-app/controllers/languagetool_nlp/UrlMappings.groovy b/grails-app/controllers/languagetool_nlp/UrlMappings.groovy index a5c02f1..454d1fe 100644 --- a/grails-app/controllers/languagetool_nlp/UrlMappings.groovy +++ b/grails-app/controllers/languagetool_nlp/UrlMappings.groovy @@ -11,9 +11,10 @@ class UrlMappings { "/"(controller: 'application', action:'index') - "/tokenize"(controller: 'tokenize', action:'save') + "/tokenize"(controller: 'tokenize', action:'save') "/lemmatize"(controller: 'lemmatize', action:'save') "/tag"(controller: 'tag', action:'save') + "/uber"(controller: 'uber', action:'save') "500"(view: '/error') "404"(view: '/notFound') diff --git a/grails-app/init/languagetool_nlp/ApiDocumentationConfiguration.groovy b/grails-app/init/languagetool_nlp/ApiDocumentationConfiguration.groovy deleted file mode 100644 index eafdda2..0000000 --- a/grails-app/init/languagetool_nlp/ApiDocumentationConfiguration.groovy +++ /dev/null @@ -1,52 +0,0 @@ -package languagetool_nlp; - -import org.springframework.context.annotation.Bean -import org.springframework.context.annotation.Configuration - -import springfox.documentation.builders.ApiInfoBuilder -import springfox.documentation.builders.PathSelectors -import springfox.documentation.builders.RequestHandlerSelectors -import springfox.documentation.service.ApiInfo -import springfox.documentation.service.Contact -import springfox.documentation.spi.DocumentationType -import springfox.documentation.spring.web.plugins.Docket -import springfox.documentation.swagger.web.UiConfiguration -import springfox.documentation.swagger2.annotations.EnableSwagger2 
-//import springfox.documentation.builders.PathSelectors; -//import springfox.documentation.builders.RequestHandlerSelectors; -//import springfox.documentation.service.ApiInfo; -//import springfox.documentation.service.Contact -//import springfox.documentation.spi.DocumentationType; -//import springfox.documentation.spring.web.plugins.Docket; -//import springfox.documentation.swagger.web.UiConfiguration; -//import springfox.documentation.swagger2.annotations.EnableSwagger2; -//import static springfox.documentation.builders.PathSelectors.regex; - - -@Configuration -@EnableSwagger2 -public class ApiDocumentationConfiguration { - @Bean - public Docket documentation() { - return new Docket(DocumentationType.SWAGGER_2) - .select() - .apis(RequestHandlerSelectors.any()) - .paths(PathSelectors.regex("/(?!(error|shutdown)).*")) - .build().pathMapping("/") - .apiInfo(metadata()); - } - - @Bean - public UiConfiguration uiConfig() { - return UiConfiguration.DEFAULT; - } - - private ApiInfo metadata() { - return new ApiInfoBuilder() - .title("Simple NLP services for Ukrainian") - .description("This is a project to provide NLP services for Ukrainian language based on LanguageTool") - .version("1.0") - .contact(new Contact("Andriy Rysin", "https://github.com/brown-uk/nlp_uk", "arysin@gmail.com")) - .build(); - } -} diff --git a/grails-app/services/languagetool_nlp/UberService.groovy b/grails-app/services/languagetool_nlp/UberService.groovy new file mode 100644 index 0000000..80f4336 --- /dev/null +++ b/grails-app/services/languagetool_nlp/UberService.groovy @@ -0,0 +1,80 @@ +package languagetool_nlp + +import org.languagetool.* +import org.languagetool.tokenizers.* +import org.languagetool.language.* +import org.languagetool.uk.* +import org.languagetool.tokenizers.uk.* +import java.util.regex.Pattern + + +import static groovyx.gpars.GParsPool.* + +class UberService { + static transactional = false + + def WORD_PATTERN = ~/./ + + SRXSentenceTokenizer sentTokenizer = new 
SRXSentenceTokenizer(new Ukrainian())
+    UkrainianWordTokenizer wordTokenizer = new UkrainianWordTokenizer()
+    // NOTE(review): sentTokenizer, wordTokenizer and langTool are single instances
+    // used from every worker thread started by uber() -- confirm that all three
+    // are safe for concurrent use.
+    JLanguageTool langTool = new MultiThreadedJLanguageTool(new Ukrainian());
+
+    /**
+     * Splits every text of the batch into sentences and returns, for each
+     * sentence, the triple [sentence, tokens, lemmas].
+     *
+     * The texts are processed concurrently via GPars (collectParallel inside
+     * withPool).
+     *
+     * @param body   the batch of texts; presumably the parsed JSON array of
+     *               strings from the request -- verify against the caller
+     * @param params not used by this method
+     * @return one list per text, holding one [sentence, tokens, lemmas] entry
+     *         per sentence
+     */
+    List<List<List<Object>>> uber(def body, def params) {
+        def batch = body
+
+        withPool {
+            batch.collectParallel { text ->
+                List<String> sentences = sentTokenizer.tokenize(text);
+                List<AnalyzedSentence> analyzedSentences = langTool.analyzeSentences(sentences);
+
+                // Pair each sentence with its analysis.
+                [sentences, analyzedSentences].transpose().collect {
+                    def sent = it[0]
+                    def analyzedSentence = it[1]
+
+                    // A Matcher is "true" in Groovy when find() succeeds, so only
+                    // tokens containing a match of WORD_PATTERN are kept.
+                    def words = wordTokenizer.tokenize(sent).findAll { WORD_PATTERN.matcher(it) }
+
+                    words = adjustTokens(words, true)
+
+                    // Lemma of the first reading; whitespace tokens and tokens whose
+                    // first reading has no lemma are passed through unchanged.
+                    def lemmas = analyzedSentence.getTokens().collect { AnalyzedTokenReadings readings ->
+                        if( readings.isWhitespace() || readings.getAnalyzedToken(0).lemma == null ) {
+                            readings.token
+                        }
+                        else {
+                            readings[0].getLemma()
+                        }
+                    }
+
+                    lemmas = lemmas.findAll { WORD_PATTERN.matcher(it) }
+
+                    [sent, words, lemmas]
+                }
+            }
+        }
+    }
+
+    // A run of Ukrainian letters (apostrophes and hyphens allowed inside), then a
+    // hyphen or en dash, then one of the particles бо / но / то / от / таки at the
+    // end of the token (case-insensitive).
+    public static Pattern WITH_PARTS = ~/(?iu)([а-яіїєґ][а-яіїєґ'\u2019\u02bc-]+)[-\u2013](бо|но|то|от|таки)$/
+
+    // Lower-cased hyphenated words that adjustTokens must leave in one piece.
+    static List<String> notParts = ['себ-то', 'цеб-то', 'як-от', 'ф-но', 'все-таки', 'усе-таки', 'то-то',
+                'тим-то', 'аби-то', 'єй-бо', 'їй-бо', 'от-от', 'от-от-от', 'ото-то']
+
+    /**
+     * Splits a trailing particle off hyphenated tokens.
+     *
+     * A token that contains a hyphen (or en dash), is not listed in notParts and
+     * matches WITH_PARTS is replaced by two tokens: the part before the separator
+     * and the particle. All other tokens are copied unchanged.
+     *
+     * @param words      tokens of one sentence
+     * @param withHyphen when true the particle token is prefixed with "-"
+     * @return a new list; the input list is not modified
+     */
+    List<String> adjustTokens(List<String> words, boolean withHyphen) {
+        List<String> newWords = []
+        String hyph = withHyphen ? "-" : ""
+
+        words.forEach { String word ->
+            // Normalise en dash to hyphen for the contains/notParts checks only;
+            // the regex below still runs against the original token.
+            String lWord = word.toLowerCase().replace('\u2013', '-')
+            if( lWord.contains('-') && ! (lWord in notParts) ) {
+                def matcher = WITH_PARTS.matcher(word)
+
+                // Groovy truth calls find(), which also makes group() usable.
+                if( matcher ) {
+                    newWords << matcher.group(1) << hyph + matcher.group(2)
+                    // Leaves the closure only, i.e. moves on to the next word.
+                    return
+                }
+            }
+
+            newWords << word
+        }
+
+        return newWords
+    }
+}