Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Jdk19 regexp fix #10972

Open
wants to merge 11 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -23,15 +23,7 @@
import org.junit.Before;
import org.junit.Test;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.PrintStream;
import java.io.PrintWriter;
import java.io.*;
import java.nio.charset.StandardCharsets;

import static org.hamcrest.CoreMatchers.is;
Expand All @@ -42,7 +34,7 @@
*
* @author Marcin Miłkowski
*/
public class MainTest extends AbstractSecurityTestCase {
public class MainTest {

private final File enTestFile;
private final File xxRuleFile;
Expand Down Expand Up @@ -113,8 +105,7 @@ public MainTest() throws IOException {
}

@Before
public void setUp() throws Exception {
super.setUp();
public void setUp() {
this.stdout = System.out;
this.stderr = System.err;
this.out = new ByteArrayOutputStream();
Expand All @@ -124,38 +115,33 @@ public void setUp() throws Exception {
}

@After
public void tearDown() throws Exception {
public void tearDown() {
System.setOut(this.stdout);
System.setErr(this.stderr);
super.tearDown();
}

@Test
public void testUsageMessage() throws Exception {
try {
String[] args = {"-h"};
Main.main(args);
fail("LT should have exited with status 0!");
} catch (ExitException e) {
String output = new String(this.out.toByteArray());
assertTrue(output.contains("Usage: java -jar languagetool-commandline.jar"));
assertEquals("Exit status", 1, e.status);
}
Process process = new ProcessBuilder(
"java", "-cp", System.getProperty("java.class.path"), "org.languagetool.commandline.Main", "-h"
).start();
int exitCode = process.waitFor();
String output = readProcessOutput(process);
assertTrue(output.contains("Usage: java -jar languagetool-commandline.jar"));
assertEquals("Exit status", 1, exitCode);
Comment on lines +125 to +131
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion

Capture and assert the error stream when running subprocesses.

In the test method, only the standard output from the subprocess is captured. To ensure comprehensive testing, consider capturing the error stream (process.getErrorStream()) as well. This allows you to assert that no unexpected errors are occurring during execution and helps in diagnosing issues that may not appear in standard output.

Modify the test to read and assert the error output:

String errorOutput = readProcessError(process);
assertTrue("Error output should be empty", errorOutput.isEmpty());

And add a method to read the error stream:

private String readProcessError(Process process) throws IOException {
  try (BufferedReader reader = new BufferedReader(new InputStreamReader(process.getErrorStream()))) {
    StringBuilder output = new StringBuilder();
    String line;
    while ((line = reader.readLine()) != null) {
      output.append(line).append(System.lineSeparator());
    }
    return output.toString();
  }
}

}

@Test
public void testPrintLanguages() throws Exception {
try {
String[] args = {"--list"};
Main.main(args);
fail("LT should have exited with status 0!");
} catch (ExitException e) {
String output = new String(this.out.toByteArray());
assertTrue(output.contains("German"));
assertTrue(output.contains("de-DE"));
assertTrue(output.contains("English"));
assertEquals("Exit status", 0, e.status);
}
Process process = new ProcessBuilder(
"java", "-cp", System.getProperty("java.class.path"), "org.languagetool.commandline.Main", "--list"
).start();
int exitCode = process.waitFor();
String output = readProcessOutput(process);
assertTrue(output.contains("German"));
assertTrue(output.contains("de-DE"));
assertTrue(output.contains("English"));
assertEquals("Exit status", 0, exitCode);
}

@Test
Expand Down Expand Up @@ -670,4 +656,14 @@ private String getExternalFalseFriends() {
return xxFalseFriendFile.getAbsolutePath();
}

private String readProcessOutput(Process process) throws IOException {
try (BufferedReader reader = new BufferedReader(new InputStreamReader(process.getInputStream()))) {
StringBuilder output = new StringBuilder();
String line;
while ((line = reader.readLine()) != null) {
output.append(line).append(System.lineSeparator());
}
return output.toString();
}
}
Comment on lines +659 to +668
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion

⚠️ Potential issue

Prevent potential deadlocks by consuming both output streams of the subprocess.

The readProcessOutput method reads only from the subprocess's standard output stream. If the subprocess writes enough data to the error stream without it being read, it can block due to the buffer being full, leading to a deadlock. To mitigate this risk, read from both the standard output and error streams.

Refactor the method to consume both streams:

private String readProcessOutput(Process process) throws IOException {
  StringBuilder output = new StringBuilder();
  try (
    BufferedReader stdOutReader = new BufferedReader(new InputStreamReader(process.getInputStream()));
    BufferedReader stdErrReader = new BufferedReader(new InputStreamReader(process.getErrorStream()))
  ) {
    String line;
    while ((line = stdOutReader.readLine()) != null) {
      output.append(line).append(System.lineSeparator());
    }
    while ((line = stdErrReader.readLine()) != null) {
      // Optionally append to output or handle separately
      output.append(line).append(System.lineSeparator());
    }
  }
  return output.toString();
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ public abstract class AbstractUnitConversionRule extends Rule {
protected static final String NUMBER_REGEX = "(-?[0-9]{1,32}[0-9,.]{0,32})";
protected static final String NUMBER_REGEX_WITH_BOUNDARY = "(-?\\b[0-9]{1,32}[0-9,.]{0,32})";

protected final Pattern numberRangePart = Pattern.compile(NUMBER_REGEX_WITH_BOUNDARY + "$");
protected final Pattern numberRangePart = Pattern.compile(NUMBER_REGEX_WITH_BOUNDARY + "$", Pattern.UNICODE_CHARACTER_CLASS);

private static final double DELTA = 1e-2;
private static final double ROUNDING_DELTA = 0.05;
Expand Down Expand Up @@ -196,7 +196,7 @@ protected String formatRounded(String s) {
*/
protected void addUnit(String pattern, Unit base, String symbol, double factor, boolean metric) {
Unit unit = base.multiply(factor);
unitPatterns.put(Pattern.compile(NUMBER_REGEX_WITH_BOUNDARY + "[\\s\u00A0]{0," + WHITESPACE_LIMIT + "}" + pattern + "\\b"), unit);
unitPatterns.put(Pattern.compile(NUMBER_REGEX_WITH_BOUNDARY + "[\\s\u00A0]{0," + WHITESPACE_LIMIT + "}" + pattern + "\\b", Pattern.UNICODE_CHARACTER_CLASS), unit);
unitSymbols.putIfAbsent(unit, new ArrayList<>());
unitSymbols.get(unit).add(symbol);
if (metric && !metricUnits.contains(unit)) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -776,7 +776,8 @@ private void createRules(List<PatternToken> elemList,
rule.setDistanceTokens(distanceTokens);
rule.setXmlLineNumber(xmlLineNumber);
} else if (regex.length() > 0) {
int flags = regexCaseSensitive ? 0 : Pattern.CASE_INSENSITIVE|Pattern.UNICODE_CASE;
// int flags = regexCaseSensitive ? 0 : Pattern.CASE_INSENSITIVE|Pattern.UNICODE_CASE;
int flags = regexCaseSensitive ? Pattern.UNICODE_CHARACTER_CLASS : Pattern.CASE_INSENSITIVE|Pattern.UNICODE_CHARACTER_CLASS;
String regexStr = regex.toString();
if (regexMode == RegexpMode.SMART) {
// Note: it's not that easy to add \b because the regex might look like '(foo)' or '\d' so we cannot just look at the last character
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ public RuleMatch acceptRuleMatch(RuleMatch match, Map<String, String> arguments,
}
String[] antiPatterns = antiPatternStr.split("\\|");
for (String antiPattern : antiPatterns) {
Pattern p = Pattern.compile(antiPattern);
Pattern p = Pattern.compile(antiPattern, Pattern.UNICODE_CHARACTER_CLASS);
Matcher matcher = p.matcher(sentenceObj.getText());
while (matcher.find()) {
// partial overlap is enough to filter out a match:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,18 @@
*/
package org.languagetool.rules.patterns;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.ResourceBundle;
import java.util.function.Function;

import org.apache.commons.lang3.ObjectUtils;
import org.apache.commons.lang3.tuple.Triple;
import org.jetbrains.annotations.Nullable;
import org.languagetool.Language;
import org.languagetool.ResourceBundleTools;
import org.languagetool.chunking.ChunkTag;
import org.languagetool.rules.CorrectExample;
import org.languagetool.rules.ErrorTriggeringExample;
Expand All @@ -35,9 +42,6 @@
import org.xml.sax.SAXParseException;
import org.xml.sax.helpers.DefaultHandler;

import java.util.*;
import java.util.function.Function;

/**
* XML rule handler that loads rules from XML and throws
* exceptions on errors and warnings.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;

/**
* Tools for loading an SRX tokenizer file.
Expand Down Expand Up @@ -59,7 +60,8 @@ static SrxDocument createSrxDocument(String path) {

static List<String> tokenize(String text, SrxDocument srxDocument, String code) {
List<String> segments = new ArrayList<>();
TextIterator textIterator = new SrxTextIterator(srxDocument, code, text);
Map<String, Object> parserParameters = Map.of(SrxTextIterator.DEFAULT_PATTERN_FLAGS_PARAMETER, Pattern.UNICODE_CHARACTER_CLASS);
TextIterator textIterator = new SrxTextIterator(srxDocument, code, text, parserParameters);
while (textIterator.hasNext()) {
segments.add(textIterator.next());
}
Expand Down
Loading