From f83c10ff1d5a112c901e6c71e82f0a5af2162cb1 Mon Sep 17 00:00:00 2001 From: kingthorin Date: Tue, 15 Oct 2024 07:54:49 -0400 Subject: [PATCH] encoder: Add transliteration processor - CHANGELOG.md > Add note. - EncodeDecodeProcessors > Register the new processor. - encoder.html > Add help content and behavior examples. (Update JavaDoc links to version 17 references.) - Messages.properties > Add key/value pair for the name. - Transliterate > New encoder. - TransliterateUnitTest > Unit Test for the new encoder. Signed-off-by: kingthorin --- addOns/encoder/CHANGELOG.md | 3 +- .../processors/EncodeDecodeProcessors.java | 2 + .../predefined/utility/Transliterate.java | 39 ++++++++++++++++ .../resources/help/contents/encoder.html | 13 +++++- .../encoder/resources/Messages.properties | 1 + .../utility/TransliterateUnitTest.java | 45 +++++++++++++++++++ 6 files changed, 100 insertions(+), 3 deletions(-) create mode 100644 addOns/encoder/src/main/java/org/zaproxy/addon/encoder/processors/predefined/utility/Transliterate.java create mode 100644 addOns/encoder/src/test/java/org/zaproxy/addon/encoder/processors/predefined/utility/TransliterateUnitTest.java diff --git a/addOns/encoder/CHANGELOG.md b/addOns/encoder/CHANGELOG.md index 29a35950ef2..81a502eac9a 100644 --- a/addOns/encoder/CHANGELOG.md +++ b/addOns/encoder/CHANGELOG.md @@ -5,7 +5,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ## Unreleased - +### Added +- A predefined processor "Transliterate" which converts text removing accents/diacritics/ligatures (perhaps not fully, due to operation in compatibility mode) leaving only ASCII characters. 
## [1.5.0] - 2024-05-07 ### Added diff --git a/addOns/encoder/src/main/java/org/zaproxy/addon/encoder/processors/EncodeDecodeProcessors.java b/addOns/encoder/src/main/java/org/zaproxy/addon/encoder/processors/EncodeDecodeProcessors.java index d26242642b1..ebc5d5a8d1f 100644 --- a/addOns/encoder/src/main/java/org/zaproxy/addon/encoder/processors/EncodeDecodeProcessors.java +++ b/addOns/encoder/src/main/java/org/zaproxy/addon/encoder/processors/EncodeDecodeProcessors.java @@ -55,6 +55,7 @@ import org.zaproxy.addon.encoder.processors.predefined.utility.LowerCase; import org.zaproxy.addon.encoder.processors.predefined.utility.RemoveWhitespace; import org.zaproxy.addon.encoder.processors.predefined.utility.Reverse; +import org.zaproxy.addon.encoder.processors.predefined.utility.Transliterate; import org.zaproxy.addon.encoder.processors.predefined.utility.UpperCase; import org.zaproxy.addon.encoder.processors.script.ScriptBasedEncodeDecodeProcessor; import org.zaproxy.zap.extension.script.ScriptWrapper; @@ -103,6 +104,7 @@ public class EncodeDecodeProcessors { addPredefined("lowercase", LowerCase.getSingleton()); addPredefined("uppercase", UpperCase.getSingleton()); addPredefined("powershellencode", PowerShellEncoder.getSingleton()); + addPredefined("transliterate", Transliterate.getSingleton()); } private Map scriptProcessors = new HashMap<>(); diff --git a/addOns/encoder/src/main/java/org/zaproxy/addon/encoder/processors/predefined/utility/Transliterate.java b/addOns/encoder/src/main/java/org/zaproxy/addon/encoder/processors/predefined/utility/Transliterate.java new file mode 100644 index 00000000000..066b9ab63f0 --- /dev/null +++ b/addOns/encoder/src/main/java/org/zaproxy/addon/encoder/processors/predefined/utility/Transliterate.java @@ -0,0 +1,39 @@ +/* + * Zed Attack Proxy (ZAP) and its related class files. + * + * ZAP is an HTTP/HTTPS proxy for assessing web application security. 
+ * + * Copyright 2024 The ZAP Development Team + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.zaproxy.addon.encoder.processors.predefined.utility; + +import java.io.IOException; +import java.text.Normalizer; +import org.zaproxy.addon.encoder.processors.predefined.DefaultEncodeDecodeProcessor; + +public class Transliterate extends DefaultEncodeDecodeProcessor { + + private static final Transliterate INSTANCE = new Transliterate(); + + @Override + protected String processInternal(String value) throws IOException { + // Normalize with compatible decomposition, then remove anything non-ASCII + return Normalizer.normalize(value, Normalizer.Form.NFKD).replaceAll("[^\\p{ASCII}]", ""); + } + + public static Transliterate getSingleton() { + return INSTANCE; + } +} diff --git a/addOns/encoder/src/main/javahelp/org/zaproxy/addon/encoder/resources/help/contents/encoder.html b/addOns/encoder/src/main/javahelp/org/zaproxy/addon/encoder/resources/help/contents/encoder.html index 75855dca418..845b93f3eec 100644 --- a/addOns/encoder/src/main/javahelp/org/zaproxy/addon/encoder/resources/help/contents/encoder.html +++ b/addOns/encoder/src/main/javahelp/org/zaproxy/addon/encoder/resources/help/contents/encoder.html @@ -152,7 +152,7 @@

ASCII Hex Decode

Base 64 Decode

Will display the base 64 decoding of the text you enter.
-Leveraging a Mime decoder to handle wrapped lines. +Leveraging a Mime decoder to handle wrapped lines.

Base 64 URL Decode

Will display the base 64 URL decoding of the text you enter. Base64URL is a modification to the primary base 64 standard @@ -198,7 +198,7 @@

To Lower Case

Converts the input to all lower case characters.

Remove Whitespace

-Removes all whitespace characters from the text, based on Character.isWhiteSpace(char). +Removes all whitespace characters from the text, based on Character.isWhitespace(char).

Reverse

Reverses the order of the input. @@ -206,6 +206,15 @@

Reverse

To Upper Case

Converts the input to all upper case characters. +

Transliterate

+Converts text removing accents/diacritics/ligatures (perhaps not fully, due to operation in compatibility mode) leaving only ASCII characters. +Ex: Tĥïŝ ĩš â fůňķŷ Šťŕĭńġ: fi. étrange. becomes This is a funky String: fi. etrange..<br> +See also:
+ +

Miscellaneous

PowerShell Encode

diff --git a/addOns/encoder/src/main/resources/org/zaproxy/addon/encoder/resources/Messages.properties b/addOns/encoder/src/main/resources/org/zaproxy/addon/encoder/resources/Messages.properties index fa56f80bc76..896e1068e9e 100644 --- a/addOns/encoder/src/main/resources/org/zaproxy/addon/encoder/resources/Messages.properties +++ b/addOns/encoder/src/main/resources/org/zaproxy/addon/encoder/resources/Messages.properties @@ -59,6 +59,7 @@ encoder.predefined.tab.encode = Encode encoder.predefined.tab.hash = Hash encoder.predefined.tab.illegalUTF8 = Illegal UTF8 encoder.predefined.tab.unicode = Unicode +encoder.predefined.transliterate = Transliterate (Strip accents, etc) encoder.predefined.unicodedecode = Unicode Unescaped Text encoder.predefined.unicodeencode = Unicode Escaped Text encoder.predefined.uppercase = To Upper Case diff --git a/addOns/encoder/src/test/java/org/zaproxy/addon/encoder/processors/predefined/utility/TransliterateUnitTest.java b/addOns/encoder/src/test/java/org/zaproxy/addon/encoder/processors/predefined/utility/TransliterateUnitTest.java new file mode 100644 index 00000000000..69aef23e65a --- /dev/null +++ b/addOns/encoder/src/test/java/org/zaproxy/addon/encoder/processors/predefined/utility/TransliterateUnitTest.java @@ -0,0 +1,45 @@ +/* + * Zed Attack Proxy (ZAP) and its related class files. + * + * ZAP is an HTTP/HTTPS proxy for assessing web application security. + * + * Copyright 2024 The ZAP Development Team + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.zaproxy.addon.encoder.processors.predefined.utility; + +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.is; + +import org.junit.jupiter.api.Test; +import org.zaproxy.addon.encoder.processors.EncodeDecodeResult; +import org.zaproxy.addon.encoder.processors.predefined.ProcessorTests; + +class TransliterateUnitTest extends ProcessorTests { + + @Override + protected Transliterate createProcessor() { + return Transliterate.getSingleton(); + } + + @Test + void shouldEncodeWithoutError() throws Exception { + // Given / When + EncodeDecodeResult result = processor.process("Tĥïŝ ĩš â fůňķŷ Šťŕĭńġ: fi. étrange."); + // Then + assertThat(result.hasError(), is(equalTo(false))); + assertThat(result.getResult(), is(equalTo("This is a funky String: fi. etrange."))); + } +}