From bb289673837b5458ec70b1853ee2691a4e7037e7 Mon Sep 17 00:00:00 2001 From: Oliver Burn Date: Sun, 23 Oct 2011 13:10:41 +1100 Subject: [PATCH] Fixed parsing errors for Unicode escape sequences. Thanks to Dinesh Bolkensteyn for patch #3412812. --- .../tools/checkstyle/grammars/java.g | 75 +++++++++++-------- .../checkstyle/grammars/UnicodeEscape.java | 32 ++++++++ .../grammars/UnicodeEscapeTest.java | 43 +++++++++++ src/xdocs/releasenotes.xml | 4 + 4 files changed, 124 insertions(+), 30 deletions(-) create mode 100644 src/testinputs/com/puppycrawl/tools/checkstyle/grammars/UnicodeEscape.java create mode 100644 src/tests/com/puppycrawl/tools/checkstyle/grammars/UnicodeEscapeTest.java diff --git a/src/checkstyle/com/puppycrawl/tools/checkstyle/grammars/java.g b/src/checkstyle/com/puppycrawl/tools/checkstyle/grammars/java.g index 1821496d0..ad96679f4 100755 --- a/src/checkstyle/com/puppycrawl/tools/checkstyle/grammars/java.g +++ b/src/checkstyle/com/puppycrawl/tools/checkstyle/grammars/java.g @@ -1671,7 +1671,7 @@ CHAR_LITERAL // string literals STRING_LITERAL - : '"' (ESC|~('"'|'\\'))* '"' + : '"' ( ESC | ~'"' )* '"' ; @@ -1686,39 +1686,54 @@ STRING_LITERAL protected ESC : '\\' - ( 'n' - | 'r' - | 't' - | 'b' - | 'f' - | '"' - | '\'' - | '\\' - | ('u')+ HEX_DIGIT HEX_DIGIT HEX_DIGIT HEX_DIGIT - | ('0'..'3') - ( - options { - warnWhenFollowAmbig = false; - } - : ('0'..'7') - ( - options { - warnWhenFollowAmbig = false; - } - : '0'..'7' - )? - )? - | ('4'..'7') - ( - options { - warnWhenFollowAmbig = false; - } - : ('0'..'9') - )? 
+ ( + ('u')+ + (options { generateAmbigWarnings=false; } + : '0' '0' '5' ('c'|'C') + (options { generateAmbigWarnings=false; } + : '\\' ('u')+ '0' '0' '5' ('c'|'C') + | STD_ESC + ) + | HEX_DIGIT HEX_DIGIT HEX_DIGIT HEX_DIGIT + ) + | + STD_ESC ) ; +protected +STD_ESC + : 'n' + | 'r' + | 't' + | 'b' + | 'f' + | '"' + | '\'' + | '\\' + | ('0'..'3') + ( + options { + warnWhenFollowAmbig = false; + } + : ('0'..'7') + ( + options { + warnWhenFollowAmbig = false; + } + : '0'..'7' + )? + )? + | ('4'..'7') + ( + options { + warnWhenFollowAmbig = false; + } + : ('0'..'9') + )? + ; + // hexadecimal digit (again, note it's protected!) protected HEX_DIGIT diff --git a/src/testinputs/com/puppycrawl/tools/checkstyle/grammars/UnicodeEscape.java b/src/testinputs/com/puppycrawl/tools/checkstyle/grammars/UnicodeEscape.java new file mode 100644 index 000000000..945ba160b --- /dev/null +++ b/src/testinputs/com/puppycrawl/tools/checkstyle/grammars/UnicodeEscape.java @@ -0,0 +1,32 @@ +package com.puppycrawl.tools.checkstyle.grammars; + +/** + * Input for unicode escapes. + */ +public class UnicodeEscape +{ + char a = '\u005cr'; + char b = '\u005cn'; + char c = '\u005ct'; + char d = '\uuuu005cn'; + char e = '\u005c\u005c'; + char f = '\u005c''; + char g = '"'; + String h = "\u005c""; + String i = "'"; + char j = '\"'; + String k = "\'"; + char l = '\u005C''; + char m = '\uABCD'; + char n = '\u00AB'; + char o = '\u005B'; + char p = '\uuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuu005cr'; // Tests the lookahead + char q = '\uuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuu005D'; + + char wtf1 = '\u005c\u005c'; // This is a legal backslash + String wtf2 = "\\u005c"; // = "\u005c", with a single backslash, and != a backslash! 
+ // There is an ambiguity in the grammar; it is interpreted as "\\" + "u005c" + //char wtf3 = '\\u005c'; // This is therefore illegal + + //char z = '\u005cu005c'; /* This is illegal */ +} diff --git a/src/tests/com/puppycrawl/tools/checkstyle/grammars/UnicodeEscapeTest.java b/src/tests/com/puppycrawl/tools/checkstyle/grammars/UnicodeEscapeTest.java new file mode 100644 index 000000000..03bc39a62 --- /dev/null +++ b/src/tests/com/puppycrawl/tools/checkstyle/grammars/UnicodeEscapeTest.java @@ -0,0 +1,43 @@ +//////////////////////////////////////////////////////////////////////////////// +// checkstyle: Checks Java source code for adherence to a set of rules. +// Copyright (C) 2001-2011 Oliver Burn +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +//////////////////////////////////////////////////////////////////////////////// +package com.puppycrawl.tools.checkstyle.grammars; + +import com.puppycrawl.tools.checkstyle.BaseCheckTestSupport; +import com.puppycrawl.tools.checkstyle.DefaultConfiguration; +import com.puppycrawl.tools.checkstyle.checks.naming.MemberNameCheck; + +import org.junit.Test; + +/** + * Tests that extended unicode escapes can be parsed. 
+ * @author Dinesh Bolkensteyn (SonarSource) + */ +public class UnicodeEscapeTest + extends BaseCheckTestSupport +{ + @Test + public void testCanParse() + throws Exception + { + final DefaultConfiguration checkConfig = + createCheckConfig(MemberNameCheck.class); + final String[] expected = {}; + verify(checkConfig, getPath("grammars/UnicodeEscape.java"), expected); + } +} diff --git a/src/xdocs/releasenotes.xml b/src/xdocs/releasenotes.xml index 1518c70fe..b25832282 100755 --- a/src/xdocs/releasenotes.xml +++ b/src/xdocs/releasenotes.xml @@ -39,6 +39,10 @@ treats setxYz() as a setter for xYz, and setXYz() as a setter for XYz property to comply JavaBeans specification (bug #3370946). +
  • + Fixed parsing errors for Unicode escape sequences. Thanks to + Dinesh Bolkensteyn for patch #3412812. +
  • Notes: