package lib; import java.net.URLEncoder; import java.io.UnsupportedEncodingException; import java.text.CharacterIterator; import java.text.StringCharacterIterator; import java.util.regex.Pattern; import java.util.regex.Matcher; /** * Convenience methods for escaping special characters related to HTML, XML, * and regular expressions. * *

To keep you safe by default, WEB4J goes to some effort to escape * characters in your data when appropriate, such that you usually * don't need to think too much about escaping special characters. Thus, you * shouldn't need to directly use the services of this class very often. * *

For Model Objects containing free form user input, * it is highly recommended that you use {@link SafeText}, not String. * Free form user input is open to malicious use, such as * Cross Site Scripting * attacks. * Using SafeText will protect you from such attacks, by always escaping * special characters automatically in its toString() method. * *

The following WEB4J classes will automatically escape special characters * for you, when needed : *

the {@link SafeText} class, used as a building block class for your * application's Model Objects, for modeling all free form user input *
the {@link Populate} tag used with forms *
the {@link Report} class used for creating quick reports *
the {@link Text}, {@link TextFlow}, and {@link Tooltips} custom tags used * for translation *

*/ public final class EscapeChars { /** * Escape characters for text appearing in HTML markup. * *

This method exists as a defence against Cross Site Scripting (XSS) hacks. * The idea is to neutralize control characters commonly used by scripts, such that * they will not be executed by the browser. This is done by replacing the control * characters with their escaped equivalents. * See {@link hirondelle.web4j.security.SafeText} as well. * *

The following characters are replaced with corresponding * HTML character entities : * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
Character Replacement
< <
> >
& &
" "
\t
! !
# #
$ $
% %
' '
( (
) )
* *
+ +
, ,
- -
. .
/ /
: :
; ;
= =
? ?
@ @
[ [
\ \
] ]
^ ^
_ _
` `
{ {
| |
} }
~ ~
* *

Character	Replacement
<	<
>	>
&	&
"	"
\t
!	!
#	#
$	$
%	%
'	'
(	(
)	)
*	*
+	+
,	,
-	-
.	.
/	/
:	:
;	;
=	=
?	?
@	@
[	[
\	\
]	]
^	^
_	_
`	`
{	{
\|	\|
}	}
~	~

Note that JSTL's {@code } escapes only the first * five of the above characters. */ public static String forHTML(String aText){ final StringBuilder result = new StringBuilder(); final StringCharacterIterator iterator = new StringCharacterIterator(aText); char character = iterator.current(); while (character != CharacterIterator.DONE ){ if (character == '<') { result.append("<"); } else if (character == '>') { result.append(">"); } else if (character == '&') { result.append("&"); } else if (character == '\"') { result.append("""); } else if (character == '\t') { addCharEntity(9, result); } else if (character == '!') { addCharEntity(33, result); } else if (character == '#') { addCharEntity(35, result); } else if (character == '$') { addCharEntity(36, result); } else if (character == '%') { addCharEntity(37, result); } else if (character == '\'') { addCharEntity(39, result); } else if (character == '(') { addCharEntity(40, result); } else if (character == ')') { addCharEntity(41, result); } else if (character == '*') { addCharEntity(42, result); } else if (character == '+') { addCharEntity(43, result); } else if (character == ',') { addCharEntity(44, result); } else if (character == '-') { addCharEntity(45, result); } else if (character == '.') { addCharEntity(46, result); } else if (character == '/') { addCharEntity(47, result); } else if (character == ':') { addCharEntity(58, result); } else if (character == ';') { addCharEntity(59, result); } else if (character == '=') { addCharEntity(61, result); } else if (character == '?') { addCharEntity(63, result); } else if (character == '@') { addCharEntity(64, result); } else if (character == '[') { addCharEntity(91, result); } else if (character == '\\') { addCharEntity(92, result); } else if (character == ']') { addCharEntity(93, result); } else if (character == '^') { addCharEntity(94, result); } else if (character == '_') { addCharEntity(95, result); } else if (character == '`') { addCharEntity(96, result); } else if 
(character == '{') { addCharEntity(123, result); } else if (character == '|') { addCharEntity(124, result); } else if (character == '}') { addCharEntity(125, result); } else if (character == '~') { addCharEntity(126, result); } else { //the char is not a special one //add it to the result as is result.append(character); } character = iterator.next(); } return result.toString(); } /** * Escape all ampersand characters in a URL. * *

Replaces all '&' characters with '&'. * *

An ampersand character may appear in the query string of a URL. * The ampersand character is indeed valid in a URL. * However, URLs usually appear as an HREF attribute, and * such attributes have the additional constraint that ampersands * must be escaped. * *

The JSTL tag does indeed perform proper URL encoding of * query parameters. But it does not, in general, produce text which * is valid as an HREF attribute, simply because it does * not escape the ampersand character. This is a nuisance when * multiple query parameters appear in the URL, since it requires a little * extra work. */ public static String forHrefAmpersand(String aURL){ return aURL.replace("&", "&"); } /** * Synonym for URLEncoder.encode(String, "UTF-8"). * *

Used to ensure that HTTP query strings are in proper form, by escaping * special characters such as spaces. * *

It is important to note that if a query string appears in an HREF * attribute, then there are two issues - ensuring the query string is valid HTTP * (it is URL-encoded), and ensuring it is valid HTML (ensuring the * ampersand is escaped). */ public static String forURL(String aURLFragment){ String result = null; try { result = URLEncoder.encode(aURLFragment, "UTF-8"); } catch (UnsupportedEncodingException ex){ throw new RuntimeException("UTF-8 not supported", ex); } return result; } /** * Escape characters for text appearing as XML data, between tags. * *

The following characters are replaced with corresponding character entities : * * * * * * * *
Character Encoding
< <
> >
& &
" "
' '
* *

Character	Encoding
<	<
>	>
&	&
"	"
'	'

Note that JSTL's {@code } escapes the exact same set of * characters as this method. That is, {@code } * is good for escaping to produce valid XML, but not for producing safe * HTML. */ public static String forXML(String aText){ final StringBuilder result = new StringBuilder(); final StringCharacterIterator iterator = new StringCharacterIterator(aText); char character = iterator.current(); while (character != CharacterIterator.DONE ){ if (character == '<') { result.append("<"); } else if (character == '>') { result.append(">"); } else if (character == '\"') { result.append("""); } else if (character == '\'') { result.append("'"); } else if (character == '&') { result.append("&"); } else { //the char is not a special one //add it to the result as is result.append(character); } character = iterator.next(); } return result.toString(); } /** * Return aText with all '<' and '>' characters * replaced by their escaped equivalents. */ public static String toDisableTags(String aText){ final StringBuilder result = new StringBuilder(); final StringCharacterIterator iterator = new StringCharacterIterator(aText); char character = iterator.current(); while (character != CharacterIterator.DONE ){ if (character == '<') { result.append("<"); } else if (character == '>') { result.append(">"); } else { //the char is not a special one //add it to the result as is result.append(character); } character = iterator.next(); } return result.toString(); } /** * Replace characters having special meaning in regular expressions * with their escaped equivalents, preceded by a '\' character. * *

The escaped characters include : *

. *
\ *
?, * , and + *
& *
: *
{ and } *
[ and ] *
( and ) *
^ and $ *

*/ public static String forRegex(String aRegexFragment){ final StringBuilder result = new StringBuilder(); final StringCharacterIterator iterator = new StringCharacterIterator(aRegexFragment) ; char character = iterator.current(); while (character != CharacterIterator.DONE ){ /* * All literals need to have backslashes doubled. */ if (character == '.') { result.append("\\."); } else if (character == '\\') { result.append("\\\\"); } else if (character == '?') { result.append("\\?"); } else if (character == '*') { result.append("\\*"); } else if (character == '+') { result.append("\\+"); } else if (character == '&') { result.append("\\&"); } else if (character == ':') { result.append("\\:"); } else if (character == '{') { result.append("\\{"); } else if (character == '}') { result.append("\\}"); } else if (character == '[') { result.append("\\["); } else if (character == ']') { result.append("\\]"); } else if (character == '(') { result.append("\$"); } else if (character == ')') { result.append("\$"); } else if (character == '^') { result.append("\\^"); } else if (character == '$') { result.append("\\$"); } else { //the char is not a special one //add it to the result as is result.append(character); } character = iterator.next(); } return result.toString(); } /** * Escape '$' and '\' characters in replacement strings. * *

Synonym for Matcher.quoteReplacement(String). * *

The following methods use replacement strings which treat * '$' and '\' as special characters: *

String.replaceAll(String, String) *
String.replaceFirst(String, String) *
Matcher.appendReplacement(StringBuffer, String) *

* *

If replacement text can contain arbitrary characters, then you * will usually need to escape that text, to ensure special characters * are interpreted literally. */ public static String forReplacementString(String aInput){ return Matcher.quoteReplacement(aInput); } /** * Disable all ", Pattern.CASE_INSENSITIVE ); private static void addCharEntity(Integer aIdx, StringBuilder aBuilder){ String padding = ""; if( aIdx <= 9 ){ padding = "00"; } else if( aIdx <= 99 ){ padding = "0"; } else { //no prefix } String number = padding + aIdx.toString(); aBuilder.append("&#" + number + ";"); } }

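/* Character-to-entity table for forHTML (text residue kept as a comment).
Character	Replacement
<	&lt;
>	&gt;
&	&amp;
"	&quot;
\t	&#009;
!	&#033;
#	&#035;
$	&#036;
%	&#037;
'	&#039;
(	&#040;
)	&#041;
*	&#042;
+	&#043;
,	&#044;
-	&#045;
.	&#046;
/	&#047;
:	&#058;
;	&#059;
=	&#061;
?	&#063;
@	&#064;
[	&#091;
\	&#092;
]	&#093;
^	&#094;
_	&#095;
`	&#096;
{	&#123;
|	&#124;
}	&#125;
~	&#126;
*/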

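/* Character-to-entity table for forHTML (text residue kept as a comment).
Character	Replacement
<	&lt;
>	&gt;
&	&amp;
"	&quot;
\t	&#009;
!	&#033;
#	&#035;
$	&#036;
%	&#037;
'	&#039;
(	&#040;
)	&#041;
*	&#042;
+	&#043;
,	&#044;
-	&#045;
.	&#046;
/	&#047;
:	&#058;
;	&#059;
=	&#061;
?	&#063;
@	&#064;
[	&#091;
\	&#092;
]	&#093;
^	&#094;
_	&#095;
`	&#096;
{	&#123;
|	&#124;
}	&#125;
~	&#126;
*/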

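/* Character-to-entity table for forHTML (text residue kept as a comment).
Character	Replacement
<	&lt;
>	&gt;
&	&amp;
"	&quot;
\t	&#009;
!	&#033;
#	&#035;
$	&#036;
%	&#037;
'	&#039;
(	&#040;
)	&#041;
*	&#042;
+	&#043;
,	&#044;
-	&#045;
.	&#046;
/	&#047;
:	&#058;
;	&#059;
=	&#061;
?	&#063;
@	&#064;
[	&#091;
\	&#092;
]	&#093;
^	&#094;
_	&#095;
`	&#096;
{	&#123;
|	&#124;
}	&#125;
~	&#126;
*/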