package engine;
import junit.framework.Assert;
import junit.framework.TestCase;
/**
 * Characterisation tests for {@link StringFunctions#escapeXML(String)}.
 *
 * <p>Within the {@code <SvcRsData>} element, {@code escapeXML} removes the CDATA markers and
 * replaces the characters {@code "}, {@code &}, {@code <} and {@code >} found inside a CDATA
 * section with decimal character references (code points 34, 38, 60 and 62). The expected
 * strings below therefore spell those references out explicitly; ASCII text outside a CDATA
 * section is expected to come back unchanged.
 */
public class StringFunctionsTest extends TestCase {

    /** A single, well-formed CDATA section is unwrapped and its markup characters are escaped. */
    public void testEscapeXMLSimple() {
        final String simple = "<xml><SvcRsData>a<![CDATA[<sender>John & Smith</sender>]]></SvcRsData></xml> ";
        final String expected = "<xml><SvcRsData>a&#60;sender&#62;John &#38; Smith&#60;/sender&#62;</SvcRsData></xml> ";
        String result = StringFunctions.escapeXML(simple);
        Assert.assertEquals(expected, result);
    }

    /**
     * A CDATA opener nested inside a CDATA section is treated as plain content; the section ends
     * at the first terminator, so the text after it is left unescaped.
     */
    public void testEscapeXMLCDATAInsideCDATA() {
        final String stringWithCDATAInsideCDATA = "<xml><SvcRsData>a<![CDATA[<sender>John <![CDATA[Inner & CD ]]>& Smith</sender>]]></SvcRsData></xml> ";
        final String expected = "<xml><SvcRsData>a&#60;sender&#62;John &#60;![CDATA[Inner &#38; CD & Smith</sender>]]></SvcRsData></xml> ";
        String result = StringFunctions.escapeXML(stringWithCDATAInsideCDATA);
        Assert.assertEquals(expected, result);
    }

    /** A CDATA section without a terminator is reported with a StringIndexOutOfBoundsException. */
    public void testEscapeXMLCDATAWithoutClosingTag() {
        final String stringWithCDATAWithoutClosingTag = "<xml><SvcRsData>a<![CDATA[<sender>John & Smith</sender></SvcRsData></xml> ";
        try {
            StringFunctions.escapeXML(stringWithCDATAWithoutClosingTag);
            // Without this the test would pass even if no exception were thrown at all.
            Assert.fail("Expected a StringIndexOutOfBoundsException for an unterminated CDATA section");
        } catch (StringIndexOutOfBoundsException expected) {
            // This is the behaviour being pinned down.
        }
    }

    /** Only the text up to the first terminator is escaped; a stray second terminator is kept. */
    public void testEscapeXMLCDATAWithTwoCDATAClosingTags() {
        final String stringWithCDATAWithTwoClosingTags = "<xml><SvcRsData>a<![CDATA[<sender>John Inner & CD ]]>& Smith</sender>]]>bcd & efg</SvcRsData></xml> ";
        final String expectedAfterSecondClosingTagNotEscaped = "<xml><SvcRsData>a&#60;sender&#62;John Inner &#38; CD & Smith</sender>]]>bcd & efg</SvcRsData></xml> ";
        String result = StringFunctions.escapeXML(stringWithCDATAWithTwoClosingTags);
        Assert.assertEquals(expectedAfterSecondClosingTagNotEscaped, result);
    }

    /** Two separate CDATA sections are both unwrapped; the markup between them is untouched. */
    public void testEscapeXMLSimpleTwoCDATA() {
        final String stringWithTwoCDATA = "<xml><SvcRsData>a<![CDATA[<sender>John & Smith</sender>]]>abc<sometag>xyz</sometag><sometag2><![CDATA[<recipient>Gorge & Doe</recipient>]]></sometag2></SvcRsData></xml> ";
        final String expected = "<xml><SvcRsData>a&#60;sender&#62;John &#38; Smith&#60;/sender&#62;abc<sometag>xyz</sometag><sometag2>&#60;recipient&#62;Gorge &#38; Doe&#60;/recipient&#62;</sometag2></SvcRsData></xml> ";
        String result = StringFunctions.escapeXML(stringWithTwoCDATA);
        Assert.assertEquals(expected, result);
    }

    /**
     * Overlapping sections: the first section swallows the second opener, and the second
     * terminator is left in the output as-is.
     */
    public void testEscapeXMLOverlappingCDATA() {
        final String stringWithTwoCDATA = "<xml><SvcRsData>a<![CDATA[<sender>John & <![CDATA[Smith</sender>]]>abc<sometag>xyz</sometag><sometag2><recipient>Gorge & Doe</recipient>]]></sometag2></SvcRsData></xml> ";
        final String expectedMess = "<xml><SvcRsData>a&#60;sender&#62;John &#38; &#60;![CDATA[Smith&#60;/sender&#62;abc<sometag>xyz</sometag><sometag2><recipient>Gorge & Doe</recipient>]]></sometag2></SvcRsData></xml> ";
        String result = StringFunctions.escapeXML(stringWithTwoCDATA);
        Assert.assertEquals(expectedMess, result);
    }
}
package engine;
/**
 * Prepares service-response XML for display in an HTML page by unwrapping CDATA sections and
 * replacing selected characters with decimal character references.
 */
public class StringFunctions {

    private static final String DATA_START_TAG = "<SvcRsData>";
    private static final String DATA_END_TAG = "</SvcRsData>";
    private static final String CDATA_START = "<![CDATA[";
    private static final String CDATA_END = "]]>";

    /**
     * Highest code point copied through unchanged; anything above it becomes {@code &#N;}.
     * The value 128 (rather than 127) is kept from the original implementation.
     */
    private static final int MAX_UNESCAPED_CODE_POINT = 128;

    /**
     * Unwraps the CDATA sections found between the first {@code <SvcRsData>} and the next
     * {@code </SvcRsData>}, and escapes characters for display.
     *
     * <ul>
     *   <li>Inside a CDATA section the markers are dropped and {@code "}, {@code &}, {@code <}
     *       and {@code >} are replaced by decimal character references.</li>
     *   <li>Everywhere in the input, code points above 128 are replaced by decimal character
     *       references. A surrogate pair yields one reference to the supplementary code point;
     *       an unpaired surrogate is copied through unchanged.</li>
     *   <li>If the input has no {@code <SvcRsData>} tag, only the second rule applies.</li>
     * </ul>
     *
     * @param s the XML text to process
     * @return the processed text
     * @throws NullPointerException if {@code s} is {@code null}
     * @throws StringIndexOutOfBoundsException if {@code <SvcRsData>} is not followed by
     *         {@code </SvcRsData>}, or a CDATA section is not terminated before that end tag
     */
    public static String escapeXML(String s) {
        StringBuilder out = new StringBuilder(s.length() + 16);

        // indexOf returns -1 for "not found"; 0 is a legitimate match position.
        int dataTag = s.indexOf(DATA_START_TAG);
        if (dataTag < 0) {
            appendEscaped(out, s, 0, s.length(), false);
            return out.toString();
        }

        int dataStart = dataTag + DATA_START_TAG.length();
        int dataEnd = s.indexOf(DATA_END_TAG, dataStart);
        if (dataEnd < 0) {
            throw new StringIndexOutOfBoundsException(
                    "Missing " + DATA_END_TAG + " after " + DATA_START_TAG);
        }

        // Everything up to and including the start tag only needs non-ASCII escaping.
        appendEscaped(out, s, 0, dataStart, false);

        // Walk the data section with a cursor instead of re-slicing the remaining text, so the
        // whole input is visited once regardless of how many CDATA sections it holds.
        int cursor = dataStart;
        int cdataOpen = indexOfWithin(s, CDATA_START, cursor, dataEnd);
        while (cdataOpen >= 0) {
            int contentStart = cdataOpen + CDATA_START.length();
            // Look for the terminator after the opener, not from the start of the text.
            int cdataClose = indexOfWithin(s, CDATA_END, contentStart, dataEnd);
            if (cdataClose < 0) {
                throw new StringIndexOutOfBoundsException(
                        "Unterminated CDATA section starting at index " + cdataOpen);
            }
            appendEscaped(out, s, cursor, cdataOpen, false);
            appendEscaped(out, s, contentStart, cdataClose, true);
            cursor = cdataClose + CDATA_END.length();
            cdataOpen = indexOfWithin(s, CDATA_START, cursor, dataEnd);
        }

        // Remainder of the data section plus the end tag and whatever follows it.
        appendEscaped(out, s, cursor, s.length(), false);
        return out.toString();
    }

    /** Returns the first index of {@code token} at or after {@code from} that ends by {@code limit}, or -1. */
    private static int indexOfWithin(String s, String token, int from, int limit) {
        int index = s.indexOf(token, from);
        return (index >= 0 && index + token.length() <= limit) ? index : -1;
    }

    /**
     * Appends {@code s[from, to)} to {@code out}, escaping code points above 128 and, when
     * {@code escapeMarkup} is set, the characters {@code " & < >} as well.
     */
    private static void appendEscaped(StringBuilder out, String s, int from, int to,
            boolean escapeMarkup) {
        int i = from;
        while (i < to) {
            // Work on code points: a (short) cast would turn chars >= U+8000 negative and
            // let them slip past the "> 128" comparison.
            int codePoint = s.codePointAt(i);
            boolean unpairedSurrogate = codePoint >= Character.MIN_SURROGATE
                    && codePoint <= Character.MAX_SURROGATE;
            if (unpairedSurrogate) {
                // No valid character reference exists for half a pair; copy it unchanged.
                out.append(s.charAt(i));
            } else if (codePoint > MAX_UNESCAPED_CODE_POINT
                    || (escapeMarkup && isMarkupCharacter(codePoint))) {
                out.append("&#").append(codePoint).append(';');
            } else {
                out.append((char) codePoint);
            }
            i += Character.charCount(codePoint);
        }
    }

    /** Tells whether the code point is one of {@code " & < >} (34, 38, 60, 62). */
    private static boolean isMarkupCharacter(int codePoint) {
        return codePoint == '"' || codePoint == '&' || codePoint == '<' || codePoint == '>';
    }
}
Refactorings
No refactoring yet !
MetroidFan2002
September 19, 2009, September 19, 2009 06:43, permalink
The escape function is the common piece - but it's odd that so many specific ones are there.
Here is code that achieves the exact same functionality. Note that I like LISP, so I tend to avoid variables - you may replace the substring calls in escapeCDataCharacters with variables having these substrings if you like that better.
package engine;
/**
 * Unwraps CDATA sections inside the {@code <SvcRsData>} element and replaces selected
 * characters with decimal character references.
 */
public class StringFunctions {
    public static final String CDATA_ENDING_TAG = "]]>";
    public static final String CDATA_BEGINNING_TAG = "<![CDATA[";
    public static final String ENDING_TAG = "</SvcRsData>";
    public static final String BEGINNING_TAG = "<SvcRsData>";

    /**
     * Processes the text between the first {@link #BEGINNING_TAG} and the following
     * {@link #ENDING_TAG}: CDATA markers are removed and {@code "}, {@code &}, {@code <},
     * {@code >} inside them become decimal character references. Afterwards every code point
     * above 128 in the whole text is replaced by a decimal character reference.
     *
     * @param original the XML text to process
     * @return the processed text
     * @throws NullPointerException if {@code original} is {@code null}
     * @throws StringIndexOutOfBoundsException if the beginning tag has no ending tag after it,
     *         or a CDATA section between the tags is not terminated
     */
    public static String escapeXML(String original) {
        int tagIndex = original.indexOf(BEGINNING_TAG);
        // -1 means "absent"; a tag at index 0 must still be processed.
        if (tagIndex < 0) {
            return replaceCharacter(original, false);
        }

        int startingIndex = tagIndex + BEGINNING_TAG.length();
        int endingIndex = original.indexOf(ENDING_TAG, startingIndex);
        if (endingIndex < 0) {
            throw new StringIndexOutOfBoundsException(
                    "No " + ENDING_TAG + " found after " + BEGINNING_TAG);
        }

        String charactersBeforeAndIncludingBeginningTag = original.substring(0, startingIndex);
        String charactersAfterAndIncludingEndingTag = original.substring(endingIndex);
        String unwrapped = new StringBuilder(original.length() + 16)
                .append(charactersBeforeAndIncludingBeginningTag)
                .append(escapeCDataCharacters(original.substring(startingIndex, endingIndex)))
                .append(charactersAfterAndIncludingEndingTag)
                .toString();
        // The references produced for CDATA content are plain ASCII, so this second pass
        // leaves them alone and only touches remaining non-ASCII characters.
        return replaceCharacter(unwrapped, false);
    }

    /**
     * Removes the CDATA markers from the given text and escapes the content of each section.
     * Text outside the sections is copied unchanged.
     */
    private static String escapeCDataCharacters(String charactersWithinTags) {
        StringBuilder escapedCDataCharacters = new StringBuilder(charactersWithinTags.length() + 16);
        int copiedUpTo = 0;
        int cDataTagStartingIndex = charactersWithinTags.indexOf(CDATA_BEGINNING_TAG);
        while (cDataTagStartingIndex >= 0) {
            int cDataStartingIndex = cDataTagStartingIndex + CDATA_BEGINNING_TAG.length();
            // Search for the terminator behind the opener; an earlier stray "]]>" is just text.
            int cDataTagEndingIndex = charactersWithinTags.indexOf(CDATA_ENDING_TAG, cDataStartingIndex);
            if (cDataTagEndingIndex < 0) {
                throw new StringIndexOutOfBoundsException(
                        "Unterminated CDATA section at offset " + cDataTagStartingIndex);
            }
            escapedCDataCharacters
                    .append(charactersWithinTags, copiedUpTo, cDataTagStartingIndex)
                    .append(replaceCharacter(
                            charactersWithinTags.substring(cDataStartingIndex, cDataTagEndingIndex), true));
            copiedUpTo = cDataTagEndingIndex + CDATA_ENDING_TAG.length();
            cDataTagStartingIndex = charactersWithinTags.indexOf(CDATA_BEGINNING_TAG, copiedUpTo);
        }
        return escapedCDataCharacters
                .append(charactersWithinTags, copiedUpTo, charactersWithinTags.length())
                .toString();
    }

    /**
     * Returns a copy of {@code s} in which every code point selected by
     * {@link #needsEscape(int, boolean)} is written as {@code &#N;}.
     */
    private static String replaceCharacter(String s, boolean extendedCharacterEscape) {
        StringBuilder result = new StringBuilder(s.length() + 16);
        int i = 0;
        while (i < s.length()) {
            // Code points instead of (short) chars: the cast made chars >= U+8000 negative,
            // so they were never recognised as "greater than 128".
            int codePoint = s.codePointAt(i);
            if (needsEscape(codePoint, extendedCharacterEscape)) {
                result.append("&#").append(codePoint).append(';');
            } else {
                result.appendCodePoint(codePoint);
            }
            i += Character.charCount(codePoint);
        }
        return result.toString();
    }

    /**
     * Decides whether a code point is written as a character reference: always when it is
     * above 128, and for {@code " & < >} (34, 38, 60, 62) only when
     * {@code extendedCharacterEscape} is set. Unpaired surrogates are never escaped because
     * they have no valid character reference.
     */
    private static boolean needsEscape(int codePoint, boolean extendedCharacterEscape) {
        if (codePoint >= Character.MIN_SURROGATE && codePoint <= Character.MAX_SURROGATE) {
            return false;
        }
        if (codePoint > 128) {
            return true;
        }
        return extendedCharacterEscape
                && (codePoint == '"' || codePoint == '&' || codePoint == '<' || codePoint == '>');
    }
}
MetroidFan2002
September 19, 2009, September 19, 2009 06:46, permalink
Unintentional double post - system was having issues.
MetroidFan2002
September 19, 2009, September 19, 2009 06:49, permalink
Triple post...ugh. Can't seem to delete.
Stefan Vartolomeev
September 1, 2010, September 01, 2010 14:14, permalink
package refactor.engine;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Regex-based variant: unwraps CDATA sections inside the {@code <SvcRsData>} element and
 * replaces selected characters with decimal character references.
 */
public class StringFunctions {
    private static final String CDATA_START_TAG = "<![CDATA[";
    private static final String CDATA_END_TAG = "]]>";
    private static final String DATA_START_TAG = "<SvcRsData>";
    private static final String DATA_END_TAG = "</SvcRsData>";

    /**
     * Splits the document into "up to and including the start tag", "data" and "end tag and
     * rest". Compiled once; DOTALL lets '.' cross line breaks so multi-line XML matches too.
     */
    private static final Pattern DATA_TAG_PATTERN = Pattern.compile(
            "(.*" + Pattern.quote(DATA_START_TAG) + ")(.*)(" + Pattern.quote(DATA_END_TAG) + ".*)",
            Pattern.DOTALL);

    /** One complete CDATA section; group 1 is its content (shortest match up to the terminator). */
    private static final Pattern CDATA_SECTION_PATTERN = Pattern.compile(
            Pattern.quote(CDATA_START_TAG) + "(.*?)" + Pattern.quote(CDATA_END_TAG),
            Pattern.DOTALL);

    /**
     * Removes the CDATA markers found between {@code <SvcRsData>} and {@code </SvcRsData>},
     * replacing {@code "}, {@code &}, {@code <} and {@code >} inside each section with decimal
     * character references, then replaces every code point above 128 in the whole text with a
     * decimal character reference.
     *
     * <p>If the tag pair is not present, or a CDATA section has no terminator, the affected
     * text is left as it is (apart from the escaping of code points above 128).
     *
     * @param xml the XML text to process
     * @return the processed text
     * @throws NullPointerException if {@code xml} is {@code null}
     */
    public static String escapeXML(String xml) {
        String prepared = xml;
        Matcher tokenizedDataTag = DATA_TAG_PATTERN.matcher(xml);
        if (tokenizedDataTag.matches()) {
            prepared = new StringBuilder(xml.length() + 16)
                    .append(tokenizedDataTag.group(1))
                    .append(escapeCDataTags(tokenizedDataTag.group(2)))
                    .append(tokenizedDataTag.group(3))
                    .toString();
        }
        return escapeString(prepared);
    }

    /**
     * Replaces every complete CDATA section in {@code content} with its escaped content.
     * Iterates over the matches instead of recursing once per section, so the stack depth
     * does not grow with the number of sections.
     */
    private static String escapeCDataTags(String content) {
        Matcher section = CDATA_SECTION_PATTERN.matcher(content);
        StringBuilder unwrapped = new StringBuilder(content.length() + 16);
        int copiedUpTo = 0;
        while (section.find()) {
            unwrapped.append(content, copiedUpTo, section.start());
            unwrapped.append(escapeString(section.group(1), true));
            copiedUpTo = section.end();
        }
        // Tail after the last section, including any opener that was never terminated.
        unwrapped.append(content, copiedUpTo, content.length());
        return unwrapped.toString();
    }

    /** Escapes only code points above 128. */
    private static String escapeString(String string) {
        return escapeString(string, false);
    }

    /**
     * Returns a copy of {@code string} in which every code point selected by
     * {@link #mustEscapeCharacter(int, boolean)} is written as {@code &#N;}.
     */
    private static String escapeString(String string, boolean escapeExtraSymbols) {
        StringBuilder result = new StringBuilder(string.length() + 16);
        int i = 0;
        while (i < string.length()) {
            // Code points rather than (short) chars: the cast turned chars >= U+8000 negative
            // and they silently escaped the "> 128" test.
            int codePoint = string.codePointAt(i);
            if (mustEscapeCharacter(codePoint, escapeExtraSymbols)) {
                result.append("&#").append(codePoint).append(';');
            } else {
                result.appendCodePoint(codePoint);
            }
            i += Character.charCount(codePoint);
        }
        return result.toString();
    }

    /**
     * A code point is escaped when it is above 128, or when {@code escapeExtraSymbols} is set
     * and it is one of {@code " & < >} (34, 38, 60, 62). Unpaired surrogates are left alone
     * because no valid character reference exists for them.
     */
    private static boolean mustEscapeCharacter(int codePoint, boolean escapeExtraSymbols) {
        if (codePoint >= Character.MIN_SURROGATE && codePoint <= Character.MAX_SURROGATE) {
            return false;
        }
        return codePoint > 128
                || (escapeExtraSymbols
                        && (codePoint == '"' || codePoint == '&' || codePoint == '<' || codePoint == '>'));
    }
}
While tuning the application, I found this routine that strips the CDATA tags from an XML string and replaces certain characters with character references so that they can be displayed in an HTML page.
The routine is less than perfect; it will leave a trailing space and will break with a StringIndexOutOfBoundsException if there is something wrong with the XML.
I created a few unit tests when I started working on the routine, but the present functionality can be improved, so these serve more as a reference.
The routine needs refactoring for sanity reasons. But the real reason I need to fix this routine is to improve performance: it has become a serious performance bottleneck in the application.
Edit:
For some reason, a huge amount of whitespace is inserted when I submit the code.