6f0a7377a1c0faa24f9b60e24ab2af4a

While tuning the application, I found this routine that strips Xml string of CDATA tags and replaces certain characters with character references so these could be displayed in a Html page.
The routine is less than perfect; it will leave trailing space and will break with StringOutOfBounds exception if there is something wrong with Xml.
I have created a few unit tests when I started working on the routing, but the present functionality can be improved, so these serve more of a reference.
The routine needs refactoring for sanity reasons. But, the real reason I need to fix this routine is to improve a performance. It has become a serious performance bottleneck in the application.
Edit:
For some reason, a huge ammount of whitespace is inserted when I submitthe code.

package engine;

import junit.framework.Assert;
import junit.framework.TestCase;

public class StringFunctionsTest extends TestCase {
		
	public void testEscapeXMLSimple(){
		final String simple = "<xml><SvcRsData>a<![CDATA[<sender>John & Smith</sender>]]></SvcRsData></xml> ";		
		final String expected = "<xml><SvcRsData>a&#60;sender&#62;John &#38; Smith&#60;/sender&#62;</SvcRsData></xml> ";
		String result = StringFunctions.escapeXML(simple);
		Assert.assertTrue(result.equals(expected));
	}
	
	public void testEscapeXMLCDATAInsideCDATA(){
		final String stringWithCDATAInsideCDATA = "<xml><SvcRsData>a<![CDATA[<sender>John <![CDATA[Inner & CD ]]>& Smith</sender>]]></SvcRsData></xml> ";		
		final String expected = "<xml><SvcRsData>a&#60;sender&#62;John &#60;![CDATA[Inner &#38; CD & Smith</sender>]]></SvcRsData></xml> ";
		String result = StringFunctions.escapeXML(stringWithCDATAInsideCDATA);		
		Assert.assertTrue(result.equals(expected));
	}
	
	public void testEscapeXMLCDATAWithoutClosingTag(){		
		final String stringWithCDATAWithoutClosingTag = "<xml><SvcRsData>a<![CDATA[<sender>John & Smith</sender></SvcRsData></xml> ";
		try{
			String result = StringFunctions.escapeXML(stringWithCDATAWithoutClosingTag);
		}catch(StringIndexOutOfBoundsException exception){
			Assert.assertNotNull(exception);
		}	
	}
	
	public void testEscapeXMLCDATAWithTwoCDATAClosingTags(){		
		final String stringWithCDATAWithTwoClosingTags = "<xml><SvcRsData>a<![CDATA[<sender>John Inner & CD ]]>& Smith</sender>]]>bcd & efg</SvcRsData></xml> ";		
		final String expectedAfterSecondClosingTagNotEscaped = "<xml><SvcRsData>a&#60;sender&#62;John Inner &#38; CD & Smith</sender>]]>bcd & efg</SvcRsData></xml> ";
		String result = StringFunctions.escapeXML(stringWithCDATAWithTwoClosingTags);
		Assert.assertTrue(result.equals(expectedAfterSecondClosingTagNotEscaped));
	}
	
	public void testEscapeXMLSimpleTwoCDATA(){
		final String stringWithTwoCDATA = "<xml><SvcRsData>a<![CDATA[<sender>John & Smith</sender>]]>abc<sometag>xyz</sometag><sometag2><![CDATA[<recipient>Gorge & Doe</recipient>]]></sometag2></SvcRsData></xml> ";		
		final String expected = "<xml><SvcRsData>a&#60;sender&#62;John &#38; Smith&#60;/sender&#62;abc<sometag>xyz</sometag><sometag2>&#60;recipient&#62;Gorge &#38; Doe&#60;/recipient&#62;</sometag2></SvcRsData></xml> ";
		String result = StringFunctions.escapeXML(stringWithTwoCDATA);
		Assert.assertTrue(result.equals(expected));
	}
	
	public void testEscapeXMLOverlappingCDATA(){
		final String stringWithTwoCDATA = "<xml><SvcRsData>a<![CDATA[<sender>John & <![CDATA[Smith</sender>]]>abc<sometag>xyz</sometag><sometag2><recipient>Gorge & Doe</recipient>]]></sometag2></SvcRsData></xml> ";		
		final String expectedMess = "<xml><SvcRsData>a&#60;sender&#62;John &#38; &#60;![CDATA[Smith&#60;/sender&#62;abc<sometag>xyz</sometag><sometag2><recipient>Gorge & Doe</recipient>]]></sometag2></SvcRsData></xml> ";
		String result = StringFunctions.escapeXML(stringWithTwoCDATA);
		Assert.assertTrue(result.equals(expectedMess));
	}
	
}

package engine;

public class StringFunctions {
	
	public static String escapeXML(String s) {
		StringBuffer result = new StringBuffer();
		int stringSize = 0;
		int posIniData = 0, posFinData = 0, posIniCData = 0, posFinCData = 0;
		String stringPreData = "", stringRsData = "", stringPosData = "", stringCData = "", stringPreCData = "", stringTempRsData = "";
		String stringNewRsData = "", stringPosCData = "", stringNewCData = "";
		short caracter;
		
		stringSize = s.length();
		posIniData = s.indexOf("<SvcRsData>");
		if (posIniData > 0) {
			posIniData = posIniData + 11;
			posFinData = s.indexOf("</SvcRsData>");
			stringPreData = s.substring(0, posIniData);
			stringRsData = s.substring(posIniData, posFinData);
			stringPosData = s.substring(posFinData, stringSize);
			stringTempRsData = stringRsData;
			posIniCData = stringRsData.indexOf("<![CDATA[");
			if (posIniCData > 0) {
				while (posIniCData > 0) {
					posIniCData = posIniCData + 9;
					posFinCData = stringTempRsData.indexOf("]]>");
					stringPreCData = stringTempRsData.substring(0,
							posIniCData - 9);
					stringCData = stringTempRsData.substring(posIniCData,
							posFinCData);
					stringPosCData = stringTempRsData.substring(
							posFinCData + 3, stringTempRsData.length());

					stringNewCData = replaceCharacter(stringCData);
					stringTempRsData = stringTempRsData.substring(
							posFinCData + 3, stringTempRsData.length());
					stringNewRsData = stringNewRsData + stringPreCData
							+ stringNewCData;
					posIniCData = stringTempRsData.indexOf("<![CDATA[");
				}
			} else {
				stringNewRsData = stringRsData;
			}
			stringNewRsData = stringNewRsData + stringPosCData;
			s = stringPreData + stringNewRsData + stringPosData;
			stringSize = s.length();
		}

		for (int i = 0; i < stringSize; i++) {
			caracter = (short) s.charAt(i);
			if (caracter > 128) {
				result.append("&#");
				result.append(caracter);
				result.append(';');
			} else {
				result.append((char) caracter);
			}
		}
		return result.toString();
		
	}

	private static String replaceCharacter(String s) {
		StringBuffer result = new StringBuffer();
		int stringSize = s.length();
		short caracter;

		for (int i = 0; i < stringSize; i++) {

			caracter = (short) s.charAt(i);
			if (caracter > 128 || caracter == 34 || caracter == 38
					|| caracter == 60 || caracter == 62) {
				result.append("&#");
				result.append(caracter);
				result.append(';');
			} else {
				result.append((char) caracter);
			}
		}
		return result.toString();

	}

}

Refactorings

No refactoring yet !

60929e8e0445f0070e501f5ded8ad348

MetroidFan2002

September 19, 2009, September 19, 2009 06:43, permalink

No rating. Login to rate!

The escape function is the common piece - but its odd that so many specific ones are there.

Here is code that achieves the exact same functionality. Note that I like LISP, so I tend to avoid variables - you may replace the substring calls in escapeCDataCharacters with variables having these substrings if you like that better.

package engine;

public class StringFunctions {
	
	public static final String CDATA_ENDING_TAG = "]]>";
	public static final String CDATA_BEGINNING_TAG = "<![CDATA[";
	public static final String ENDING_TAG = "</SvcRsData>";
	public static final String BEGINNING_TAG = "<SvcRsData>";
	
	public static String escapeXML(String original) {
		int tagIndex = original.indexOf(BEGINNING_TAG);
		if (tagIndex > 0) {
			int startingIndex = tagIndex + 11;
			int endingIndex = original.indexOf(ENDING_TAG);
			String charactersBeforeAndIncludingBeginningTag = original
			        .substring(0, startingIndex);
			String charactersAfterAndIncludingEndingTag = original
			        .substring(endingIndex);
			
			return replaceCharacter(new StringBuffer(
			        charactersBeforeAndIncludingBeginningTag).append(
			        escapeCDataCharacters(original.substring(startingIndex,
			                endingIndex))).append(
			        charactersAfterAndIncludingEndingTag).toString(), false);
		}
		
		return replaceCharacter(original, false);
	}
	
	private static String escapeCDataCharacters(String charactersWithinTags) {
		StringBuffer escapedCDataCharacters = new StringBuffer();
		int cDataTagStartingIndex = charactersWithinTags
		        .indexOf(CDATA_BEGINNING_TAG);
		String charactersRemainingAfterLastCDataTag = charactersWithinTags;
		while (cDataTagStartingIndex > 0) {
			int cDataStartingIndex = cDataTagStartingIndex + 9;
			int cDataTagEndingIndex = charactersRemainingAfterLastCDataTag
			        .indexOf(CDATA_ENDING_TAG);
			
			escapedCDataCharacters.append(
					charactersRemainingAfterLastCDataTag
			        .substring(0, cDataTagStartingIndex)).append(
			        replaceCharacter(charactersRemainingAfterLastCDataTag
					        .substring(cDataStartingIndex, cDataTagEndingIndex), true));
			
			charactersRemainingAfterLastCDataTag = charactersRemainingAfterLastCDataTag
			        .substring(cDataTagEndingIndex + 3);
			cDataTagStartingIndex = charactersRemainingAfterLastCDataTag
			        .indexOf(CDATA_BEGINNING_TAG);
		}
		
		return escapedCDataCharacters.append(
		        charactersRemainingAfterLastCDataTag).toString();
	}
	
	private static String replaceCharacter(String s, boolean extendedCharacterEscape) {
		StringBuffer result = new StringBuffer();
		short character;
		for (int i = 0; i < s.length(); i++) {
			character = (short) s.charAt(i);
			if (needsEscape(character, extendedCharacterEscape)) {
				result.append("&#");
				result.append(character);
				result.append(';');
			}
			else {
				result.append((char) character);
			}
		}
		return result.toString();
	}
	
	private static boolean needsEscape(short character, boolean extendedCharacterEscape) {
		return character > 128 ? true
		        : extendedCharacterEscape ? (character == 34 || character == 38
		                || character == 60 || character == 62) : false;
	}
	
}
60929e8e0445f0070e501f5ded8ad348

MetroidFan2002

September 19, 2009, September 19, 2009 06:46, permalink

No rating. Login to rate!

Unintentional double post - system was having issues.

60929e8e0445f0070e501f5ded8ad348

MetroidFan2002

September 19, 2009, September 19, 2009 06:49, permalink

No rating. Login to rate!

Triple post...ugh. Can't seem to delete.

D41d8cd98f00b204e9800998ecf8427e

Stefan Vartolomeev

September 1, 2010, September 01, 2010 14:14, permalink

No rating. Login to rate!
package refactor.engine;

import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class StringFunctions {
	
	private static final String CDATA_START_TAG = "<![CDATA[";
	private static final String CDATA_END_TAG = "]]>";

	private static final String DATA_START_TAG = "<SvcRsData>";
	private static final String DATA_END_TAG = "</SvcRsData>";

	private static final String CDATA_START_TAG_RE_ESCAPED = "<\\!\\[CDATA\\[";
	private static final String CDATA_END_TAG_RE_ESCAPED = "\\]\\]>";
	private static Pattern CDATA_TAG_PATTERN = Pattern.compile("(.*?)(" + CDATA_START_TAG_RE_ESCAPED + ".*?" + CDATA_END_TAG_RE_ESCAPED + ")(.*)");

	
	public static String escapeXML(String xml) {
		Pattern tokenizeDataTagPattern = Pattern.compile("(.*" + DATA_START_TAG + ")(.*)(" + DATA_END_TAG + ".*)");
		
		Matcher tokenizedDataTag = tokenizeDataTagPattern.matcher(xml);
		if (tokenizedDataTag.matches()) {
			StringBuilder escapedXML = new StringBuilder();
			
			escapedXML.append(tokenizedDataTag.group(1));
			escapedXML.append(escapeCDataTags(tokenizedDataTag.group(2)));
			escapedXML.append(tokenizedDataTag.group(3));
			
			xml = escapedXML.toString();
		}

		return escapeString(xml);
	}
	
	private static String escapeCDataTags(String content) {
		Matcher cdataTagMatcher = CDATA_TAG_PATTERN.matcher(content);
		
		if (!cdataTagMatcher.matches()) return content;
		
		StringBuilder sb = new StringBuilder();
		sb.append(cdataTagMatcher.group(1));
		sb.append(escapeCDataTag(cdataTagMatcher.group(2)));
		sb.append(escapeCDataTags(cdataTagMatcher.group(3)));
		return sb.toString();
	}

	private static String escapeCDataTag(String cdataTagText) {
		return escapeString(stripCDataTag(cdataTagText), true);
	}

	private static String stripCDataTag(String cdataTagText) {
		int startTagIndex = cdataTagText.indexOf(CDATA_START_TAG);
		int endTagIndex = cdataTagText.indexOf(CDATA_END_TAG);
		
		return cdataTagText.substring(startTagIndex + CDATA_START_TAG.length(), endTagIndex);
	}

	private static String escapeString(String string) {
		return escapeString(string, false);
	}
	
	private static String escapeString(String string, boolean escapeExtraSymbols) {
		StringBuilder result = new StringBuilder();

		for (int i = 0; i < string.length(); i++) {
			short character = (short) string.charAt(i);
			if (mustEscapeCharacter(character, escapeExtraSymbols)) {
				result.append("&#").append(character).append(';');
			} else {
				result.append((char) character);
			}
		}
		
		return result.toString();
	}
	
	private static boolean mustEscapeCharacter(short character, boolean escapeExtraSymbols) {
		return character > 128 || (escapeExtraSymbols && (character == 34 || character == 38 || character == 60 || character == 62));
	}
}
D41d8cd98f00b204e9800998ecf8427e

f

October 22, 2010, October 22, 2010 06:16, permalink

No rating. Login to rate!

jh

Your refactoring





Format Copy from initial code

or Cancel