
// -------------------------------------------------------------------------------------------
//
//
//  PROJECT NAME:	epEditor [Cross browser WYSIWYG editor]
//
//	FILE NAME:		cleanHTML.js
//
//	DATE:			10/08/06
//
//  MODIFIED:		01/04/07	: Array of all acceptable elements introduced
//					20/04/07	: Array for image attributes introduced - due to an IE bug which returns a zero
//								  value for a specified image width on a constructed DOM object, the true width and height values
//								  are removed before parsing and stored in an array. These are then reinstated after the DOM has been rebuilt
//					25/10/07	: RegEx added to check for unterminated list items from IE
//					29/10/07	: IE DOM implementation issue with name attribute resolved
//										[http://www.easy-reader.net/archives/2005/09/02/death-to-bad-dom-implementations/]
//								: Kludge to *restore anchor links* in IE due to DOM implementation issue with absolute paths
//										[http://www.quirksmode.org/bugreports/archives/2005/02/getAttributeHREF_is_always_absolute.html]
//										[http://www.glennjones.net/Post/809/getAttributehrefbug.htm]
//					08/11/07	: Check added for href on a elements - if not specified do not perform *restore anchor links* method
//					19/12/07	: IE specific check for presence of lang attribute in nodes with a single attribute
//					09/01/08	: Remove empty tags reg ex amended to avoid removing anchors
//								: Within removeUnwantedNodes, if a nested node has a class specified it will not be removed even if it is of the same type as its parent
//					30/01/08	: Amend to IE specific check for spans containing a single attribute - now recognises *both* 'lang' and 'language'
//					29/04/08	: All IE test for list item cleaning - running this in Firefox breaks good list items
//					20/08/08	: Added code to recognise MsoListBullet class and convert to HTML lists
//
//  AUTHOR:			malcolm elsworth [malcolm@electricputty.co.uk]
//
//
// -------------------------------------------------------------------------------------------


// Detached <body> element that cleanHTML() creates on each call and loads markup into
var theTempObj;

// List state flags. Not read by any function visible in this file
// NOTE(review): presumably used by the MsoListBullet list conversion - confirm against callers
var listOpen = false;
var theListNode = 0;

// One [id, width, height] entry per image, filled by fixImageAttributes() and read back by parseDOM()
var imageAttributes = [];


// Per-depth loop labels. parseDOM(), removeUnwantedNodes() and fixImageAttributes() read an entry
// into their loop variable and then immediately reset that variable to 0, so within this file
// the values themselves are never used.
// NOTE: index 2 has never been assigned; the hole is kept so every other entry keeps its index.
var LoopChar = ["ro", "so", , "to", "uo", "vo", "wo", "xo", "yo", "zo"];


// Whitelist of elements that survive parseDOM().
// An entry is either
//   - a string: the element name, with no element specific attributes, or
//   - an array: [element name, attribute, attribute, ...] listing the element specific
//     attributes that may be kept (standardAttributes are checked separately).
// The position of every entry matches the original hand numbered list (0 - 51).
// NOTE: "charset" is listed twice for "a"; the duplicate is kept so the array contents are unchanged.
var acceptableElements = [
	["a", "charset", "charset", "coords", "href", "hreflang", "name", "rel", "rev", "shape", "target", "type"],
	"abbr",
	"acronym",
	"address",
	["area", "alt", "coords", "href", "nohref", "shape"],
	["blockquote", "cite"],
	"br",
	"caption",
	"cite",
	"code",
	["col", "span"],
	"colgroup",
	"dd",
	["del", "cite", "datetime"],
	"div",
	"dfn",
	"dl",
	"dt",
	"em",
	"h1",
	"h2",
	"h3",
	"h4",
	"h5",
	"h6",
	"hr",
	"i",
	["img", "alt", "src", "width", "height", "ismap", "longdesc", "usemap"],
	["ins", "cite", "datetime"],
	"kbd",
	"li",
	["map", "name"],
	["object", "archive", "classid", "codebase", "codetype", "data", "declare", "height", "name", "standby", "type", "usemap", "width"],
	"ol",
	"p",
	["param", "name", "type", "value", "valuetype"],
	"pre",
	["q", "cite"],
	"samp",
	"span",
	"strong",
	"sub",
	"sup",
	["table", "cols", "summary"],
	"tbody",
	["td", "abbr", "axis", "colspan", "headers", "rowspan", "scope"],
	"tfoot",
	["th", "abbr", "axis", "colspan", "headers", "rowspan", "scope"],
	"thead",
	"tr",
	"ul",
	"var"
];


// Attributes parseDOM() accepts on any whitelisted element
// (a "class" value containing "mso" is still rejected there)
var standardAttributes = ["class", "id", "title", "dir", "lang"];


// Lower case node names that isInlineName() treats as inline content
// ("#text" is the nodeName of a text node)
var inlineElements = [
	"#text",
	"a",
	"abbr",
	"acronym",
	"b",
	"br",
	"cite",
	"code",
	"dfn",
	"em",
	"img",
	"kbd",
	"q",
	"samp",
	"span",
	"strong",
	"sub",
	"sup",
	"var"
];








// Clean a fragment of HTML (editor output or content pasted from Word) and return tidy markup.
//
// The string is first normalised with regular expressions, then loaded into a detached <body>
// element (the global theTempObj) several times so the DOM helpers can, in order:
//   1. parseDOM                                        - drop elements / attributes that are not whitelisted
//   2. removeUnwantedNodes                             - unwrap redundant nested nodes and bare spans
//   3. removeEmptyTextNodes + checkBlockLevelElements  - wrap stray top level inline content in a <p>
//   4. checkForAnchors                                 - turn same page absolute links back into "#name" links
// Afterwards <br> / <img> are self closed, BRs touching the end of block elements are dropped,
// "> <" gaps are collapsed and a few typographic characters are entity encoded.
//
// theHTML : the HTML string to clean
// returns : the cleaned HTML string
//
// Declared as a function (rather than assigned to an undeclared variable) so that the file
// also loads in strict mode.
function cleanHTML(theHTML) {

	// Create temp DOM object
	theTempObj = document.createElement("body");

	// Remove leading and trailing whitespace, no break spaces from HTML string
	theHTML = theHTML.replace(/^\s+/, "");
	theHTML = theHTML.replace(/\s+$/, "");
	theHTML = theHTML.replace(/&nbsp;/g, ' ');

	// Remove Word xml declarations and namespaced tags
	theHTML = theHTML.replace(/<\?xml[^>]*>/g, "");
	theHTML = theHTML.replace(/<[^ >]+:[^>]*>/g, "");
	theHTML = theHTML.replace(/<\/[^ >]+:[^>]*>/g, "");

	// Set all the HTML elements to lowercase
	theHTML = lowerCaseAndAttributeQuote(theHTML);

	// Replace IE's deprecated elements with valid HTML
	theHTML = theHTML.replace(/<b ([^>]*)[^>]*>/gim, "<strong $1>");
	theHTML = theHTML.replace(/<b>/g, "<strong>");
	theHTML = theHTML.replace(/<\/b>/g, "</strong>");
	theHTML = theHTML.replace(/<i ([^>]*)[^>]*>/gim, "<em $1>");
	theHTML = theHTML.replace(/<i>/g, "<em>");
	theHTML = theHTML.replace(/<\/i>/g, "</em>");

	// Sort out IE's annoying habit of not closing list items
	// (IE only - running this in other browsers breaks good list items)
	if(document.all) {
		theHTML = theHTML.replace(/<\/li>/g, "\n");
		theHTML = theHTML.replace(/<li>([^\n]*)\n/g, "<li>$1</li>\n");
	}

	// Remove empty tags
	// The back reference makes sure only a matching open/close pair is removed;
	// a mismatched pair such as <p></div> is left for the browser's parser to repair
	theHTML = theHTML.replace(/<(h1|h2|h3|h4|h5|h6|ol|ul|li|p|strong|em|span|div)><\/\1>/gim,"");

	// Load HTML string into temp DOM object and parse - removing all unacceptable HTML elements and attributes
	theTempObj.innerHTML = theHTML;
	parseDOM(theTempObj, 1);
	theHTML = theTempObj.innerHTML;

	// Load HTML string into temp DOM object and remove all unwanted elements left after cleaning
	theTempObj.innerHTML = theHTML;
	removeUnwantedNodes(theTempObj, 1);
	theHTML = theTempObj.innerHTML;

	// Load HTML string into temp DOM object and make sure no inline elements are floating outside a block level object
	// (checkBlockLevelElements returns a new body element, which replaces theTempObj)
	theTempObj.innerHTML = theHTML;
	theTempObj = removeEmptyTextNodes(theTempObj);
	theTempObj = checkBlockLevelElements(theTempObj);
	theHTML = theTempObj.innerHTML;

	// Load HTML string into temp DOM object and check for any anchor links
	// checkForAnchors edits the links in place and returns nothing, so the markup is read back from the DOM
	theTempObj.innerHTML = theHTML;
	checkForAnchors(theTempObj);
	theHTML = theTempObj.innerHTML;

	// Clean up again after rebuilding the DOM
	theHTML = lowerCaseAndAttributeQuote(theHTML);
	theHTML = theHTML.replace(/<br[^>]*>/gim, "<br />");
	theHTML = theHTML.replace(/<img ([^>]*)[^>]*>/gim, "<img $1 />");

	// Remove BRs right *BEFORE* the end of block level elements
	theHTML = theHTML.replace(/<br \/>\s*<\/(h1|h2|h3|h4|h5|h6|li|p)/gim, "</$1");

	// Remove BRs right *AFTER* the end of block level elements
	theHTML = theHTML.replace(/<\/(h1|h2|h3|h4|h5|h6|li|p)[^>]*>\s*<br \/>/gim, "</$1>");

	// Collapse a single space between two tags
	theHTML = removeWhiteSpaceBetweenTags(theHTML);

	theHTML = encodeSpecialCharacters(theHTML);

	return theHTML;
}








// Record the real id, width and height of every image that carries a specified width attribute.
// Each record is stored in the global imageAttributes array as [id, width, height] and the image's
// id is swapped for "storedAttributes" + record index so parseDOM() can find the record again.
//
// theParentObject : node whose descendant <img> elements are inspected
function fixImageAttributes(theParentObject) {

	var images = theParentObject.getElementsByTagName('img');
	var storedCount = 0;

	for (var imageIndex = 0; imageIndex < images.length; imageIndex++) {
		var image = images[imageIndex];

		for (var attributeIndex = 0; attributeIndex < image.attributes.length; attributeIndex++) {
			var attribute = image.attributes[attributeIndex];

			// Only a width attribute that is really present on the element triggers a record
			if (attribute.nodeName.toLowerCase() != "width" || !attribute.specified) {
				continue;
			}

			imageAttributes[storedCount] = [image.id, image.width, image.height];

			// Swap the real id for the lookup key
			image.removeAttribute("id");
			image.setAttribute("id", "storedAttributes" + storedCount);

			storedCount++;
		}
	}
}








// Walk the child nodes of theParentObject and rebuild them against the whitelists:
//   - text nodes are skipped
//   - an element listed in acceptableElements that has specified attributes is replaced by a
//     new element of the same type that only carries whitelisted attributes (element specific ones
//     from acceptableElements plus standardAttributes); its children are copied across
//   - an element that is not listed is removed; if it has children they take its place
// After handling a child the function recurses into the resulting node or, after a removal,
// back into the removed node's parent.
//
// theParentObject : DOM node whose childNodes are processed in place
// theDepth        : recursion depth, starting at 1; only used to index LoopChar and passed on as theDepth + 1
function parseDOM(theParentObject, theDepth) {

	// The LoopChar value is overwritten by the for loop below, so theLoop is simply a numeric index
	var theLoop = LoopChar[theDepth - 1];

	for (theLoop=0; theLoop<theParentObject.childNodes.length; theLoop++) {

		var theNode = theParentObject.childNodes[theLoop];
		var theNodeName = theNode.nodeName.toLowerCase();

		// If the current node is a text element, ignore it
		if(theNodeName == "#text") {
			continue;

		} else {
			// Loop through the acceptableElements array
			// If the current node *is* acceptable...
			var inAcceptableElementsArray = isInArray(acceptableElements, theNodeName, "bool");

			if(inAcceptableElementsArray == true) {

				// Check to see if the current node has any attributes
				// (a node without specified attributes is left in place untouched)
				if (countAttributes(theNode) > 0) {

					// Create a temp node of the same type as the current node
					var theTempNode = document.createElement(theNodeName);
					var theArrayIndex = isInArray(acceptableElements, theNodeName, "index");

					for (var a=0; a<theNode.attributes.length; a++) {

						// As IE "sees dead attributes" we need to check that each item in the attributes array actually exists for the current node
						if (theNode.attributes[a].specified) {

							var theAttributeName = theNode.attributes[a].nodeName.toLowerCase();
							var theAttributeValue = theNode.attributes[a].nodeValue;

							// First check to see if the current node has a style attribute
							// If its value contains 'font-weight: bold;' wrap the node's content in a 'strong' element
							// If its value contains 'font-style: italic;' wrap the node's content in an 'em' element
							// If this is a Mozilla styled span it will get cleaned up by the removeUnwantedNodes function
							// (the style attribute itself is in neither whitelist, so it is not copied to the temp node)
							if(theAttributeName == "style") {
								if(theNode.attributes[a].nodeValue != null) {
									if(theAttributeValue.toLowerCase().indexOf("font-weight: bold;") != -1) {
										var tempStrongElement = document.createElement("strong");
										if(theNode.childNodes.length > 0) {
											tempStrongElement.innerHTML = theNode.innerHTML;
											theNode.innerHTML = "";
											theNode.appendChild(tempStrongElement);
										}
									}
									if(theAttributeValue.toLowerCase().indexOf("font-style: italic;") != -1) {
										var tempEmElement = document.createElement("em");
										if(theNode.childNodes.length > 0) {
											tempEmElement.innerHTML = theNode.innerHTML;
											theNode.innerHTML = "";
											theNode.appendChild(tempEmElement);
										}
									}
								}
							}

							// Next check to see if this node type has any specific attributes
							if(theArrayIndex != -1) {

								// Only array entries of acceptableElements carry element specific attributes
								if(isArray(acceptableElements[theArrayIndex])) {

									// If it does, loop through the current node's attributes and where we encounter an acceptable one, apply this to our temp node
									var inSpecificAttributesArray = isInArray(acceptableElements[theArrayIndex], theAttributeName, "bool");
									if(inSpecificAttributesArray == true) {
										//alert(theAttributeName + " - " + theAttributeValue);

										// Due to a DOM implementation issue in IE, if this is the name attribute
										// we need to create a new node and assign its name value at the point of creation
										// Attributes already set on the temp node are carried over to the new node
										if(theAttributeName == "name") {
											var theNewTempNode = document.createNamedElement(theNodeName, theAttributeValue);
											for (var q=0; q<theTempNode.attributes.length; q++) {
												if (theTempNode.attributes[q].specified) {
													theNewTempNode.setAttribute(theTempNode.attributes[q].nodeName, theTempNode.attributes[q].nodeValue);
												}
											}
											theTempNode = theNewTempNode;
										} else {
											theTempNode.setAttribute(theAttributeName, theAttributeValue);
										}
									}
								}
							}

							// Now check to see if the current node contains any standard attributes
							var inStandardAttributesArray = isInArray(standardAttributes, theAttributeName, "bool");
							if(inStandardAttributesArray == true) {

								// If this is a class attribute, check to see if it is a Microsoft Word one (contains "mso"), if it is ignore it
								if(theAttributeName == "class") {
									if(theAttributeValue.toLowerCase().indexOf("mso") == -1) {
										theTempNode.setAttribute(theAttributeName, theAttributeValue);
									}
								} else {
									theTempNode.setAttribute(theAttributeName, theAttributeValue);
								}
							}
						}
					}

					// Now that we have transferred all the acceptable attributes from the current node to the temp node
					// check to see if the current node has any child nodes, if it does copy them over to the temp node
					if(theNode.childNodes.length > 0) {
						copyAllChildren(theNode, theTempNode);
					}

					// Check to see if this node is an image and if so whether we have stored its true attributes
					// (fixImageAttributes gives such images an id of "storedAttributes" + array index)
					if(theTempNode.nodeName.toLowerCase() == "img" && theTempNode.id.indexOf("storedAttributes") != -1) {
						// If so, retrieve its true attributes from the array
						// 16 is the length of the "storedAttributes" prefix, so this leaves the array index
						var theImgAttrArrayIndex = theTempNode.id.substring(16,theTempNode.id.length);
						// Remove its temp ID
						theTempNode.removeAttribute("id");
						// If it had a true ID, replace it
						if(imageAttributes[theImgAttrArrayIndex][0].length > 0) theTempNode.setAttribute("id", imageAttributes[theImgAttrArrayIndex][0]);
						// Replace its true width and height
						theTempNode.setAttribute("width", imageAttributes[theImgAttrArrayIndex][1]);
						theTempNode.setAttribute("height", imageAttributes[theImgAttrArrayIndex][2]);
					}

					// Now we have created a complete copy of the current node, minus any unacceptable attributes,
					// replace the current node with the temp node in the parent object
					theParentObject.replaceChild(theTempNode, theParentObject.childNodes[theLoop]);
					theNode = theTempNode;
				}
			} else {

				// If the current node *is not* acceptable it needs to be removed
				// Find the current node's parent
				var theParentNode = theNode.parentNode;

				// If the current node has children, move them all up one level to replace the current node
				if (theNode.childNodes.length > 0) {
					replaceNodeWithChildren(theNode);
				}
				// If it has no children, simply remove it from the parent object
				else {
					theParentObject.removeChild(theParentObject.childNodes[theLoop]);
				}
				// Point at the parent so that the recursion below re-scans it, including any promoted children
				theNode = theParentNode;
			}
		}

		// Recurse into the (possibly rebuilt) node, or into the parent after a removal
		if (theNode.childNodes.length > 0) {
			parseDOM(theNode, theDepth + 1);
		}
	}
}








// Walk the child nodes of theParentObject and unwrap the ones that add nothing:
//   - a node of the same type as its parent that has no class
//   - a span with no specified attributes, or whose single attribute is lang / language
// An unwanted node is replaced by its children (or simply removed when it has none).
// The function then recurses into the node, or back into its parent after a removal.
//
// theParentObject : DOM node whose childNodes are processed in place
// theDepth        : recursion depth, passed on as theDepth + 1
function removeUnwantedNodes(theParentObject, theDepth) {

	for (var childIndex = 0; childIndex < theParentObject.childNodes.length; childIndex++) {

		var parentName = theParentObject.nodeName.toLowerCase();
		var currentNode = theParentObject.childNodes[childIndex];
		var currentName = currentNode.nodeName.toLowerCase();

		// Same type as the parent and no class - unwanted
		var isUnwanted = (currentName == parentName && currentNode.className == "");

		if (currentName == "span") {
			var attributeTotal = countAttributes(currentNode);

			if (attributeTotal == 0) {
				isUnwanted = true;
			} else if (attributeTotal == 1) {
				if (document.all) {
					// In IE the 1st attribute is always 'language' and the 8th always 'lang'
					// so if either is specified it must be the only attribute the span has
					if (currentNode.attributes[0].specified || currentNode.attributes[8].specified) {
						isUnwanted = true;
					}
				} else {
					var onlyAttributeName = currentNode.attributes[0].nodeName.toLowerCase();
					if (onlyAttributeName == "lang" || onlyAttributeName == "language") {
						isUnwanted = true;
					}
				}
			}
		}

		// After a removal continue with the parent, so promoted children are checked as well
		var nodeToDescend = currentNode;

		if (isUnwanted) {
			var owner = currentNode.parentNode;
			if (currentNode.childNodes.length > 0) {
				replaceNodeWithChildren(currentNode);
			} else {
				theParentObject.removeChild(theParentObject.childNodes[childIndex]);
			}
			nodeToDescend = owner;
		}

		if (nodeToDescend.childNodes.length > 0) {
			removeUnwantedNodes(nodeToDescend, theDepth + 1);
		}
	}
}








// Count the attributes that are actually specified on a node.
// (IE lists every possible attribute in node.attributes, so the length alone cannot be used.)
//
// theNode : DOM node to inspect
// returns : number of attributes whose 'specified' flag is set; 0 when the node has no
//           attributes collection (e.g. a text node)
function countAttributes(theNode) {

	var theAttributes = theNode.attributes;
	var theAttributesCount = 0;

	if (!theAttributes) {
		return theAttributesCount;
	}

	// The counter is declared locally - it used to leak into the global scope as 'i'
	for (var i = 0; i < theAttributes.length; i++) {
		if (theAttributes[i].specified) {
			theAttributesCount++;
		}
	}
	return theAttributesCount;
}








// Look for theValue in theArray.
// An entry that is itself an array is matched on its first item only (this is how
// acceptableElements stores an element name followed by its attributes).
//
// theArray  : array of strings and/or arrays
// theValue  : value to look for (compared with ==)
// theRetrun : "bool" to get true / false, anything else to get the index of the match or -1
function isInArray(theArray, theValue, theRetrun) {

	var wantsBool = (theRetrun == "bool");

	// The counter is declared locally - it used to leak into the global scope as 'ta'
	for (var ta = 0; ta < theArray.length; ta++) {
		var theEntry = theArray[ta];
		var theKey = isArray(theEntry) ? theEntry[0] : theEntry;

		if (theKey == theValue) {
			return wantsBool ? true : ta;
		}
	}
	return wantsBool ? false : -1;
}









// Create an element of the given type with its name attribute already set.
// IE does not apply a name that is set after creation, so the element is first requested with
// IE's createElement('<tag name="...">') form; when that throws, or yields an element without
// a name, the element is created the standard way and the name assigned afterwards.
//
// type    : tag name of the element to create
// name    : value for the name attribute
// returns : the new element
document.createNamedElement = function(type, name) {
	var namedElement;

	try {
		namedElement = document.createElement('<'+type+' name="'+name+'">');
	} catch (e) {
		// Expected outside IE - fall through to the standard route below
	}

	if (namedElement && namedElement.name) {
		return namedElement;
	}

	namedElement = document.createElement(type);
	namedElement.name = name;
	return namedElement;
}







// Run every link with a non-empty href inside theParentObject through restoreAnchors(),
// so that same page links which the browser expanded to absolute URLs become "#name" links again.
// The links are updated in place; nothing is returned.
//
// theParentObject : node whose descendant <a> elements are checked
//
// Declared as a function (rather than assigned to an undeclared variable) so that the file
// also loads in strict mode.
function checkForAnchors(theParentObject) {
	var allLinks = theParentObject.getElementsByTagName("a");
	for (var i = 0; i < allLinks.length; i++) {
		// Links without an href (named anchors) are left alone
		if (allLinks[i].href != "") {
			allLinks[i].href = restoreAnchors(allLinks[i].href);
		}
	}
}







// Turn an absolute link to a fragment of the current page back into a bare "#fragment" link.
//
// theHref : href value to check
// returns : "#..." when the part of theHref before its first "#" equals the current page address
//           (window.location.href without its own fragment); otherwise theHref unchanged
//
// Everything from the first "#" onwards is kept, so a fragment that itself contains "#"
// is no longer cut short.
function restoreAnchors(theHref) {
	var hashPosition = theHref.indexOf("#");

	if (hashPosition == -1) {
		return theHref;
	}

	var currentPage = window.location.href.split("#")[0];

	if (theHref.substring(0, hashPosition) == currentPage) {
		return theHref.substring(hashPosition);
	}
	return theHref;
}








// Replace a fixed set of typographic and accented characters with their HTML entities.
//
// theHTML : string to encode
// returns : the string with dashes, curly quotes and a/e grave/acute letters entity encoded
//
// The patterns are deliberately case sensitive: with the 'i' flag /\u00E0/ also matches the
// upper case letter, which used to turn e.g. an upper case A-grave into &agrave;.
// Upper case letters now get their own (upper case) entities.
function encodeSpecialCharacters(theHTML) {

	var theEntities = [
		[/\u2014/g, "&mdash;"],
		[/\u2013/g, "&ndash;"],
		[/\u2018/g, "&lsquo;"],
		[/\u2019/g, "&rsquo;"],
		[/\u201C/g, "&ldquo;"],
		[/\u201D/g, "&rdquo;"],

		[/\u00E0/g, "&agrave;"],
		[/\u00E1/g, "&aacute;"],
		[/\u00C0/g, "&Agrave;"],
		[/\u00C1/g, "&Aacute;"],

		[/\u00E8/g, "&egrave;"],
		[/\u00E9/g, "&eacute;"],
		[/\u00C8/g, "&Egrave;"],
		[/\u00C9/g, "&Eacute;"]
	];

	for (var e = 0; e < theEntities.length; e++) {
		theHTML = theHTML.replace(theEntities[e][0], theEntities[e][1]);
	}

	return theHTML;
}








// Unwrap a node: deep copies of its children are inserted in front of it, in order,
// and the node itself is then removed from its parent.
//
// theNode : node to unwrap
// returns : the parent node, or true when theNode has no parent (nothing is changed then)
function replaceNodeWithChildren(theNode) {
	var container = theNode.parentNode;

	if (container == null) {
		return true;
	}

	// theNode keeps its own children; only clones are placed in the parent
	for (var index = 0; index < theNode.childNodes.length; index++) {
		var copy = theNode.childNodes[index].cloneNode(true);
		container.insertBefore(copy, theNode);
	}

	container.removeChild(theNode);
	return container;
}








// Transfer all children of one node to another.
// Each child is deep cloned, the clone appended to theTargetNode and the original removed,
// so theOriginalNode ends up empty and theTargetNode holds the copies in the same order.
//
// theOriginalNode : node to take the children from
// theTargetNode   : node that receives the clones
function copyAllChildren(theOriginalNode, theTargetNode) {
	while(theOriginalNode.childNodes.length > 0) {
		// Declared locally - it used to leak into the global scope as 'theChild'
		var theChild = theOriginalNode.firstChild.cloneNode(true);
		theTargetNode.appendChild(theChild);
		theOriginalNode.removeChild(theOriginalNode.firstChild);
	}
}








// Build a new <body> from clones of theParentNode's children in which no inline content sits
// at the top level: each run of consecutive inline nodes is gathered into a new <p>.
// Nodes that are not inline, and links without any children (i.e. named anchors), are placed
// directly in the new body.
//
// theParentNode : node whose direct children are examined (it is not modified)
// returns       : the newly built body element
function checkBlockLevelElements(theParentNode) {

	var rebuiltBody = document.createElement("body");
	var openParagraph = document.createElement("p");
	var paragraphInUse = false;

	for (var index = 0; index < theParentNode.childNodes.length; index++) {

		var child = theParentNode.childNodes[index];
		var childCopy = child.cloneNode(true);

		var standsAlone = !isInlineName(child.nodeName) || (child.nodeName.toLowerCase() == "a" && child.childNodes.length == 0);

		if (!standsAlone) {
			// Inline content joins the paragraph that is currently being collected
			paragraphInUse = true;
			openParagraph.appendChild(childCopy);
			continue;
		}

		// Close the paragraph collected so far before placing the stand-alone node
		if (paragraphInUse) {
			rebuiltBody.appendChild(openParagraph);
			openParagraph = document.createElement("p");
			paragraphInUse = false;
		}
		rebuiltBody.appendChild(childCopy);
	}

	// Trailing inline content
	if (paragraphInUse) {
		rebuiltBody.appendChild(openParagraph);
	}
	return rebuiltBody;
}








// Strip the direct children of theNode that carry no content: text nodes that are empty or
// hold only whitespace, and comment nodes. Only direct children are checked.
//
// theNode : node to tidy (modified in place)
// returns : theNode
function removeEmptyTextNodes(theNode) {
	// Walking backwards means a removal never shifts a child that still has to be checked
	var index = theNode.childNodes.length;

	while (index-- > 0) {
		var child = theNode.childNodes[index];
		var kind = child.nodeName.toLowerCase();

		var isBlankText = (kind == "#text" && child.data.search(/^\s*$/) != -1);

		if (isBlankText || kind == "#comment") {
			theNode.removeChild(child);
		}
	}
	return theNode;
}









// Remove the first direct child of theNode that is a text node (if there is one).
//
// theNode : node to modify in place
// returns : theNode
//
// Declared as a function (rather than assigned to an undeclared variable) so that the file
// also loads in strict mode; the loop stops as soon as the text node has been removed.
function removeFirstTextNode(theNode) {
	for (var i = 0; i < theNode.childNodes.length; i++) {
		if (theNode.childNodes[i].nodeName.toLowerCase() == "#text") {
			theNode.removeChild(theNode.childNodes[i]);
			break;
		}
	}
	return theNode;
}










// Decide which list type a node represents by looking at the first text found when
// following the chain of first children down from theNode.
// NOTE(review): presumably used by the MsoListBullet to HTML list conversion - the caller is not
// visible in this file.
//
// theNode : node to inspect
// returns : "ol" when that text contains a letter or digit, "ul" when it does not,
//           undefined when the text is empty or no text node is found
//
// Changes from the earlier version: the function and its working variable are declared (they
// used to be implicit globals), a missing text node no longer raises a TypeError, and the
// test pattern no longer carries the 'g' flag, which makes test() depend on lastIndex.
function getListType(theNode) {
	var theText = theNode.childNodes[0];

	while (theText && theText.nodeName.toLowerCase() != "#text") {
		theText = theText.childNodes[0];
	}

	if (!theText || theText.nodeValue == "") {
		return;
	}

	var isOrdered = /[a-zA-Z0-9]/;
	return isOrdered.test(theText.nodeValue) ? "ol" : "ul";
}








// Tell whether a node name belongs to the inlineElements list (case is ignored).
//
// element : node name to check, e.g. "SPAN" or "#text"
// returns : true when the lower cased name is listed in inlineElements, otherwise false
function isInlineName(element) {
	var wanted = element.toLowerCase();
	var position = 0;

	while (position < inlineElements.length) {
		if (inlineElements[position] == wanted) {
			return true;
		}
		position++;
	}
	return false;
}








// Tell whether a node name is one of the block level elements known to this function
// (case is ignored).
//
// element : node name to check, e.g. "DIV"
// returns : true for address, blockquote, div, dl, h1 - h6, ol, p, table and ul, otherwise false
function isBlockName(element) {
	var blockNames = {
		"address": true,
		"blockquote": true,
		"div": true,
		"dl": true,
		"h1": true,
		"h2": true,
		"h3": true,
		"h4": true,
		"h5": true,
		"h6": true,
		"ol": true,
		"p": true,
		"table": true,
		"ul": true
	};

	// Own properties only, so inherited names such as "constructor" never match
	return Object.prototype.hasOwnProperty.call(blockNames, element.toLowerCase());
}








// Tell whether a node has a direct child with the given node name (case is ignored).
// Only direct children are checked, not deeper descendants.
//
// nodeObj     : node whose childNodes are searched
// elementName : node name to look for
// returns     : true when a matching child exists, otherwise false
function nodeContainsElement(nodeObj, elementName) {
	// The counter is declared locally - it used to leak into the global scope as 'g'
	for (var g = 0; g < nodeObj.childNodes.length; g++) {
		if (nodeObj.childNodes[g].nodeName.toLowerCase() == elementName.toLowerCase()) {
			return true;
		}
	}
	return false;
}








// Close up every "> <" in the string to "><".
// Only a single space between the two brackets is matched; longer gaps are left untouched.
//
// theHTML : string to process
// returns : the processed string
function removeWhiteSpaceBetweenTags(theHTML) {
	return theHTML.replace(/> </gim,"><");
}








// Reduce a piece of HTML to plain text: tags are stripped, &nbsp; becomes a space and every
// run of whitespace (including line breaks) is collapsed to one space.
//
// theHTML : value to strip
// returns : the plain text; when theHTML cannot be processed (e.g. it is not a string) it is
//           returned unchanged
//
// The tag pattern matches exactly what /<((.|\s)+?)>/ matched ("<", at least one character,
// then everything up to the next ">"), but without the nested alternation that made the old
// pattern backtrack exponentially on an unclosed "<" followed by whitespace.
// The former \r\n replacement was dropped: no line break can survive the whitespace collapse.
function removeAllFormatting(theHTML) {
	try {
		theHTML = theHTML.replace(/<[\s\S][^>]*>/g, "");
		theHTML = theHTML.replace(/&nbsp;/gim, ' ');
		theHTML = theHTML.replace(/\s+/g, ' ');
		return theHTML;
	}
	catch(e) {
		// Best effort: hand back whatever was passed in
		return theHTML;
	}
}








// Tell whether a value is a true array.
//
// obj     : value to check
// returns : true for arrays, false for everything else (including null, undefined and "")
//
// The check uses the value's internal class rather than searching the constructor's source for
// "Array": that search was also true for typed arrays and for any constructor whose name contains
// "Array", and it raised a TypeError for objects without a constructor.
function isArray(obj) {
	if (!obj) {
		return false;
	}
	return Object.prototype.toString.call(obj) == "[object Array]";
}








// Normalise the tags in an HTML string:
//   - element names are lower cased (everything from "<" up to the first space or ">")
//   - inside each tag, attribute names are lower cased
//   - inside each tag, attribute values that do not start with a double quote are wrapped in
//     double quotes (the value is taken to run up to the next space or ">")
//
// theHTML : HTML string to normalise
// returns : the normalised string
function lowerCaseAndAttributeQuote(theHTML) {

	var toLowerCase = function(text) {
		return text.toLowerCase();
	};

	// Element names first, over the whole string
	var withLowerCaseNames = theHTML.replace(/<[^> ]*/g, toLowerCase);

	// Then attribute names and quoting, tag by tag
	return withLowerCaseNames.replace(/<[^>]*>/g, function(tag) {
		var withLowerCaseAttributes = tag.replace(/ [^=]+=/g, toLowerCase);
		return withLowerCaseAttributes.replace(/( [^=]+=)([^"][^ >]*)/g, "$1\"$2\"");
	});
}


