(function (root, factory) {
if (typeof define === 'function' && define.amd) {
define('html-janitor', factory);
} else if (typeof exports === 'object') {
module.exports = factory();
} else {
root.HTMLJanitor = factory();
}
}(this, function () {
/**
* @param {Object} config.tags Dictionary of allowed tags.
* @param {boolean} config.keepNestedBlockElements Default false.
*/
function HTMLJanitor(config) {
var tagDefinitions = config['tags'];
var tags = Object.keys(tagDefinitions);
var validConfigValues = tags
.map(function(k) { return typeof tagDefinitions[k]; })
.every(function(type) { return type === 'object' || type === 'boolean' || type === 'function'; });
if(!validConfigValues) {
throw new Error("The configuration was invalid");
}
this.config = config;
}
var blockElementNames = ['P', 'LI', 'TD', 'TH', 'DIV', 'H1', 'H2', 'H3', 'H4', 'H5', 'H6', 'PRE'];
function isBlockElement(node) {
return blockElementNames.indexOf(node.nodeName) !== -1;
}
var inlineElementNames = ['A', 'B', 'STRONG', 'I', 'EM', 'SUB', 'SUP', 'U', 'STRIKE'];
function isInlineElement(node) {
return inlineElementNames.indexOf(node.nodeName) !== -1;
}
HTMLJanitor.prototype.clean = function (html) {
const sandbox = document.implementation.createHTMLDocument('');
const root = sandbox.createElement("div");
root.innerHTML = html;
this._sanitize(sandbox, root);
return root.innerHTML;
};
HTMLJanitor.prototype._sanitize = function (document, parentNode) {
var treeWalker = createTreeWalker(document, parentNode);
var node = treeWalker.firstChild();
if (!node) { return; }
do {
if (node.nodeType === Node.TEXT_NODE) {
// If this text node is just whitespace and the previous or next element
// sibling is a block element, remove it
// N.B.: This heuristic could change. Very specific to a bug with
// `contenteditable` in Firefox: http://jsbin.com/EyuKase/1/edit?js,output
// FIXME: make this an option?
if (node.data.trim() === ''
&& ((node.previousElementSibling && isBlockElement(node.previousElementSibling))
|| (node.nextElementSibling && isBlockElement(node.nextElementSibling)))) {
parentNode.removeChild(node);
this._sanitize(document, parentNode);
break;
} else {
continue;
}
}
// Remove all comments
if (node.nodeType === Node.COMMENT_NODE) {
parentNode.removeChild(node);
this._sanitize(document, parentNode);
break;
}
var isInline = isInlineElement(node);
var containsBlockElement;
if (isInline) {
containsBlockElement = Array.prototype.some.call(node.childNodes, isBlockElement);
}
// Block elements should not be nested (e.g. <li><p>...); if
// they are, we want to unwrap the inner block element.
var isNotTopContainer = !! parentNode.parentNode;
var isNestedBlockElement =
isBlockElement(parentNode) &&
isBlockElement(node) &&
isNotTopContainer;
var nodeName = node.nodeName.toLowerCase();
var allowedAttrs = getAllowedAttrs(this.config, nodeName, node);
var isInvalid = isInline && containsBlockElement;
// Drop tag entirely according to the whitelist *and* if the markup
// is invalid.
if (isInvalid || shouldRejectNode(node, allowedAttrs)
|| (!this.config.keepNestedBlockElements && isNestedBlockElement)) {
// Do not keep the inner text of SCRIPT/STYLE elements.
if (! (node.nodeName === 'SCRIPT' || node.nodeName === 'STYLE')) {
while (node.childNodes.length > 0) {
parentNode.insertBefore(node.childNodes[0], node);
}
}
parentNode.removeChild(node);
this._sanitize(document, parentNode);
break;
}
// Sanitize attributes
for (var a = 0; a < node.attributes.length; a += 1) {
var attr = node.attributes[a];
if (shouldRejectAttr(attr, allowedAttrs, node)) {
node.removeAttribute(attr.name);
// Shift the array to continue looping.
a = a - 1;
}
}
// Sanitize children
this._sanitize(document, node);
} while ((node = treeWalker.nextSibling()));
};
function createTreeWalker(document, node) {
return document.createTreeWalker(node,
NodeFilter.SHOW_TEXT | NodeFilter.SHOW_ELEMENT | NodeFilter.SHOW_COMMENT,
null, false);
}
function getAllowedAttrs(config, nodeName, node){
if (typeof config.tags[nodeName] === 'function') {
return config.tags[nodeName](node);
} else {
return config.tags[nodeName];
}
}
function shouldRejectNode(node, allowedAttrs){
if (typeof allowedAttrs === 'undefined') {
return true;
} else if (typeof allowedAttrs === 'boolean') {
return !allowedAttrs;
}
return false;
}
function shouldRejectAttr(attr, allowedAttrs, node){
var attrName = attr.name.toLowerCase();
if (allowedAttrs === true){
return false;
} else if (typeof allowedAttrs[attrName] === 'function'){
return !allowedAttrs[attrName](attr.value, node);
} else if (typeof allowedAttrs[attrName] === 'undefined'){
return true;
} else if (allowedAttrs[attrName] === false) {
return true;
} else if (typeof allowedAttrs[attrName] === 'string') {
return (allowedAttrs[attrName] !== attr.value);
}
return false;
}
return HTMLJanitor;
}));