From ec948dcb9966f08efeb9c09ccf81d66966e1bfec Mon Sep 17 00:00:00 2001 From: JJfutbol Date: Tue, 14 Jul 2009 00:17:21 +0000 Subject: - updated removeStyles to remove element from parent - if parent not defined the text is cleared - updated functions with single for loop to use i as index - added tweetback as bad keyword for removing non content elements git-svn-id: http://arc90labs-readability.googlecode.com/svn/trunk@64 d4e419ec-0920-11de-bbfd-a7c1bc4c261e --- js/readability.js | 81 ++++++++++++++++++++++++++++++++++--------------------- 1 file changed, 50 insertions(+), 31 deletions(-) diff --git a/js/readability.js b/js/readability.js index 93a3e80..e0ea072 100755 --- a/js/readability.js +++ b/js/readability.js @@ -493,12 +493,13 @@ function removeNonContentElement(element, tagName) if (!containsOnlyText) { descendant.parentNode.removeChild(descendant); - } - continue; + } + + continue; } else {*/ - var badKeywords = ["ad", "captcha", "classified", "clear", "comment", "crumbs", "footer", "footnote", "leftcolumn", "listing", "menu", "meta", "module", "nav", "navbar", "rightcolumn", "sidebar", "sponsor", "tab", "tag", "toolbar", "tools", "trackback", "widget"]; + var badKeywords = ["ad", "captcha", "classified", "clear", "comment", "crumbs", "footer", "footnote", "leftcolumn", "listing", "menu", "meta", "module", "nav", "navbar", "rightcolumn", "sidebar", "sponsor", "tab", "tag", "toolbar", "tools", "trackback", "tweetback", "widget"]; // should improve this but for if the element has a single bad keyword remove it for (var j = 0; j < badKeywords.length; j++) @@ -535,20 +536,6 @@ function removeNonContentElement(element, tagName) // //-------------------------------------------------------------------------- -/** - * Returns the word count for the specified element. - * - * @param element The element. - * - * @returns A count indicating the number of words - */ -function getWordCount(element) -{ - // normalize replaces consecutive spacing with a single space, - // by then triming, we can safely split on a space for a count - return trim(normalize(getText(element))).split(" ").length; -} - /** * Returns the text content of the specified element. * @@ -563,6 +550,20 @@ function getText(element) : element.innerText; } +/** + * Returns the word count for the specified element. + * + * @param element The element. + * + * @returns A count indicating the number of words + */ +function getWordCount(element) +{ + // normalize replaces consecutive spacing with a single space, + // by then triming, we can safely split on a space for a count + return trim(normalize(getText(element))).split(" ").length; +} + /** * Determines if the specified element has one of the provided array of * ancestors and if so returns true. @@ -647,6 +648,10 @@ function removeElementStyles(element) if (typeof element.removeAttribute == "function") element.removeAttribute("style"); + + // TODO: do not use firstChild and nextSibling, use childNodes array instead + + // prepare to remove styles on all children and siblings var childElement = element.firstChild; @@ -654,7 +659,9 @@ function removeElementStyles(element) { if (childElement.nodeType == 1) { - childElement.removeAttribute("style"); + // remove any root styles, if we're able + if (typeof element.removeAttribute == "function") + childElement.removeAttribute("style"); // remove styles recursively removeElementStyles(childElement); @@ -672,14 +679,14 @@ function removeScripts() var scripts = document.getElementsByTagName("SCRIPT"); var numScripts = scripts.length - 1; - for (var n = numScripts; n >= 0; n--) + for (var i = numScripts; i >= 0; i--) { - var script = scripts[n]; + var script = scripts[i]; // remove inline or external referencing scripts (that aren't Readability related) if (!script.src || (script.src && script.src.indexOf("readability") == -1)) { - script.parentNode.removeChild(scripts[n]); + script.parentNode.removeChild(script); } } } @@ -689,22 +696,34 @@ function removeScripts() */ function removeStyles() { - var styleTags = document.getElementsByTagName("STYLE"); + var styles = document.getElementsByTagName("STYLE"); + var startIndex = styles.length - 1; - for (var j = 0; j < styleTags.length; j++) + for (var i = startIndex; i >= 0; i--) { - var style = styleTags[j]; + var style = styles[i]; - if (style.textContent) + // we prefer to remove the tag completely but if not able we'll clear it + if (style.parentNode) { - style.textContent = ""; - } + style.parentNode.removeChild(style); + } else { - // most browsers support textContent but IE has its own way but it - // seems that Firefox supports both, check link for last example - // http://www.phpied.com/the-star-hack-in-ie8-and-dynamic-stylesheets/ - style.styleSheet.cssText = ""; + if (style.textContent) + { + style.textContent = ""; + } + else + { + // most browsers support textContent but IE has its own way but it + // seems that Firefox supports both, check link for last example + // http://www.phpied.com/the-star-hack-in-ie8-and-dynamic-stylesheets/ + // note that if the style tag contains no text content, then + // no styleSheet object is defined either + if (style.styleSheet) + style.styleSheet.cssText = ""; + } } } } -- cgit v1.2.3