summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: JJfutbol <JJfutbol@d4e419ec-0920-11de-bbfd-a7c1bc4c261e> 2009-07-14 00:17:21 +0000
committer: JJfutbol <JJfutbol@d4e419ec-0920-11de-bbfd-a7c1bc4c261e> 2009-07-14 00:17:21 +0000
commit: ec948dcb9966f08efeb9c09ccf81d66966e1bfec (patch)
tree: e3c2b3fa33c0d4b58ec614391e32a9c9fdfb70bd
parent: 94a1e8525e666ff5685b58795386da72b8e873d2 (diff)
download: readability-simple-ec948dcb9966f08efeb9c09ccf81d66966e1bfec.tar.bz2
          readability-simple-ec948dcb9966f08efeb9c09ccf81d66966e1bfec.zip
- updated removeStyles to remove element from parent
- if the parent is not defined, the text is cleared instead
- updated functions with a single for loop to use i as the index
- added tweetback as a bad keyword for removing non-content elements

git-svn-id: http://arc90labs-readability.googlecode.com/svn/trunk@64 d4e419ec-0920-11de-bbfd-a7c1bc4c261e
-rwxr-xr-x js/readability.js 81
1 files changed, 50 insertions, 31 deletions
diff --git a/js/readability.js b/js/readability.js
index 93a3e80..e0ea072 100755
--- a/js/readability.js
+++ b/js/readability.js
@@ -493,12 +493,13 @@ function removeNonContentElement(element, tagName)
if (!containsOnlyText)
{
descendant.parentNode.removeChild(descendant);
- }
- continue;
+ }
+
+ continue;
}
else
{*/
- var badKeywords = ["ad", "captcha", "classified", "clear", "comment", "crumbs", "footer", "footnote", "leftcolumn", "listing", "menu", "meta", "module", "nav", "navbar", "rightcolumn", "sidebar", "sponsor", "tab", "tag", "toolbar", "tools", "trackback", "widget"];
+ var badKeywords = ["ad", "captcha", "classified", "clear", "comment", "crumbs", "footer", "footnote", "leftcolumn", "listing", "menu", "meta", "module", "nav", "navbar", "rightcolumn", "sidebar", "sponsor", "tab", "tag", "toolbar", "tools", "trackback", "tweetback", "widget"];
// should improve this, but for now, if the element has a single bad keyword, remove it
for (var j = 0; j < badKeywords.length; j++)
@@ -536,20 +537,6 @@ function removeNonContentElement(element, tagName)
//--------------------------------------------------------------------------
/**
- * Returns the word count for the specified element.
- *
- * @param element The element.
- *
- * @returns A count indicating the number of words
- */
-function getWordCount(element)
-{
- // normalize replaces consecutive spacing with a single space, so
- // after trimming we can safely split on a space to get the count
- return trim(normalize(getText(element))).split(" ").length;
-}
-
-/**
* Returns the text content of the specified element.
*
* @param element The element from which to retrieve its text content.
@@ -564,6 +551,20 @@ function getText(element)
}
/**
+ * Returns the word count for the specified element.
+ *
+ * @param element The element.
+ *
+ * @returns A count indicating the number of words
+ */
+function getWordCount(element)
+{
+ // normalize replaces consecutive spacing with a single space, so
+ // after trimming we can safely split on a space to get the count
+ return trim(normalize(getText(element))).split(" ").length;
+}
+
+/**
* Determines if the specified element has one of the provided array of
* ancestors and if so returns true.
*
@@ -647,6 +648,10 @@ function removeElementStyles(element)
if (typeof element.removeAttribute == "function")
element.removeAttribute("style");
+
+ // TODO: do not use firstChild and nextSibling, use childNodes array instead
+
+
// prepare to remove styles on all children and siblings
var childElement = element.firstChild;
@@ -654,7 +659,9 @@ function removeElementStyles(element)
{
if (childElement.nodeType == 1)
{
- childElement.removeAttribute("style");
+ // remove the child element's inline style, if we're able
+ if (typeof element.removeAttribute == "function")
+ childElement.removeAttribute("style");
// remove styles recursively
removeElementStyles(childElement);
@@ -672,14 +679,14 @@ function removeScripts()
var scripts = document.getElementsByTagName("SCRIPT");
var numScripts = scripts.length - 1;
- for (var n = numScripts; n >= 0; n--)
+ for (var i = numScripts; i >= 0; i--)
{
- var script = scripts[n];
+ var script = scripts[i];
// remove inline or external referencing scripts (that aren't Readability related)
if (!script.src || (script.src && script.src.indexOf("readability") == -1))
{
- script.parentNode.removeChild(scripts[n]);
+ script.parentNode.removeChild(script);
}
}
}
@@ -689,22 +696,34 @@ function removeScripts()
*/
function removeStyles()
{
- var styleTags = document.getElementsByTagName("STYLE");
+ var styles = document.getElementsByTagName("STYLE");
+ var startIndex = styles.length - 1;
- for (var j = 0; j < styleTags.length; j++)
+ for (var i = startIndex; i >= 0; i--)
{
- var style = styleTags[j];
+ var style = styles[i];
- if (style.textContent)
+ // we prefer to remove the tag completely but if not able we'll clear it
+ if (style.parentNode)
{
- style.textContent = "";
- }
+ style.parentNode.removeChild(style);
+ }
else
{
- // most browsers support textContent but IE has its own way but it
- // seems that Firefox supports both, check link for last example
- // http://www.phpied.com/the-star-hack-in-ie8-and-dynamic-stylesheets/
- style.styleSheet.cssText = "";
+ if (style.textContent)
+ {
+ style.textContent = "";
+ }
+ else
+ {
+ // most browsers support textContent but IE has its own way but it
+ // seems that Firefox supports both, check link for last example
+ // http://www.phpied.com/the-star-hack-in-ie8-and-dynamic-stylesheets/
+ // note that if the style tag contains no text content, then
+ // no styleSheet object is defined either
+ if (style.styleSheet)
+ style.styleSheet.cssText = "";
+ }
}
}
}