summary refs log tree commit diff
path: root/js/readability.js
diff options
context:
space:
mode:
Diffstat (limited to 'js/readability.js')
-rwxr-xr-x js/readability.js 65
1 files changed, 27 insertions, 38 deletions
diff --git a/js/readability.js b/js/readability.js
index 5619c38..ce8fb52 100755
--- a/js/readability.js
+++ b/js/readability.js
@@ -1,8 +1,19 @@
-var readabilityVersion = "v1.0.0.1";
+var readabilityVersion = "1.0.0.1";
var emailSrc = "http://proto1.arc90.com/readability/email.php";
var highestScore = -1;
var malformedContent = false;
+// for now we want to hold on to our debugging, but if a browser doesn't
+// support it, we'll create a console.log() method that does nothing
+if (typeof console == 'undefined')
+{
+ var console = {};
+
+ console.log = function(msg) {
+ return;
+ };
+}
+
(function(){
// some sites use plugins (jCarousel) that when Readability removes scripts
// or does something funky it causes an alert to appear every few seconds,
@@ -141,36 +152,8 @@ function determineContentScore(score, parent, element)
function parseContent() {
- // replace all doubled-up <BR> tags with <P> tags, and remove fonts
- //var pattern = new RegExp("<br/?>[ \r\n\s]*<br/?>", "gi");
- //document.body.innerHTML = document.body.innerHTML.replace(pattern, "</p><p>").replace(/<\/?font[^>]*>/gi, "");
- document.body.innerHTML = document.body.innerHTML.replace(/<br\/?>\s*<br\/?>/gi, "<p />").replace(/<\/?font[^>]*>/gi, "");
-
- /*
-
- // was part of the PRE based content parsing but tweaking below
- // could resolve the bad regex above replacing double br tags
- // with an empty paragraph
-
- var html = document.body.innerHTML;
- var firstTime = true;
-
- while (html.indexOf('\n\n') >= 0)
- {
- if (firstTime)
- {
- html = html.replace('\n\n', '<p>'); // first item
- firstTime = false;
- }
-
- if (html.indexOf('\n\n') == html.lastIndexOf('\n\n'))
- html = html.replace('\n\n', '</p>'); // last item
- else
- html = html.replace('\n\n', '</p><p>'); // every item in between
- }
-
- document.body.innerHTML = html;
- */
+ // replace all doubled-up <BR> tags with <P> tags, and remove inline fonts
+ document.body.innerHTML = document.body.innerHTML.replace(/<br[^>]*>\s*<br[^>]*>/gi, "<p />").replace(/<\/?font[^>]*>/gi, "");
var articleContent = document.createElement("DIV");
var paragraphs = document.getElementsByTagName("P");
@@ -615,7 +598,8 @@ function normalize(text)
*/
function removeBreaks(element)
{
- element.innerHTML = element.innerHTML.replace(/(<br[^>]*\/?>(\s|&nbsp;?)*){1,}/gi, "<br />");
+ // FIXME: the regex doesn't seem to pick up consecutive br tags, need to revisit
+ element.innerHTML = element.innerHTML.replace(/((<br[^>]*>)[\s]*(<br[^>]*>)){1,}/gi, "<br />");
}
/**
@@ -681,15 +665,16 @@ function removeStyles()
{
var style = styleTags[j];
- // TODO: need to verify that clearing out innerText works in IE
- // might want to consider removing from parent
if (style.textContent)
{
style.textContent = "";
}
else
{
- style.innerText = "";
+ // most browsers support textContent but IE has its own way but it
+ // seems that Firefox supports both, check link for last example
+ // http://www.phpied.com/the-star-hack-in-ie8-and-dynamic-stylesheets/
+ style.styleSheet.cssText = "";
}
}
}
@@ -701,12 +686,16 @@ function removeStylesheets()
{
// TODO: need to do more research, not sure if disabling is enough
// for cross browser compatibility, might consider removal via parent
- // just as done in the removeScripts method
+ // just as done in the removeScripts method, but will need to retrieve
+ // all LINK tags and make sure rel attr is "stylesheet" or that its
+ // type attr is "text/css"
for (var k = 0; k < document.styleSheets.length; k++)
{
- if (document.styleSheets[k].href != null && document.styleSheets[k].href.lastIndexOf("readability") == -1)
+ var styleSheet = document.styleSheets[k];
+
+ if (styleSheet.href != null && styleSheet.href.lastIndexOf("readability") == -1)
{
- document.styleSheets[k].disabled = true;
+ styleSheet.disabled = true;
}
}
}