summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rwxr-xr-x js/readability.js 7
1 files changed, 5 insertions, 2 deletions
diff --git a/js/readability.js b/js/readability.js
index 57eb240..cde729d 100755
--- a/js/readability.js
+++ b/js/readability.js
@@ -129,6 +129,9 @@ function determineContentScore(score, parent, element)
if (element.tagName.toLowerCase() == "p" && getWordCount(element) > 20) //|| (score == 0 && getText(element).length > 10))
score++;
+ //if (getWordCount(element) > 30)
+ // score++;
+
// FIXME: not sure yet if this will be included, this would break
// pages that use multiple containers for content, or we could tweak
// the acceptable minimum... but that would have to be set quite
@@ -148,8 +151,8 @@ function determineContentScore(score, parent, element)
function parseContent() {
// replace all doubled-up <BR> tags with <P> tags, and remove inline fonts
- document.body.innerHTML = document.body.innerHTML.replace(/<br[^>]*>\s*<br[^>]*>/gi, "<p />").replace(/<\/?font[^>]*>/gi, "");
-
+ document.body.innerHTML = document.body.innerHTML.replace(/<br[^>]*>\s|&nbsp;*<br[^>]*>/gi, "<p />").replace(/<\/?font[^>]*>/gi, "");
+ console.log(document.body.innerHTML.substring(0, 600));
var articleContent = document.createElement("DIV");
var paragraphs = document.getElementsByTagName("P");
var contentBlocks = [];