summary refs log tree commit diff
diff options
context:
space:
mode:
author JJfutbol <JJfutbol@d4e419ec-0920-11de-bbfd-a7c1bc4c261e> 2009-07-21 22:33:23 +0000
committer JJfutbol <JJfutbol@d4e419ec-0920-11de-bbfd-a7c1bc4c261e> 2009-07-21 22:33:23 +0000
commit 658a366744f1deabe625988a46ae108e116b62a7 (patch)
tree 9548fdcf89afd91eda6e83b63197c23ec7316c48
parent a92b5ba9239c324ec4f286ed8bdd931d4ba30ff7 (diff)
download readability-simple-658a366744f1deabe625988a46ae108e116b62a7.tar.bz2
readability-simple-658a366744f1deabe625988a46ae108e116b62a7.zip
Added a score increment for elements whose word count is greater than 30 when parsing content. It is commented out for now; testing against various sites will continue.
git-svn-id: http://arc90labs-readability.googlecode.com/svn/trunk@71 d4e419ec-0920-11de-bbfd-a7c1bc4c261e
-rwxr-xr-x js/readability.js 7
1 files changed, 5 insertions, 2 deletions
diff --git a/js/readability.js b/js/readability.js
index 57eb240..cde729d 100755
--- a/js/readability.js
+++ b/js/readability.js
@@ -129,6 +129,9 @@ function determineContentScore(score, parent, element)
if (element.tagName.toLowerCase() == "p" && getWordCount(element) > 20) //|| (score == 0 && getText(element).length > 10))
score++;
+ //if (getWordCount(element) > 30)
+ // score++;
+
// FIXME: not sure yet if this will be included, this would break
// pages that use multiple containers for content, or we could tweak
// the acceptable minimum... but that would have to be set quite
@@ -148,8 +151,8 @@ function determineContentScore(score, parent, element)
function parseContent() {
// replace all doubled-up <BR> tags with <P> tags, and remove inline fonts
- document.body.innerHTML = document.body.innerHTML.replace(/<br[^>]*>\s*<br[^>]*>/gi, "<p />").replace(/<\/?font[^>]*>/gi, "");
-
+ document.body.innerHTML = document.body.innerHTML.replace(/<br[^>]*>\s|&nbsp;*<br[^>]*>/gi, "<p />").replace(/<\/?font[^>]*>/gi, "");
+ console.log(document.body.innerHTML.substring(0, 600));
var articleContent = document.createElement("DIV");
var paragraphs = document.getElementsByTagName("P");
var contentBlocks = [];