From 658a366744f1deabe625988a46ae108e116b62a7 Mon Sep 17 00:00:00 2001 From: JJfutbol Date: Tue, 21 Jul 2009 22:33:23 +0000 Subject: added score increment if word count is greater than 30 when parsing content, for now commented out but will continue to test against various sites git-svn-id: http://arc90labs-readability.googlecode.com/svn/trunk@71 d4e419ec-0920-11de-bbfd-a7c1bc4c261e --- js/readability.js | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'js') diff --git a/js/readability.js b/js/readability.js index 57eb240..cde729d 100755 --- a/js/readability.js +++ b/js/readability.js @@ -129,6 +129,9 @@ function determineContentScore(score, parent, element) if (element.tagName.toLowerCase() == "p" && getWordCount(element) > 20) //|| (score == 0 && getText(element).length > 10)) score++; + //if (getWordCount(element) > 30) + // score++; + // FIXME: not sure yet if this will be included, this would break // pages that use multiple containers for content, or we could tweak // the acceptable minimum... but that would have to be set quite @@ -148,8 +151,8 @@ function determineContentScore(score, parent, element) function parseContent() { // replace all doubled-up
<br> tags with <p> tags, and remove inline fonts - document.body.innerHTML = document.body.innerHTML.replace(/<br[^>]*>\s*<br[^>]*>/gi, "

").replace(/<\/?font[^>]*>/gi, ""); - + document.body.innerHTML = document.body.innerHTML.replace(/<br[^>]*>\s| *<br[^>]*>/gi, "

").replace(/<\/?font[^>]*>/gi, ""); + console.log(document.body.innerHTML.substring(0, 600)); var articleContent = document.createElement("DIV"); var paragraphs = document.getElementsByTagName("P"); var contentBlocks = []; -- cgit v1.2.3