summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNick White <git@njw.me.uk>2012-07-13 20:01:55 +0100
committerNick White <git@njw.me.uk>2012-07-13 20:01:55 +0100
commit212213c11531456ab1b4f270e4a5cd9cecba74f6 (patch)
tree9341f292874be3fe642cc13ece396313f9765f0e
parentb426cacb2ef7d14a41d91ffbf3676a4275d02bb3 (diff)
Try first 25 pages even if they aren't indexed with getabook
-rw-r--r--TODO2
-rw-r--r--getabook.c20
2 files changed, 18 insertions, 4 deletions
diff --git a/TODO b/TODO
index daa9d12..4c6d808 100644
--- a/TODO
+++ b/TODO
@@ -1,3 +1 @@
-in getabook, the web client tries downloading sequentially the first few pages, regardless of whether they're in the available page list. this actually works (some or all of these pages will return), so we should implement something similar too. exactly how it knows when to stop looking is not clear, at least with the one i tried, it just tried all of the first 25 pages.
-
submit 'pad' file to websites http://padsites.asp-software.org/
diff --git a/getabook.c b/getabook.c
index 78cd511..f59c61f 100644
--- a/getabook.c
+++ b/getabook.c
@@ -68,7 +68,7 @@ int getpagelist()
char b[STRMAX] = "";
char *buf = NULL;
char *s, *c;
- int i;
+ int i, n, found;
Page *p;
snprintf(url, URLMAX, "/gp/search-inside/service-data?method=getBookData&asin=%s", bookid);
@@ -103,6 +103,22 @@ int getpagelist()
fillurls(buf);
free(buf);
+
+ /* ensure first 25 pages are included, as sometimes they work
+ * even if not listed. */
+ for(i=0; i<25 && i<MAXPAGES; i++) {
+ found = 0;
+ for(n=0; n<numpages; n++) {
+ if(pages[n]->num == i)
+ found = 1;
+ }
+ if(!found) {
+ p=pages[numpages++]=malloc(sizeof(**pages));
+ p->num = i;
+ p->url[0] = '\0';
+ }
+ }
+
return 0;
}
@@ -139,7 +155,7 @@ int getpage(Page *page)
fprintf(stderr, "can't parse host of %s\n", page->url);
return 1;
}
-
+
if(gettofile(host, page->url, NULL, NULL, path, 0)) {
fprintf(stderr, "%d failed\n", page->num);
return 1;