-rw-r--r--  TODO        |  2 --
-rw-r--r--  getabook.c  | 20 ++++++++++++++++++--
2 files changed, 18 insertions(+), 4 deletions(-)
diff --git a/TODO b/TODO
index daa9d12..4c6d808 100644
--- a/TODO
+++ b/TODO
@@ -1,3 +1 @@
-in getabook, the web client tries downloading the first few pages sequentially, regardless of whether they're in the available page list. this actually works (some or all of these pages will return), so we should implement something similar too. exactly how it knows when to stop looking is not clear; at least with the one i tried, it just tried all of the first 25 pages.
-
submit 'pad' file to websites http://padsites.asp-software.org/
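The removed item above describes the behaviour the web reader exhibits: it probes the first 25 pages one by one, whether or not they appear in the advertised page list. As a minimal standalone sketch of that probing, assuming a hypothetical fetchpage() helper that requests a page by number and returns 0 when the server sends anything back:

#include <stdio.h>

/* hypothetical stand-in for fetching page i over http;
 * returns 0 if the server sent the page back */
static int fetchpage(int i)
{
	(void)i;
	return 0; /* pretend every probe succeeds in this sketch */
}

int main(void)
{
	int i;

	/* probe the first 25 pages unconditionally; per the note
	 * above, some or all of them return even when they are
	 * missing from the advertised page list */
	for (i = 0; i < 25; i++)
		if (fetchpage(i) == 0)
			printf("page %d returned\n", i);
	return 0;
}

The commit below implements the same idea in getabook.c by padding the page list with placeholders rather than fetching eagerly.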
diff --git a/getabook.c b/getabook.c
index 78cd511..f59c61f 100644
--- a/getabook.c
+++ b/getabook.c
@@ -68,7 +68,7 @@ int getpagelist()
char b[STRMAX] = "";
char *buf = NULL;
char *s, *c;
- int i;
+ int i, n, found;
Page *p;

snprintf(url, URLMAX, "/gp/search-inside/service-data?method=getBookData&asin=%s", bookid);
@@ -103,6 +103,22 @@ int getpagelist()
fillurls(buf);
free(buf);
+
+ /* ensure first 25 pages are included, as sometimes they work
+ * even if not listed. */
+ for(i=0; i<25 && i<MAXPAGES; i++) {
+ found = 0;
+ for(n=0; n<numpages; n++) {
+ if(pages[n]->num == i)
+ found = 1;
+ }
+ if(!found) {
+ p=pages[numpages++]=malloc(sizeof(**pages));
+ p->num = i;
+ p->url[0] = '\0';
+ }
+ }
+
return 0;
}
@@ -139,7 +155,7 @@ int getpage(Page *page)
fprintf(stderr, "can't parse host of %s\n", page->url);
return 1;
}
-
+
if(gettofile(host, page->url, NULL, NULL, path, 0)) {
fprintf(stderr, "%d failed\n", page->num);
return 1;