 TODO       | 9 +++++++--
 getgbook.c | 6 +++---
 2 files changed, 10 insertions(+), 5 deletions(-)
diff --git a/TODO b/TODO
index 4703148..4eb35e4 100644
--- a/TODO
+++ b/TODO
@@ -31,9 +31,14 @@ have websummary.sh print the date of release, e.g.
mkdir of bookid and save pages in there
+add cmdline arguments for stdin parsing
+
+merge pageinfo branch
+
+### notes
+
Google will give an IP up to 5 cookies in immediate succession which get useful pages. After that it stops serving new pages to the IP, even with a fresh cookie, so the cookie is certainly not everything.
If one does something too naughty, all requests from the IP to books.google.com are blocked with a 403 'automated requests' error for 24 hours. What causes this IP block is less clear. It certainly isn't just trying lots of pages with 5 cookies. It seems to be requesting 100 new cookies in a certain time period - 100 in 5 minutes did it, as did 100 in ~15 minutes.
-NOTE!!: the method of getting all pages from book page does miss some; they aren't all listed
-* these pages can often be requested, though
+The method of getting all pages from the book's webpage does miss some; they aren't all listed. These pages can often still be requested, though.
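
On the last point above: pages missing from the listing can usually still be fetched by requesting them directly by page code. A minimal sketch of such a fallback, reusing the get() helper and URLMAX from getgbook.c; the getpagebycode name and the &pg= query parameter are assumptions for illustration, not part of the source:

/* Hypothetical fallback: fetch a page absent from the _OC_Run
 * listing by requesting it directly by its page code (e.g. "PA7").
 * Assumes get() returns nonzero on success, as in getgbook.c, and
 * that the &pg= parameter addresses individual pages. */
int getpagebycode(char *bookid, char *code, char **buf)
{
	char url[URLMAX];
	snprintf(url, URLMAX, "/books?id=%s&pg=%s", bookid, code);
	return get("books.google.com", url, NULL, NULL, buf);
}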
diff --git a/getgbook.c b/getgbook.c
index e60316f..3fbdf47 100644
--- a/getgbook.c
+++ b/getgbook.c
@@ -33,10 +33,10 @@ int getpagelist(char *bookid, Page **pages)
snprintf(url, URLMAX, "/books?id=%s&printsec=frontcover", bookid);
if(!get("books.google.com", url, NULL, NULL, &buf))
- return -1;
+ return 0;
if((s = strstr(buf, "_OC_Run({\"page\":[")) == NULL)
- return -1;
+ return 0;
s+=strlen("_OC_Run({\"page\":[");
for(i=0, p=pages[0];*s; s++) {
@@ -125,7 +125,7 @@ int main(int argc, char *argv[])
page = malloc(sizeof(*page) * MAXPAGES);
for(i=0; i<MAXPAGES; i++) page[i] = malloc(sizeof(**page));
if(!(totalpages = getpagelist(bookid, page))) {
- fprintf(stderr, "Could not find pages for %s\n", bookid);
+ fprintf(stderr, "Could not find any pages for %s\n", bookid);
return 1;
}
for(i=0; i<totalpages; i++) {
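
The rationale for the return-value change above: main() tests getpagelist() with !, and since -1 is truthy in C, the old error return slipped past the check and failures were silently ignored. Returning 0 on failure makes the test fire. A standalone illustration, with names of our own rather than the project's:

#include <stdio.h>

/* Stand-in for getpagelist() reporting failure; illustrative only. */
static int fails_with(int code) { return code; }

int main(void)
{
	int totalpages;
	if(!(totalpages = fails_with(-1)))
		puts("error caught");	/* not printed: !(-1) is 0 */
	if(!(totalpages = fails_with(0)))
		puts("error caught");	/* printed: !0 is 1 */
	return 0;
}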