From 043da4609ae6f9e229f0f03d602f57908f66879a Mon Sep 17 00:00:00 2001
From: Nick White
Date: Sun, 21 Aug 2011 17:22:35 +0100
Subject: Fix reporting of no pages available

---
 TODO       | 9 +++++++--
 getgbook.c | 6 +++---
 2 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/TODO b/TODO
index 4703148..4eb35e4 100644
--- a/TODO
+++ b/TODO
@@ -31,9 +31,14 @@ have websummary.sh print the date of release, e.g.
 
 mkdir of bookid and save pages in there
 
+add cmdline arguments for stdin parsing
+
+merge pageinfo branch
+
+### notes
+
 Google will give you up to 5 cookies which get useful pages in immediate succession. It will stop serving new pages to the ip, even with a fresh cookie. So the cookie is certainly not everything.
 
 If one does something too naughty, all requests from the ip to books.google.com are blocked with a 403 'automated requests' error for 24 hours. What causes this ip block is less clear. It certainly isn't after just trying lots of pages with 5 cookies. It seems to be after requesting 100 new cookies in a certain time period - 100 in 5 minutes seemed to do it, as did 100 in ~15 minutes.
 
-NOTE!!: the method of getting all pages from book page does miss some; they aren't all listed
-* these pages can often be requested, though
+The method of getting all pages from book webpage does miss some; they aren't all listed. These pages can often be requested, though.
diff --git a/getgbook.c b/getgbook.c
index e60316f..3fbdf47 100644
--- a/getgbook.c
+++ b/getgbook.c
@@ -33,10 +33,10 @@ int getpagelist(char *bookid, Page **pages)
 
 	snprintf(url, URLMAX, "/books?id=%s&printsec=frontcover", bookid);
 	if(!get("books.google.com", url, NULL, NULL, &buf))
-		return -1;
+		return 0;
 
 	if((s = strstr(buf, "_OC_Run({\"page\":[")) == NULL)
-		return -1;
+		return 0;
 	s+=strlen("_OC_Run({\"page\":[");
 
 	for(i=0, p=pages[0];*s; s++) {
@@ -125,7 +125,7 @@ int main(int argc, char *argv[])
 		page = malloc(sizeof(*page) * MAXPAGES);
 		for(i=0; i
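
For context, the getgbook.c change switches getpagelist() from returning -1 to returning 0 when the page list cannot be fetched or parsed, so a caller can test the returned page count directly. Below is a minimal, self-contained sketch of that calling convention, not code from the patch: the Page fields, the stub getpagelist() and the error message are assumptions for illustration only.

/* Sketch: the stub getpagelist() stands in for the real function in
 * getgbook.c and always reports failure. With the new convention a zero
 * return means "no pages available", so the count doubles as a success flag. */
#include <stdio.h>

typedef struct { char url[1024]; char name[80]; } Page; /* assumed fields */

static int getpagelist(char *bookid, Page **pages) /* stand-in, not the real code */
{
	(void)bookid; (void)pages;
	return 0; /* simulate a failed fetch or a missing _OC_Run block */
}

int main(void)
{
	Page *pages[1] = { NULL };
	int n;

	/* under the old -1 convention this check would let a failure through,
	 * since !(-1) is false; returning 0 makes the check sufficient */
	if(!(n = getpagelist("exampleid", pages))) {
		fprintf(stderr, "could not find any pages\n");
		return 1;
	}
	printf("%d pages listed\n", n);
	return 0;
}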