From 5945770938a14d2364ab56049df4988cb25890d9 Mon Sep 17 00:00:00 2001
From: Nick White
Date: Fri, 29 Jul 2011 01:03:44 +0100
Subject: Retry properly with specific pages

---
 TODO       |  4 ++++
 getgbook.c | 19 +++++++++++--------
 2 files changed, 15 insertions(+), 8 deletions(-)

diff --git a/TODO b/TODO
index efb6f83..43cb56a 100644
--- a/TODO
+++ b/TODO
@@ -1,7 +1,11 @@
+Note: looks like google allows around 3 page requests per cookie session, and about 40 per ip per [some time period]. If I knew the time period, and once stdin retry is working, could make a script that gets all it can, gets a list of failures, waits, then tries failures, etc. Note these would also have to stop at some point; some pages just aren't available
+
 make sure i'm checking all lib calls that could fail
 make sure all arrays are used within bounds
 
+strace to check paths taken are sensible
+
 use defined constants rather than e.g. 1024
 
 getgbooktxt (different program as it gets from html pages, which getgbook doesn't any more)
 

diff --git a/getgbook.c b/getgbook.c
index c87b1c1..f947a82 100644
--- a/getgbook.c
+++ b/getgbook.c
@@ -124,16 +124,19 @@ int main(int argc, char *argv[])
 	} else {
 		while(fgets(buf, 1024, stdin)) {
 			sscanf(buf, "%15s", pg);
-			get("books.google.com", "/", NULL, cookie, &tmp);
-			if(!(page = getpagedetail(bookid, pg, cookie)) || !page->url[0]) {
-				fprintf(stderr, "%s failed\n", pg);
+			for(retry = 0; retry < 5; retry++) {
+				get("books.google.com", "/", NULL, cookie, &tmp);
+				if((page = getpagedetail(bookid, pg, cookie)) && page->url[0]) {
+					snprintf(n, 80, "%05d.png", page->num);
+					gettofile("books.google.com", page->url, cookie, NULL, n);
+					printf("Downloaded page %d\n", page->num);
+					free(page);
+					break;
+				}
 				if(page) free(page);
-				continue;
 			}
-			snprintf(n, 80, "%05d.png", page->num);
-			gettofile("books.google.com", page->url, cookie, NULL, n);
-			printf("Downloaded page %d\n", page->num);
-			free(page);
+			if(retry == 5)
+				fprintf(stderr, "%s failed\n", pg);
 		}
 	}
 
-- 
cgit v1.2.3
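
The failure check after the loop works because C's for-loop counter keeps its value once the loop exits: retry only equals 5 when all five attempts fell through without hitting the break, which is why the patch can drop the old continue and report failure exactly once, after the loop. Below is a minimal standalone sketch of that idiom, not the project's real code: try_fetch() is a hypothetical stand-in for the patch's get()/getpagedetail()/gettofile() sequence, and MAXRETRY stands in for the literal 5.

/* Sketch of the retry idiom the patch introduces: attempt an
 * operation up to MAXRETRY times, break on success, then detect
 * total failure by checking whether the counter reached the limit. */
#include <stdio.h>

#define MAXRETRY 5

/* hypothetical stand-in for the real fetch sequence;
 * here it pretends to succeed on the third attempt */
static int try_fetch(const char *pg, int attempt)
{
	return attempt == 2;
}

int main(void)
{
	const char *pg = "PA3";
	int retry;

	for(retry = 0; retry < MAXRETRY; retry++) {
		if(try_fetch(pg, retry)) {
			printf("Downloaded page %s\n", pg);
			break;
		}
	}
	if(retry == MAXRETRY)	/* loop ran out without a break */
		fprintf(stderr, "%s failed\n", pg);
	return 0;
}

If the break fires, retry is still below MAXRETRY when the loop exits, so the error path is skipped; if every attempt fails, the loop condition terminates the loop with retry exactly equal to MAXRETRY, and the failure is reported once per page rather than once per attempt.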