-rw-r--r--  TODO       |  4 ++++
-rw-r--r--  getgbook.c | 19 +++++++++++--------
2 files changed, 15 insertions(+), 8 deletions(-)
diff --git a/TODO b/TODO
--- a/TODO
+++ b/TODO
@@ -1,7 +1,11 @@
+Note: looks like google allows around 3 page requests per cookie session, and about 40 per ip per [some time period]. If I knew the time period, and once stdin retry is working, could make a script that gets all it can, gets a list of failures, waits, then tries failures, etc. Note these would also have to stop at some point; some pages just aren't available
+
 make sure i'm checking all lib calls that could fail
 make sure all arrays are used within bounds
+strace to check paths taken are sensible
+
 use defined constants rather than e.g. 1024
 
 getgbooktxt (different program as it gets from html pages, which getgbook doesn't any more)
diff --git a/getgbook.c b/getgbook.c
--- a/getgbook.c
+++ b/getgbook.c
@@ -124,16 +124,19 @@ int main(int argc, char *argv[])
 	} else {
 		while(fgets(buf, 1024, stdin)) {
 			sscanf(buf, "%15s", pg);
-			get("books.google.com", "/", NULL, cookie, &tmp);
-			if(!(page = getpagedetail(bookid, pg, cookie)) || !page->url[0]) {
-				fprintf(stderr, "%s failed\n", pg);
+			for(retry = 0; retry < 5; retry++) {
+				get("books.google.com", "/", NULL, cookie, &tmp);
+				if((page = getpagedetail(bookid, pg, cookie)) && page->url[0]) {
+					snprintf(n, 80, "%05d.png", page->num);
+					gettofile("books.google.com", page->url, cookie, NULL, n);
+					printf("Downloaded page %d\n", page->num);
+					free(page);
+					break;
+				}
 				if(page) free(page);
-				continue;
 			}
-			snprintf(n, 80, "%05d.png", page->num);
-			gettofile("books.google.com", page->url, cookie, NULL, n);
-			printf("Downloaded page %d\n", page->num);
-			free(page);
+			if(retry == 5)
+				fprintf(stderr, "%s failed\n", pg);
 		}
 	}
 
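The note added to TODO sketches a driver that feeds getgbook everything, collects the pages it reports as failed, waits out the rate-limit window, and retries, giving up eventually since some pages are never available. A minimal sketch of that idea in C, assuming getgbook is invoked as "getgbook <bookid>" with page codes on stdin and reports unfetchable pages on stderr as "<code> failed" (as in this commit); the file name todo.lst, the 3600-second wait (the real time period is unknown) and the cap of 5 rounds are all placeholders:

/* wrapper sketch: retry failed pages after waiting out the rate limit */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

int main(int argc, char *argv[])
{
	char cmd[256], buf[256], *c;
	FILE *p, *f;
	int round, failed;

	if(argc != 2)
		return EXIT_FAILURE;
	for(round = 0; round < 5; round++) {
		/* keep stderr (where failures are reported), discard stdout */
		snprintf(cmd, sizeof(cmd),
		         "getgbook %s < todo.lst 2>&1 >/dev/null", argv[1]);
		if(!(p = popen(cmd, "r")) || !(f = fopen("tmp.lst", "w")))
			return EXIT_FAILURE;
		failed = 0;
		while(fgets(buf, sizeof(buf), p))
			if((c = strstr(buf, " failed"))) {
				/* strip " failed" to recover the page code */
				fprintf(f, "%.*s\n", (int)(c - buf), buf);
				failed++;
			}
		pclose(p);
		fclose(f);
		rename("tmp.lst", "todo.lst");
		if(!failed)
			break;	/* everything fetched */
		sleep(3600);	/* unknown per-ip time period; placeholder */
	}
	return EXIT_SUCCESS;
}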