summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Nick White <hg@njw.me.uk> 2011-07-29 01:03:44 +0100
committer: Nick White <hg@njw.me.uk> 2011-07-29 01:03:44 +0100
commit: 5945770938a14d2364ab56049df4988cb25890d9 (patch)
tree: 05262709ce9414b7979170b7062b89a5efbd68f5
parent: 568751040761f707deeb46b7032912ac7a7dd34b (diff)
Retry properly with specific pages
-rw-r--r-- TODO 4
-rw-r--r-- getgbook.c 19
2 files changed, 15 insertions, 8 deletions
diff --git a/TODO b/TODO
index efb6f83..43cb56a 100644
--- a/TODO
+++ b/TODO
@@ -1,7 +1,11 @@
+Note: looks like google allows around 3 page requests per cookie session, and about 40 per ip per [some time period]. If I knew the time period, and once stdin retry is working, could make a script that gets all it can, gets a list of failures, waits, then tries failures, etc. Note these would also have to stop at some point; some pages just aren't available
+
make sure i'm checking all lib calls that could fail
make sure all arrays are used within bounds
+strace to check paths taken are sensible
+
use defined constants rather than e.g. 1024
getgbooktxt (different program as it gets from html pages, which getgbook doesn't any more)
diff --git a/getgbook.c b/getgbook.c
index c87b1c1..f947a82 100644
--- a/getgbook.c
+++ b/getgbook.c
@@ -124,16 +124,19 @@ int main(int argc, char *argv[])
} else {
while(fgets(buf, 1024, stdin)) {
sscanf(buf, "%15s", pg);
- get("books.google.com", "/", NULL, cookie, &tmp);
- if(!(page = getpagedetail(bookid, pg, cookie)) || !page->url[0]) {
- fprintf(stderr, "%s failed\n", pg);
+ for(retry = 0; retry < 5; retry++) {
+ get("books.google.com", "/", NULL, cookie, &tmp);
+ if((page = getpagedetail(bookid, pg, cookie)) && page->url[0]) {
+ snprintf(n, 80, "%05d.png", page->num);
+ gettofile("books.google.com", page->url, cookie, NULL, n);
+ printf("Downloaded page %d\n", page->num);
+ free(page);
+ break;
+ }
if(page) free(page);
- continue;
}
- snprintf(n, 80, "%05d.png", page->num);
- gettofile("books.google.com", page->url, cookie, NULL, n);
- printf("Downloaded page %d\n", page->num);
- free(page);
+ if(retry == 5)
+ fprintf(stderr, "%s failed\n", pg);
}
}