From 159a3c8ef4a3844972981e03dbcb2759f2725e79 Mon Sep 17 00:00:00 2001 From: Nick White Date: Tue, 20 Sep 2011 21:20:44 +0100 Subject: Get jumbo image urls from initial request too --- TODO | 2 +- getabook.c | 80 ++++++++++++++++++++++++++++++++++++-------------------------- 2 files changed, 47 insertions(+), 35 deletions(-) diff --git a/TODO b/TODO index 798a93d..e9816c8 100644 --- a/TODO +++ b/TODO @@ -2,7 +2,7 @@ before 1.0: create other 2 utilities, fix http bugs, be unicode safe, package fo # getabook -parse jumboImageUrls in getBookData +not all pages of 0312607172 are got, though they're available from web interface. maybe cookies are needed after all :( # getbnbook diff --git a/getabook.c b/getabook.c index 7f205d9..9f10868 100644 --- a/getabook.c +++ b/getabook.c @@ -22,6 +22,41 @@ Page **pages; int numpages; char *bookid; +int fillurls(char *buf) { + char m[STRMAX]; + char *c, *s; + int i; + + if(!(s = strstr(buf, "\"jumboImageUrls\":{"))) { + free(buf); + return 1; + } + s += strlen("\"jumboImageUrls\":{"); + + for(i=0; *s && i<numpages; i++) { + c = s; + + snprintf(m, STRMAX, "\"%d\":", pages[i]->num); + + while(strncmp(c, m, strlen(m)) != 0) { + while(*c && *c != '}' && *c != ',') + c++; + if(*c == '}') + break; + c++; + } + if(*c == '}') + continue; + + c += strlen(m); + if(!sscanf(c, "\"//sitb-images.amazon.com%[^\"]\"", pages[i]->url)) + continue; + } + + free(buf); + return 0; +} + int getpagelist() { char url[URLMAX], b[STRMAX]; @@ -33,10 +68,10 @@ int getpagelist() snprintf(url, URLMAX, "/gp/search-inside/service-data?method=getBookData&asin=%s", bookid); if(!get("www.amazon.com", url, NULL, NULL, &buf)) - return 0; + return 1; if((s = strstr(buf, "\"litbPages\":[")) == NULL) - return 0; + return 1; s+=strlen("\"litbPages\":["); for(i=0, p=pages[0];*s && iurl[0] = '\0'; } - free(buf); - return i; + numpages = i; + + fillurls(buf); + + return 0; } int getpageurls(int pagenum) { - char url[URLMAX], m[STRMAX]; - char *c, *s, *buf = NULL; - int i; + char url[URLMAX]; + char *buf = NULL; snprintf(url, URLMAX, 
"/gp/search-inside/service-data?method=goToPage&asin=%s&page=%d", bookid, pagenum); if(!get("www.amazon.com", url, NULL, NULL, &buf)) return 1; - if(!(s = strstr(buf, "\"jumboImageUrls\":{"))) { - free(buf); - return 1; - } - s += strlen("\"jumboImageUrls\":{"); + fillurls(buf); - for(i=0; *s && i<numpages; i++) { - c = s; - - snprintf(m, STRMAX, "\"%d\":", pages[i]->num); - - while(strncmp(c, m, strlen(m)) != 0) { - while(*c && *c != '}' && *c != ',') - c++; - if(*c == '}') - break; - c++; - } - if(*c == '}') - continue; - - c += strlen(m); - if(!sscanf(c, "\"//sitb-images.amazon.com%[^\"]\"", pages[i]->url)) - continue; - } - - free(buf); return 0; } @@ -129,7 +141,7 @@ int main(int argc, char *argv[]) bookid = argv[argc-1]; pages = malloc(sizeof(*pages) * MAXPAGES); - if(!(numpages = getpagelist(bookid, pages))) { + if(getpagelist(bookid, pages)) { fprintf(stderr, "Could not find any pages for %s\n", bookid); return 1; } -- cgit v1.2.3