diff options
author | Nick White <git@njw.me.uk> | 2011-09-20 21:20:44 +0100 |
---|---|---|
committer | Nick White <git@njw.me.uk> | 2011-09-20 21:20:44 +0100 |
commit | 159a3c8ef4a3844972981e03dbcb2759f2725e79 (patch) | |
tree | 6d09760805df8938fd5b758baf170cce879414f0 | |
parent | aaca1c3ef0fa07a9a8178d3001a0c681e374d448 (diff) |
Get jumbo image urls from initial request too
-rw-r--r-- | TODO | 2 |
-rw-r--r-- | getabook.c | 80 |
2 files changed, 47 insertions, 35 deletions
```diff
--- a/TODO
+++ b/TODO
@@ -2,7 +2,7 @@ before 1.0: create other 2 utilities, fix http bugs, be unicode safe, package fo
 
 # getabook
 
-parse jumboImageUrls in getBookData
+not all pages of 0312607172 are got, though they're available from web interface. maybe cookies are needed after all :(
 
 # getbnbook
 
--- a/getabook.c
+++ b/getabook.c
@@ -22,6 +22,41 @@ Page **pages;
 int numpages;
 char *bookid;
 
+int fillurls(char *buf) {
+	char m[STRMAX];
+	char *c, *s;
+	int i;
+
+	if(!(s = strstr(buf, "\"jumboImageUrls\":{"))) {
+		free(buf);
+		return 1;
+	}
+	s += strlen("\"jumboImageUrls\":{");
+
+	for(i=0; *s && i<numpages; i++) {
+		c = s;
+
+		snprintf(m, STRMAX, "\"%d\":", pages[i]->num);
+
+		while(strncmp(c, m, strlen(m)) != 0) {
+			while(*c && *c != '}' && *c != ',')
+				c++;
+			if(*c == '}')
+				break;
+			c++;
+		}
+		if(*c == '}')
+			continue;
+
+		c += strlen(m);
+		if(!sscanf(c, "\"//sitb-images.amazon.com%[^\"]\"", pages[i]->url))
+			continue;
+	}
+
+	free(buf);
+	return 0;
+}
+
 int getpagelist()
 {
 	char url[URLMAX], b[STRMAX];
@@ -33,10 +68,10 @@ int getpagelist()
 	snprintf(url, URLMAX, "/gp/search-inside/service-data?method=getBookData&asin=%s", bookid);
 
 	if(!get("www.amazon.com", url, NULL, NULL, &buf))
-		return 0;
+		return 1;
 
 	if((s = strstr(buf, "\"litbPages\":[")) == NULL)
-		return 0;
+		return 1;
 	s+=strlen("\"litbPages\":[");
 
 	for(i=0, p=pages[0];*s && i<MAXPAGES; s++) {
@@ -48,47 +83,24 @@ int getpagelist()
 			break;
 		p->url[0] = '\0';
 	}
-	free(buf);
-	return i;
+	numpages = i;
+
+	fillurls(buf);
+
+	return 0;
 }
 
 int getpageurls(int pagenum)
 {
-	char url[URLMAX], m[STRMAX];
-	char *c, *s, *buf = NULL;
-	int i;
+	char url[URLMAX];
+	char *buf = NULL;
 
 	snprintf(url, URLMAX, "/gp/search-inside/service-data?method=goToPage&asin=%s&page=%d", bookid, pagenum);
 
 	if(!get("www.amazon.com", url, NULL, NULL, &buf))
 		return 1;
 
-	if(!(s = strstr(buf, "\"jumboImageUrls\":{"))) {
-		free(buf);
-		return 1;
-	}
-	s += strlen("\"jumboImageUrls\":{");
+	fillurls(buf);
 
-	for(i=0; *s && i<numpages; i++) {
-		c = s;
-
-		snprintf(m, STRMAX, "\"%d\":", pages[i]->num);
-
-		while(strncmp(c, m, strlen(m)) != 0) {
-			while(*c && *c != '}' && *c != ',')
-				c++;
-			if(*c == '}')
-				break;
-			c++;
-		}
-		if(*c == '}')
-			continue;
-
-		c += strlen(m);
-		if(!sscanf(c, "\"//sitb-images.amazon.com%[^\"]\"", pages[i]->url))
-			continue;
-	}
-
-	free(buf);
 	return 0;
 }
 
@@ -129,7 +141,7 @@ int main(int argc, char *argv[])
 	bookid = argv[argc-1];
 
 	pages = malloc(sizeof(*pages) * MAXPAGES);
-	if(!(numpages = getpagelist(bookid, pages))) {
+	if(getpagelist(bookid, pages)) {
 		fprintf(stderr, "Could not find any pages for %s\n", bookid);
 		return 1;
 	}
```