author     Nick White <git@njw.me.uk>  2011-09-20 21:20:44 +0100
committer  Nick White <git@njw.me.uk>  2011-09-20 21:20:44 +0100
commit     159a3c8ef4a3844972981e03dbcb2759f2725e79 (patch)
tree       6d09760805df8938fd5b758baf170cce879414f0
parent     aaca1c3ef0fa07a9a8178d3001a0c681e374d448 (diff)
Get jumbo image urls from initial request too
-rw-r--r--  TODO       |  2
-rw-r--r--  getabook.c | 80
2 files changed, 47 insertions, 35 deletions
diff --git a/TODO b/TODO
index 798a93d..e9816c8 100644
--- a/TODO
+++ b/TODO
@@ -2,7 +2,7 @@ before 1.0: create other 2 utilities, fix http bugs, be unicode safe, package fo
# getabook
-parse jumboImageUrls in getBookData
+not all pages of 0312607172 are fetched, though they're available from the web interface. maybe cookies are needed after all :(
# getbnbook
diff --git a/getabook.c b/getabook.c
index 7f205d9..9f10868 100644
--- a/getabook.c
+++ b/getabook.c
@@ -22,6 +22,41 @@ Page **pages;
int numpages;
char *bookid;
+int fillurls(char *buf) {
+ char m[STRMAX];
+ char *c, *s;
+ int i;
+
+ if(!(s = strstr(buf, "\"jumboImageUrls\":{"))) {
+ free(buf);
+ return 1;
+ }
+ s += strlen("\"jumboImageUrls\":{");
+
+ for(i=0; *s && i<numpages; i++) {
+ c = s;
+
+ snprintf(m, STRMAX, "\"%d\":", pages[i]->num);
+
+ while(strncmp(c, m, strlen(m)) != 0) {
+ while(*c && *c != '}' && *c != ',')
+ c++;
+ if(*c == '}')
+ break;
+ c++;
+ }
+ if(*c == '}')
+ continue;
+
+ c += strlen(m);
+ if(!sscanf(c, "\"//sitb-images.amazon.com%[^\"]\"", pages[i]->url))
+ continue;
+ }
+
+ free(buf);
+ return 0;
+}
+
int getpagelist()
{
char url[URLMAX], b[STRMAX];
@@ -33,10 +68,10 @@ int getpagelist()
snprintf(url, URLMAX, "/gp/search-inside/service-data?method=getBookData&asin=%s", bookid);
if(!get("www.amazon.com", url, NULL, NULL, &buf))
- return 0;
+ return 1;
if((s = strstr(buf, "\"litbPages\":[")) == NULL)
- return 0;
+ return 1;
s+=strlen("\"litbPages\":[");
for(i=0, p=pages[0];*s && i<MAXPAGES; s++) {
@@ -48,47 +83,24 @@ int getpagelist()
break;
p->url[0] = '\0';
}
- free(buf);
- return i;
+ numpages = i;
+
+ fillurls(buf);
+
+ return 0;
}
int getpageurls(int pagenum) {
- char url[URLMAX], m[STRMAX];
- char *c, *s, *buf = NULL;
- int i;
+ char url[URLMAX];
+ char *buf = NULL;
snprintf(url, URLMAX, "/gp/search-inside/service-data?method=goToPage&asin=%s&page=%d", bookid, pagenum);
if(!get("www.amazon.com", url, NULL, NULL, &buf))
return 1;
- if(!(s = strstr(buf, "\"jumboImageUrls\":{"))) {
- free(buf);
- return 1;
- }
- s += strlen("\"jumboImageUrls\":{");
+ fillurls(buf);
- for(i=0; *s && i<numpages; i++) {
- c = s;
-
- snprintf(m, STRMAX, "\"%d\":", pages[i]->num);
-
- while(strncmp(c, m, strlen(m)) != 0) {
- while(*c && *c != '}' && *c != ',')
- c++;
- if(*c == '}')
- break;
- c++;
- }
- if(*c == '}')
- continue;
-
- c += strlen(m);
- if(!sscanf(c, "\"//sitb-images.amazon.com%[^\"]\"", pages[i]->url))
- continue;
- }
-
- free(buf);
return 0;
}
@@ -129,7 +141,7 @@ int main(int argc, char *argv[])
bookid = argv[argc-1];
pages = malloc(sizeof(*pages) * MAXPAGES);
- if(!(numpages = getpagelist(bookid, pages))) {
+ if(getpagelist(bookid, pages)) {
fprintf(stderr, "Could not find any pages for %s\n", bookid);
return 1;
}
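
For reference, below is a small self-contained sketch of the matching that the new fillurls() does, run against a hypothetical response fragment. Only the markers it searches for ("jumboImageUrls":{ and the //sitb-images.amazon.com prefix) come from the diff above; the page numbers, the image paths and the surrounding JSON are made up for illustration, and a width guard and end-of-buffer checks are added that the committed code does not have.

#include <stdio.h>
#include <string.h>

#define STRMAX 1024

/* hypothetical excerpt of a getBookData/goToPage service-data response;
 * the real format is assumed from the patterns fillurls() scans for */
static char buf[] =
	"{\"litbPages\":[],"
	"\"jumboImageUrls\":{"
	"\"4\":\"//sitb-images.amazon.com/example/page4.jpg\","
	"\"7\":\"//sitb-images.amazon.com/example/page7.jpg\"}}";

int main(void)
{
	char m[STRMAX], url[STRMAX];
	char *c, *s;
	int pagenums[] = { 4, 7 };
	int i;

	if (!(s = strstr(buf, "\"jumboImageUrls\":{")))
		return 1;
	s += strlen("\"jumboImageUrls\":{");

	for (i = 0; i < 2; i++) {
		c = s;
		/* build the "<pagenum>": key to look for, as fillurls() does */
		snprintf(m, STRMAX, "\"%d\":", pagenums[i]);

		/* walk the comma-separated entries until the key matches
		 * or the object ends */
		while (strncmp(c, m, strlen(m)) != 0) {
			while (*c && *c != '}' && *c != ',')
				c++;
			if (*c == '}' || *c == '\0')
				break;
			c++;
		}
		if (*c == '}' || *c == '\0')
			continue;

		/* strip the host; the path that remains is what getabook
		 * keeps as the page url (width guard added for the sketch) */
		c += strlen(m);
		if (sscanf(c, "\"//sitb-images.amazon.com%1023[^\"]\"", url) == 1)
			printf("page %d: %s\n", pagenums[i], url);
	}
	return 0;
}

Running this prints the two made-up paths keyed by page number, which mirrors how getpagelist() can now fill pages[i]->url directly from the initial getBookData response instead of waiting for per-page goToPage requests.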