Get jumbo image urls from initial request too

author: Nick White <git@njw.me.uk> 2011-09-20 21:20:44 +0100
committer: Nick White <git@njw.me.uk> 2011-09-20 21:20:44 +0100
commit: 159a3c8ef4a3844972981e03dbcb2759f2725e79 (patch)
tree: 6d09760805df8938fd5b758baf170cce879414f0
parent: aaca1c3ef0fa07a9a8178d3001a0c681e374d448 (diff)
2 files changed, 47 insertions, 35 deletions
diff --git a/TODO b/TODO
index 798a93d..e9816c8 100644
--- a/TODO
+++ b/TODO
@@ -2,7 +2,7 @@ before 1.0: create other 2 utilities, fix http bugs, be unicode safe, package fo
 
 # getabook
 
-parse jumboImageUrls in getBookData
+not all pages of 0312607172 are fetched, though they're available from the web interface. maybe cookies are needed after all :(
 
 # getbnbook
 
diff --git a/getabook.c b/getabook.c
index 7f205d9..9f10868 100644
--- a/getabook.c
+++ b/getabook.c
@@ -22,6 +22,41 @@ Page **pages;
 int numpages;
 char *bookid;
 
+/* Parse the "jumboImageUrls" object in buf and store, for every page it lists, the
+ * part of its URL after the host in the matching pages[i]->url. Frees buf on every
+ * path. Returns 1 if the object is absent, else 0. NOTE(review): %[^"] is unbounded. */
+int fillurls(char *buf) {
+	char m[STRMAX];
+	char *c, *s;
+	int i;
+
+	if(!(s = strstr(buf, "\"jumboImageUrls\":{"))) {
+		free(buf);
+		return 1;
+	}
+	s += strlen("\"jumboImageUrls\":{");
+
+	for(i=0; *s && i<numpages; i++) {
+		c = s;
+		snprintf(m, STRMAX, "\"%d\":", pages[i]->num);
+		/* step over comma-separated entries until one starts with this page's key */
+		while(strncmp(c, m, strlen(m)) != 0) {
+			while(*c && *c != '}' && *c != ',')
+				c++;
+			if(*c != ',') /* '}' or NUL: stop instead of stepping past the end of buf */
+				break;
+			c++;
+		}
+		if(!*c || *c == '}')
+			continue;
+		c += strlen(m);
+		if(!sscanf(c, "\"//sitb-images.amazon.com%[^\"]\"", pages[i]->url))
+			continue;
+	}
+	free(buf);
+	return 0;
+}
+
 int getpagelist()
 {
 	char url[URLMAX], b[STRMAX];
@@ -33,10 +68,10 @@ int getpagelist()
 	snprintf(url, URLMAX, "/gp/search-inside/service-data?method=getBookData&asin=%s", bookid);
 
 	if(!get("www.amazon.com", url, NULL, NULL, &buf))
-		return 0;
+		return 1;
 
 	if((s = strstr(buf, "\"litbPages\":[")) == NULL)
-		return 0;
+		return 1;
 	s+=strlen("\"litbPages\":[");
 
 	for(i=0, p=pages[0];*s && i<MAXPAGES; s++) {
@@ -48,47 +83,24 @@ int getpagelist()
 			break;
 		p->url[0] = '\0';
 	}
-	free(buf);
-	return i;
+	numpages = i;
+
+	fillurls(buf);
+
+	return 0;
 }
 
 int getpageurls(int pagenum) {
-	char url[URLMAX], m[STRMAX];
-	char *c, *s, *buf = NULL;
-	int i;
+	char url[URLMAX]; /* path of the goToPage service request for pagenum */
+	char *buf = NULL; /* passed to get(), then handed to fillurls(), which frees it */
 
 	snprintf(url, URLMAX, "/gp/search-inside/service-data?method=goToPage&asin=%s&page=%d", bookid, pagenum);
 
 	if(!get("www.amazon.com", url, NULL, NULL, &buf))
 		return 1;
 
-	if(!(s = strstr(buf, "\"jumboImageUrls\":{"))) {
-		free(buf);
-		return 1;
-	}
-	s += strlen("\"jumboImageUrls\":{");
+	if(fillurls(buf)) return 1; /* a reply without "jumboImageUrls" is a failure, as before */
 
-	for(i=0; *s && i<numpages; i++) {
-		c = s;
-
-		snprintf(m, STRMAX, "\"%d\":", pages[i]->num);
-		
-		while(strncmp(c, m, strlen(m)) != 0) {
-			while(*c && *c != '}' && *c != ',')
-				c++;
-			if(*c == '}')
-				break;
-			c++;
-		}
-		if(*c == '}')
-			continue;
-
-		c += strlen(m);
-		if(!sscanf(c, "\"//sitb-images.amazon.com%[^\"]\"", pages[i]->url))
-			continue;
-	}
-
-	free(buf);
 	return 0;
 }
 
@@ -129,7 +141,7 @@ int main(int argc, char *argv[])
 	bookid = argv[argc-1];
 
 	pages = malloc(sizeof(*pages) * MAXPAGES);
-	if(!(numpages = getpagelist(bookid, pages))) {
+	if(getpagelist(bookid, pages)) {
 		fprintf(stderr, "Could not find any pages for %s\n", bookid);
 		return 1;
 	}
author	Nick White <git@njw.me.uk>	2011-09-20 21:20:44 +0100
committer	Nick White <git@njw.me.uk>	2011-09-20 21:20:44 +0100
commit	159a3c8ef4a3844972981e03dbcb2759f2725e79 (patch)
tree	6d09760805df8938fd5b758baf170cce879414f0
parent	aaca1c3ef0fa07a9a8178d3001a0c681e374d448 (diff)