summaryrefslogtreecommitdiff
path: root/getabook.c
diff options
context:
space:
mode:
author: Nick White <git@njw.me.uk> 2011-09-22 08:49:47 +0100
committer: Nick White <git@njw.me.uk> 2011-09-22 08:49:47 +0100
commit76315d83930499e4763c7a38787c72b006e8132a (patch)
tree3e4fd4a386cb0e2ebc789646696267b4b24c6a73 /getabook.c
parent692ab952da7852ad01038d5150f084098db6462e (diff)
parent159a3c8ef4a3844972981e03dbcb2759f2725e79 (diff)
Merge branch 'amz'
Diffstat (limited to 'getabook.c')
-rw-r--r-- getabook.c | 185
1 file changed, 185 insertions, 0 deletions
diff --git a/getabook.c b/getabook.c
new file mode 100644
index 0000000..9f10868
--- /dev/null
+++ b/getabook.c
@@ -0,0 +1,185 @@
+/* See COPYING file for copyright and license details. */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "util.h"
+
+#define usage "getabook " VERSION " - an amazon look inside the book downloader\n" \
+ "usage: getabook [-c|-n] asin\n" \
+ " -n download pages from numbers in stdin\n" \
+ " otherwise, all available pages will be downloaded\n"
+
#define URLMAX 1024
#define STRMAX 1024
#define MAXPAGES 9999

/* One book page: its page number and, once resolved, the image url
 * path on sitb-images.amazon.com.  url is "" until filled in. */
typedef struct {
	int num;
	char url[URLMAX];
} Page;

Page **pages;    /* array of MAXPAGES Page pointers, filled by getpagelist */
int numpages;    /* number of valid entries in pages */
char *bookid;    /* amazon asin, taken from argv */

/*
 * Scan buf (a service-data response) for the "jumboImageUrls" JSON
 * object and copy each page's image url path into the matching
 * pages[]->url.  Pages whose key is absent are left untouched.
 * Takes ownership of buf and frees it on every path.
 * Returns 0 on success, 1 if no jumboImageUrls object was found.
 */
int fillurls(char *buf) {
	char m[STRMAX];
	char *c, *s;
	int i;

	if(!(s = strstr(buf, "\"jumboImageUrls\":{"))) {
		free(buf);
		return 1;
	}
	s += strlen("\"jumboImageUrls\":{");

	for(i=0; *s && i<numpages; i++) {
		c = s;

		/* key we are looking for, e.g. "42": */
		snprintf(m, STRMAX, "\"%d\":", pages[i]->num);

		/* advance c from key to key until it matches m */
		while(strncmp(c, m, strlen(m)) != 0) {
			while(*c && *c != '}' && *c != ',')
				c++;
			/* stop at end of object, or end of (possibly
			 * truncated) input — previously a missing '}' let
			 * c run past the terminating NUL */
			if(*c == '\0' || *c == '}')
				break;
			c++;
		}
		if(*c == '\0' || *c == '}')
			continue;	/* this page has no url in buf */

		c += strlen(m);
		/* value looks like "//sitb-images.amazon.com/<path>";
		 * keep only the path part */
		if(!sscanf(c, "\"//sitb-images.amazon.com%[^\"]\"", pages[i]->url))
			continue;
	}

	free(buf);
	return 0;
}
+
+int getpagelist()
+{
+ char url[URLMAX], b[STRMAX];
+ char *buf = NULL;
+ char *s, *c;
+ int i;
+ Page *p;
+
+ snprintf(url, URLMAX, "/gp/search-inside/service-data?method=getBookData&asin=%s", bookid);
+
+ if(!get("www.amazon.com", url, NULL, NULL, &buf))
+ return 1;
+
+ if((s = strstr(buf, "\"litbPages\":[")) == NULL)
+ return 1;
+ s+=strlen("\"litbPages\":[");
+
+ for(i=0, p=pages[0];*s && i<MAXPAGES; s++) {
+ for(c = b; *s != ',' && *s != ']'; s++, c++) *c = *s;
+ *(c+1) = '\0';
+ p=pages[i++]=malloc(sizeof(**pages));;
+ sscanf(b, "%d,", &(p->num));
+ if(s[0] == ']')
+ break;
+ p->url[0] = '\0';
+ }
+ numpages = i;
+
+ fillurls(buf);
+
+ return 0;
+}
+
+int getpageurls(int pagenum) {
+ char url[URLMAX];
+ char *buf = NULL;
+
+ snprintf(url, URLMAX, "/gp/search-inside/service-data?method=goToPage&asin=%s&page=%d", bookid, pagenum);
+
+ if(!get("www.amazon.com", url, NULL, NULL, &buf))
+ return 1;
+
+ fillurls(buf);
+
+ return 0;
+}
+
+int getpage(Page *page)
+{
+ char path[STRMAX];
+ snprintf(path, STRMAX, "%04d.png", page->num);
+
+ if(page->url[0] == '\0') {
+ fprintf(stderr, "%d not found\n", page->num);
+ return 1;
+ }
+
+ if(gettofile("sitb-images.amazon.com", page->url, NULL, NULL, path)) {
+ fprintf(stderr, "%d failed\n", page->num);
+ return 1;
+ }
+
+ printf("%d downloaded\n", page->num);
+ fflush(stdout);
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ char buf[BUFSIZ], pgpath[STRMAX];
+ char in[16];
+ int a, i, n;
+ FILE *f;
+
+ if(argc < 2 || argc > 3 ||
+ (argc == 3 && (argv[1][0]!='-' || argv[1][1] != 'n'))
+ || (argc >= 2 && argv[1][0] == '-' && argv[1][1] == 'h')) {
+ fputs(usage, stdout);
+ return 1;
+ }
+
+ bookid = argv[argc-1];
+
+ pages = malloc(sizeof(*pages) * MAXPAGES);
+ if(getpagelist(bookid, pages)) {
+ fprintf(stderr, "Could not find any pages for %s\n", bookid);
+ return 1;
+ }
+
+ if(argc == 2) {
+ for(i=0; i<numpages; i++) {
+ snprintf(pgpath, STRMAX, "%04d.png", pages[i]->num);
+ if((f = fopen(pgpath, "r")) != NULL) {
+ fclose(f);
+ continue;
+ }
+ if(pages[i]->url[0] == '\0')
+ getpageurls(pages[i]->num);
+ getpage(pages[i]);
+ }
+ } else if(argv[1][0] == '-' && argv[1][1] == 'n') {
+ while(fgets(buf, BUFSIZ, stdin)) {
+ sscanf(buf, "%15s", in);
+ i = -1;
+ sscanf(in, "%d", &n);
+ for(a=0; a<numpages; a++) {
+ if(pages[a]->num == n) {
+ i = a;
+ break;
+ }
+ }
+ if(i == -1) {
+ fprintf(stderr, "%s not found\n", in);
+ continue;
+ }
+ if(pages[i]->url[0] == '\0')
+ getpageurls(pages[i]->num);
+ getpage(pages[i]);
+ }
+ }
+
+ for(i=0; i<numpages; i++) free(pages[i]);
+ free(pages);
+
+ return EXIT_SUCCESS;
+}