From 0e4276a1ac3b6683619179a180fd76ecd20dc54b Mon Sep 17 00:00:00 2001 From: Nick White Date: Wed, 20 May 2020 14:28:04 +0100 Subject: Fix getabook There were two things that needed to be changed to get getabook to work again: - HTTPS usage - Disabling HTTP compression --- getabook.c | 8 +++++--- util.c | 6 +++--- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/getabook.c b/getabook.c index 6f7d2fd..f8d8215 100644 --- a/getabook.c +++ b/getabook.c @@ -15,7 +15,7 @@ " -n download pages from numbers in stdin\n" \ " otherwise, all available pages will be downloaded\n" -#define USESSL 0 +#define USESSL 1 #define URLMAX 2048 #define STRMAX 1024 #define MAXPAGES 9999 @@ -75,8 +75,10 @@ int getpagelist() snprintf(url, URLMAX, "/gp/search-inside/service-data?method=getBookData&asin=%s", bookid); - if(!get("www.amazon.com", USESSL, url, NULL, NULL, &buf, 1)) + if(!get("www.amazon.com", USESSL, url, NULL, NULL, &buf, 1)) { + fprintf(stderr, "failed to get url %s\n", url); return 1; + } /* amazon have a canonical asin, which is needed to get all available pages */ if((s = strstr(buf, "\"ASIN\":\"")) != NULL) { @@ -154,7 +156,7 @@ int getpage(Page *page) return 1; } - if(!sscanf(page->url, "http://%[^/]/", host)) { + if(!sscanf(page->url, "https://%[^/]/", host)) { fprintf(stderr, "can't parse host of %s\n", page->url); return 1; } diff --git a/util.c b/util.c index b7b7693..91cb060 100644 --- a/util.c +++ b/util.c @@ -146,8 +146,7 @@ int request(char *host, int ssl, char *request, char *savecookie, char **body, i cur = pos + 2; if(sscanf(headline, "HTTP/%d.%d %d", &i, &i, &p) == 3 && p != 200) { - if(p == 403) - fprintf(stderr, "403 forbidden: your IP address may be temporarily blocked\n"); + fprintf(stderr, "HTTP code %d; your IP address may be temporarily blocked\n", p); free(buf); free(*body); return 0; @@ -178,7 +177,8 @@ int get(char *host, int ssl, char *path, char *sendcookie, char *savecookie, cha if(sendcookie && sendcookie[0]) snprintf(c, COOKIEMAX, "\r\nCookie: %s", sendcookie); snprintf(h, BUFSIZ, "GET %s HTTP/1.0\r\nUser-Agent: getxbook-"VERSION \ - " (not mozilla)\r\nHost: %s%s\r\n\r\n", path, host, c); + " (not mozilla)\r\nAccept-Encoding: gzip;q=0,deflate;q=0\r\n" \ + "Host: %s%s\r\n\r\n", path, host, c); return request(host, ssl, h, savecookie, body, istext); } -- cgit v1.2.3