From bd2fd4f1007508298660d1b617b9368280ed9f51 Mon Sep 17 00:00:00 2001 From: Nick White Date: Sun, 24 Jul 2011 12:14:40 +0100 Subject: Add cookie usage --- TODO | 1 + getgbook.c | 30 ++++++++++++++++++++---------- util.c | 17 ++++++++++++----- util.h | 5 +++-- 4 files changed, 36 insertions(+), 17 deletions(-) diff --git a/TODO b/TODO index ff35451..efb6f83 100644 --- a/TODO +++ b/TODO @@ -2,6 +2,7 @@ make sure i'm checking all lib calls that could fail make sure all arrays are used within bounds +use defined constants rather than e.g. 1024 getgbooktxt (different program as it gets from html pages, which getgbook doesn't any more) diff --git a/getgbook.c b/getgbook.c index a11bf24..08cee6c 100644 --- a/getgbook.c +++ b/getgbook.c @@ -20,7 +20,7 @@ typedef struct { char pagecodes[][3] = { "PP", "PR", "PA", "PT", "\0" }; -Page *getpagedetail(char *bookid, char *pg) +Page *getpagedetail(char *bookid, char *pg, char *cookie) { char url[URLMAX]; char *buf, *c, *d, m[80], *p; @@ -28,7 +28,7 @@ Page *getpagedetail(char *bookid, char *pg) snprintf(url, URLMAX, "/books?id=%s&pg=%s&jscmd=click3", bookid, pg); - if(!get("books.google.com", url, &buf)) + if(!get("books.google.com", url, cookie, NULL, &buf)) return NULL; snprintf(m, 80, "\"pid\":\"%s\"", pg); @@ -67,8 +67,8 @@ Page *getpagedetail(char *bookid, char *pg) int main(int argc, char *argv[]) { - char *bookid, pg[16], buf[1024], n[80], code[3]; - int i, c; + char *bookid, *tmp, pg[16], buf[1024], n[80], code[3], cookie[COOKIEMAX], u[1024]; + int i, c, retry; Page *page; if(argc < 2 || argc > 3 || @@ -81,10 +81,10 @@ int main(int argc, char *argv[]) if(argv[1][0] == '-') { strncpy(code, pagecodes[0], 3); - c = i = 0; + c = i = retry = 0; while(++i) { snprintf(pg, 15, "%s%d", code, i); - if(!(page = getpagedetail(bookid, pg))) { + if(!(page = getpagedetail(bookid, pg, cookie))) { /* no more pages with that code */ strncpy(code, pagecodes[++c], 3); if(code[0] == '\0') break; @@ -92,13 +92,23 @@ int main(int argc, char *argv[]) continue; } if(!page->url[0]) { - fprintf(stderr, "%s not available\n", pg); free(page); + /* try with fresh cookie */ + if(!retry) { + snprintf(u, URLMAX, "/books?id=%s", bookid); + get("books.google.com", u, NULL, cookie, &tmp); + free(tmp); + retry=1; + i--; + } else { + fprintf(stderr, "%s not available\n", pg); + retry=0; + } continue; } if(argv[1][1] == 'a') { snprintf(n, 80, "%05d.png", page->num); - gettofile("books.google.com", page->url, n); + gettofile("books.google.com", page->url, cookie, NULL, n); printf("Downloaded page %d\n", page->num); } else if(page->num != -1) printf("%s %d\n", page->name, page->num); @@ -107,13 +117,13 @@ int main(int argc, char *argv[]) } else { while(fgets(buf, 1024, stdin)) { sscanf(buf, "%15s", pg); - if(!(page = getpagedetail(bookid, pg)) || !page->url[0]) { + if(!(page = getpagedetail(bookid, pg, cookie)) || !page->url[0]) { fprintf(stderr, "%s failed\n", pg); free(page); continue; } snprintf(n, 80, "%05d.png", page->num); - gettofile("books.google.com", page->url, n); + gettofile("books.google.com", page->url, cookie, NULL, n); printf("Downloaded page %d\n", page->num); free(page); } diff --git a/util.c b/util.c index ab6fdf8..21bc598 100644 --- a/util.c +++ b/util.c @@ -6,9 +6,10 @@ #include #include #include +#include "util.h" /* plundered from suckless' sic */ -static int dial(char *host, char *port) { +int dial(char *host, char *port) { static struct addrinfo hints; int srv; struct addrinfo *res, *r; @@ -35,23 +36,29 @@ static int dial(char *host, char *port) { return srv; } -int get(char *host, char *path, char **buf) { +int get(char *host, char *path, char *sendcookie, char *savecookie, char **buf) { size_t l, res; int fd, i, p; char h[1024] = "\0"; + char c[1024] = ""; FILE *srv; + if(savecookie) savecookie[0] = 0; /* TEMP TO PLEASE GCC */ if((fd = dial(host, "80")) == -1) return 0; srv = fdopen(fd, "r+"); + if(sendcookie) + snprintf(c, COOKIEMAX-1, "\r\nCookie: %s", sendcookie); fprintf(srv, "GET %s HTTP/1.0\r\nUser-Agent: getgbook-"VERSION \ - " (not mozilla)\r\nHost: %s\r\n\r\n", path, host); + " (not mozilla)\r\nHost: %s%s\r\n\r\n", path, host, c); fflush(srv); while(h[0] != '\r') { fgets(h, 1024, srv); if(sscanf(h, "HTTP/%d.%d %d", &i, &i, &p) == 3 && p != 200) return 0; + if(savecookie != NULL && sscanf(h, "Set-Cookie: %s;", c)) + strncat(savecookie, c, COOKIEMAX-1); } *buf = malloc(sizeof(char *) * 4096); @@ -62,12 +69,12 @@ int get(char *host, char *path, char **buf) { return l; } -int gettofile(char *host, char *url, char *savepath) { +int gettofile(char *host, char *url, char *sendcookie, char *savecookie, char *savepath) { char *buf = 0; FILE *f; size_t i, l; - if(!(l = get(host, url, &buf))) { + if(!(l = get(host, url, sendcookie, savecookie, &buf))) { fprintf(stderr, "Could not download %s\n", url); return 1; } diff --git a/util.h b/util.h index 541fab3..82c0a29 100644 --- a/util.h +++ b/util.h @@ -1,4 +1,5 @@ /* See COPYING file for copyright, license and warranty details. */ +#define COOKIEMAX 1024 int dial(char *host, char *port); -int get(char *host, char *path, char **buf); -int gettofile(char *host, char *url, char *savepath); +int get(char *host, char *path, char *sendcookie, char *savecookie, char **buf); +int gettofile(char *host, char *url, char *sendcookie, char *savecookie, char *savepath); -- cgit v1.2.3