diff options
author | Nick White <hg@njw.me.uk> | 2011-07-24 12:14:40 +0100 |
---|---|---|
committer | Nick White <hg@njw.me.uk> | 2011-07-24 12:14:40 +0100 |
commit | bd2fd4f1007508298660d1b617b9368280ed9f51 (patch) | |
tree | 3de7bdda0e63101185da106f651e2076aec977cd | |
parent | 491a4f48095a028201a9183b390f9f82c5235046 (diff) |
Add cookie usage
-rw-r--r-- | TODO | 1 |
-rw-r--r-- | getgbook.c | 30 |
-rw-r--r-- | util.c | 17 |
-rw-r--r-- | util.h | 5 |
4 files changed, 36 insertions, 17 deletions
@@ -2,6 +2,7 @@ make sure i'm checking all lib calls that could fail make sure all arrays are used within bounds +use defined constants rather than e.g. 1024 getgbooktxt (different program as it gets from html pages, which getgbook doesn't any more) @@ -20,7 +20,7 @@ typedef struct { char pagecodes[][3] = { "PP", "PR", "PA", "PT", "\0" }; -Page *getpagedetail(char *bookid, char *pg) +Page *getpagedetail(char *bookid, char *pg, char *cookie) { char url[URLMAX]; char *buf, *c, *d, m[80], *p; @@ -28,7 +28,7 @@ Page *getpagedetail(char *bookid, char *pg) snprintf(url, URLMAX, "/books?id=%s&pg=%s&jscmd=click3", bookid, pg); - if(!get("books.google.com", url, &buf)) + if(!get("books.google.com", url, cookie, NULL, &buf)) return NULL; snprintf(m, 80, "\"pid\":\"%s\"", pg); @@ -67,8 +67,8 @@ Page *getpagedetail(char *bookid, char *pg) int main(int argc, char *argv[]) { - char *bookid, pg[16], buf[1024], n[80], code[3]; - int i, c; + char *bookid, *tmp, pg[16], buf[1024], n[80], code[3], cookie[COOKIEMAX], u[1024]; + int i, c, retry; Page *page; if(argc < 2 || argc > 3 || @@ -81,10 +81,10 @@ int main(int argc, char *argv[]) if(argv[1][0] == '-') { strncpy(code, pagecodes[0], 3); - c = i = 0; + c = i = retry = 0; while(++i) { snprintf(pg, 15, "%s%d", code, i); - if(!(page = getpagedetail(bookid, pg))) { + if(!(page = getpagedetail(bookid, pg, cookie))) { /* no more pages with that code */ strncpy(code, pagecodes[++c], 3); if(code[0] == '\0') break; @@ -92,13 +92,23 @@ int main(int argc, char *argv[]) continue; } if(!page->url[0]) { - fprintf(stderr, "%s not available\n", pg); free(page); + /* try with fresh cookie */ + if(!retry) { + snprintf(u, URLMAX, "/books?id=%s", bookid); + get("books.google.com", u, NULL, cookie, &tmp); + free(tmp); + retry=1; + i--; + } else { + fprintf(stderr, "%s not available\n", pg); + retry=0; + } continue; } if(argv[1][1] == 'a') { snprintf(n, 80, "%05d.png", page->num); - gettofile("books.google.com", page->url, n); + 
gettofile("books.google.com", page->url, cookie, NULL, n); printf("Downloaded page %d\n", page->num); } else if(page->num != -1) printf("%s %d\n", page->name, page->num); @@ -107,13 +117,13 @@ int main(int argc, char *argv[]) } else { while(fgets(buf, 1024, stdin)) { sscanf(buf, "%15s", pg); - if(!(page = getpagedetail(bookid, pg)) || !page->url[0]) { + if(!(page = getpagedetail(bookid, pg, cookie)) || !page->url[0]) { fprintf(stderr, "%s failed\n", pg); free(page); continue; } snprintf(n, 80, "%05d.png", page->num); - gettofile("books.google.com", page->url, n); + gettofile("books.google.com", page->url, cookie, NULL, n); printf("Downloaded page %d\n", page->num); free(page); } @@ -6,9 +6,10 @@ #include <netdb.h> #include <netinet/in.h> #include <sys/socket.h> +#include "util.h" /* plundered from suckless' sic */ -static int dial(char *host, char *port) { +int dial(char *host, char *port) { static struct addrinfo hints; int srv; struct addrinfo *res, *r; @@ -35,23 +36,29 @@ static int dial(char *host, char *port) { return srv; } -int get(char *host, char *path, char **buf) { +int get(char *host, char *path, char *sendcookie, char *savecookie, char **buf) { size_t l, res; int fd, i, p; char h[1024] = "\0"; + char c[1024] = ""; FILE *srv; + if(savecookie) savecookie[0] = 0; /* TEMP TO PLEASE GCC */ if((fd = dial(host, "80")) == -1) return 0; srv = fdopen(fd, "r+"); + if(sendcookie) + snprintf(c, COOKIEMAX-1, "\r\nCookie: %s", sendcookie); fprintf(srv, "GET %s HTTP/1.0\r\nUser-Agent: getgbook-"VERSION \ - " (not mozilla)\r\nHost: %s\r\n\r\n", path, host); + " (not mozilla)\r\nHost: %s%s\r\n\r\n", path, host, c); fflush(srv); while(h[0] != '\r') { fgets(h, 1024, srv); if(sscanf(h, "HTTP/%d.%d %d", &i, &i, &p) == 3 && p != 200) return 0; + if(savecookie != NULL && sscanf(h, "Set-Cookie: %s;", c)) + strncat(savecookie, c, COOKIEMAX-1); } *buf = malloc(sizeof(char *) * 4096); @@ -62,12 +69,12 @@ int get(char *host, char *path, char **buf) { return l; } -int 
gettofile(char *host, char *url, char *savepath) { +int gettofile(char *host, char *url, char *sendcookie, char *savecookie, char *savepath) { char *buf = 0; FILE *f; size_t i, l; - if(!(l = get(host, url, &buf))) { + if(!(l = get(host, url, sendcookie, savecookie, &buf))) { fprintf(stderr, "Could not download %s\n", url); return 1; } @@ -1,4 +1,5 @@ /* See COPYING file for copyright, license and warranty details. */ +#define COOKIEMAX 1024 int dial(char *host, char *port); -int get(char *host, char *path, char **buf); -int gettofile(char *host, char *url, char *savepath); +int get(char *host, char *path, char *sendcookie, char *savecookie, char **buf); +int gettofile(char *host, char *url, char *sendcookie, char *savecookie, char *savepath); |