summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Nick White <hg@njw.me.uk> 2011-07-24 12:14:40 +0100
committer Nick White <hg@njw.me.uk> 2011-07-24 12:14:40 +0100
commit bd2fd4f1007508298660d1b617b9368280ed9f51 (patch)
tree 3de7bdda0e63101185da106f651e2076aec977cd
parent 491a4f48095a028201a9183b390f9f82c5235046 (diff)
Add cookie usage
-rw-r--r-- TODO 1
-rw-r--r-- getgbook.c 30
-rw-r--r-- util.c 17
-rw-r--r-- util.h 5
4 files changed, 36 insertions, 17 deletions
diff --git a/TODO b/TODO
index ff35451..efb6f83 100644
--- a/TODO
+++ b/TODO
@@ -2,6 +2,7 @@ make sure i'm checking all lib calls that could fail
make sure all arrays are used within bounds
+use defined constants rather than e.g. 1024
getgbooktxt (different program as it gets from html pages, which getgbook doesn't any more)
diff --git a/getgbook.c b/getgbook.c
index a11bf24..08cee6c 100644
--- a/getgbook.c
+++ b/getgbook.c
@@ -20,7 +20,7 @@ typedef struct {
char pagecodes[][3] = { "PP", "PR", "PA", "PT", "\0" };
-Page *getpagedetail(char *bookid, char *pg)
+Page *getpagedetail(char *bookid, char *pg, char *cookie)
{
char url[URLMAX];
char *buf, *c, *d, m[80], *p;
@@ -28,7 +28,7 @@ Page *getpagedetail(char *bookid, char *pg)
snprintf(url, URLMAX, "/books?id=%s&pg=%s&jscmd=click3", bookid, pg);
- if(!get("books.google.com", url, &buf))
+ if(!get("books.google.com", url, cookie, NULL, &buf))
return NULL;
snprintf(m, 80, "\"pid\":\"%s\"", pg);
@@ -67,8 +67,8 @@ Page *getpagedetail(char *bookid, char *pg)
int main(int argc, char *argv[])
{
- char *bookid, pg[16], buf[1024], n[80], code[3];
- int i, c;
+ char *bookid, *tmp, pg[16], buf[1024], n[80], code[3], cookie[COOKIEMAX], u[1024];
+ int i, c, retry;
Page *page;
if(argc < 2 || argc > 3 ||
@@ -81,10 +81,10 @@ int main(int argc, char *argv[])
if(argv[1][0] == '-') {
strncpy(code, pagecodes[0], 3);
- c = i = 0;
+ c = i = retry = 0;
while(++i) {
snprintf(pg, 15, "%s%d", code, i);
- if(!(page = getpagedetail(bookid, pg))) {
+ if(!(page = getpagedetail(bookid, pg, cookie))) {
/* no more pages with that code */
strncpy(code, pagecodes[++c], 3);
if(code[0] == '\0') break;
@@ -92,13 +92,23 @@ int main(int argc, char *argv[])
continue;
}
if(!page->url[0]) {
- fprintf(stderr, "%s not available\n", pg);
free(page);
+ /* try with fresh cookie */
+ if(!retry) {
+ snprintf(u, URLMAX, "/books?id=%s", bookid);
+ get("books.google.com", u, NULL, cookie, &tmp);
+ free(tmp);
+ retry=1;
+ i--;
+ } else {
+ fprintf(stderr, "%s not available\n", pg);
+ retry=0;
+ }
continue;
}
if(argv[1][1] == 'a') {
snprintf(n, 80, "%05d.png", page->num);
- gettofile("books.google.com", page->url, n);
+ gettofile("books.google.com", page->url, cookie, NULL, n);
printf("Downloaded page %d\n", page->num);
} else if(page->num != -1)
printf("%s %d\n", page->name, page->num);
@@ -107,13 +117,13 @@ int main(int argc, char *argv[])
} else {
while(fgets(buf, 1024, stdin)) {
sscanf(buf, "%15s", pg);
- if(!(page = getpagedetail(bookid, pg)) || !page->url[0]) {
+ if(!(page = getpagedetail(bookid, pg, cookie)) || !page->url[0]) {
fprintf(stderr, "%s failed\n", pg);
free(page);
continue;
}
snprintf(n, 80, "%05d.png", page->num);
- gettofile("books.google.com", page->url, n);
+ gettofile("books.google.com", page->url, cookie, NULL, n);
printf("Downloaded page %d\n", page->num);
free(page);
}
diff --git a/util.c b/util.c
index ab6fdf8..21bc598 100644
--- a/util.c
+++ b/util.c
@@ -6,9 +6,10 @@
#include <netdb.h>
#include <netinet/in.h>
#include <sys/socket.h>
+#include "util.h"
/* plundered from suckless' sic */
-static int dial(char *host, char *port) {
+int dial(char *host, char *port) {
static struct addrinfo hints;
int srv;
struct addrinfo *res, *r;
@@ -35,23 +36,29 @@ static int dial(char *host, char *port) {
return srv;
}
-int get(char *host, char *path, char **buf) {
+int get(char *host, char *path, char *sendcookie, char *savecookie, char **buf) {
size_t l, res;
int fd, i, p;
char h[1024] = "\0";
+ char c[1024] = "";
FILE *srv;
+ if(savecookie) savecookie[0] = 0; /* TEMP TO PLEASE GCC */
if((fd = dial(host, "80")) == -1) return 0;
srv = fdopen(fd, "r+");
+ if(sendcookie)
+ snprintf(c, COOKIEMAX-1, "\r\nCookie: %s", sendcookie);
fprintf(srv, "GET %s HTTP/1.0\r\nUser-Agent: getgbook-"VERSION \
- " (not mozilla)\r\nHost: %s\r\n\r\n", path, host);
+ " (not mozilla)\r\nHost: %s%s\r\n\r\n", path, host, c);
fflush(srv);
while(h[0] != '\r') {
fgets(h, 1024, srv);
if(sscanf(h, "HTTP/%d.%d %d", &i, &i, &p) == 3 && p != 200)
return 0;
+ if(savecookie != NULL && sscanf(h, "Set-Cookie: %s;", c))
+ strncat(savecookie, c, COOKIEMAX-1);
}
*buf = malloc(sizeof(char *) * 4096);
@@ -62,12 +69,12 @@ int get(char *host, char *path, char **buf) {
return l;
}
-int gettofile(char *host, char *url, char *savepath) {
+int gettofile(char *host, char *url, char *sendcookie, char *savecookie, char *savepath) {
char *buf = 0;
FILE *f;
size_t i, l;
- if(!(l = get(host, url, &buf))) {
+ if(!(l = get(host, url, sendcookie, savecookie, &buf))) {
fprintf(stderr, "Could not download %s\n", url);
return 1;
}
diff --git a/util.h b/util.h
index 541fab3..82c0a29 100644
--- a/util.h
+++ b/util.h
@@ -1,4 +1,5 @@
/* See COPYING file for copyright, license and warranty details. */
+#define COOKIEMAX 1024
int dial(char *host, char *port);
-int get(char *host, char *path, char **buf);
-int gettofile(char *host, char *url, char *savepath);
+int get(char *host, char *path, char *sendcookie, char *savecookie, char **buf);
+int gettofile(char *host, char *url, char *sendcookie, char *savecookie, char *savepath);