From 4e6b01857db64898a4619415543eefca771a527e Mon Sep 17 00:00:00 2001 From: Nick White Date: Tue, 10 Jul 2012 20:57:07 +0100 Subject: Fix potential bug in HTTP code --- TODO | 10 ++---- util.c | 113 ++++++++++++++++++++++++++++++++++++++--------------------------- 2 files changed, 69 insertions(+), 54 deletions(-) diff --git a/TODO b/TODO index 87bc878..d4b9297 100644 --- a/TODO +++ b/TODO @@ -1,20 +1,14 @@ # other todos -bug in get() & post(): if the \r\n\r\n after http headers is cut off between recv buffers. solution is to get all, then strstr(\n\r\n\r) to find end of header, and memcopy the rest out (so that original memory can be freed) +format and package man pages in win packages in getabook, the web client tries downloading sequentially the first few pages, regardless of whether they're in the available page list. this actually works (some or all of these pages will return), so we should implement something similar too. exactly how it knows when to stop looking is not clear, at least with the one i tried, it just tried all of the first 25 pages. in getgbook, check that downloaded page doesn't match 'page not available' image; if so delete (as may be redownloadable later, perhaps even then with different cookies) in getbnbook, check that downloaded page doesn't match 'page not available' swf; if so delete (as may be redownloadable later, perhaps even then with different cookies) -in getgbook, grab the link data (presumably as json somewhere), and add this to pdf - -1.0 format and package man pages in win and osx packages +submit 'pad' file to websites http://padsites.asp-software.org/ write some little tests -1.0 submit 'pad' file to websites http://padsites.asp-software.org/ - -add function to download html text to getabook (just a html request to get kindle version) - add scribd functionality - example is http://www.scribd.com/doc/20448287/Etidorhpa-John-Uri-Lloyd producing urls like http://htmlimg3.scribdassets.com/1qva8jpekgdk0wl/images/1-bfa8361a96.jpg diff --git a/util.c b/util.c index 41bbc3c..d20e4e3 100644 --- a/util.c +++ b/util.c @@ -49,14 +49,17 @@ int dial(char *host, char *port) { return srv; } -int get(char *host, char *path, char *sendcookie, char *savecookie, char **buf) { +int get(char *host, char *path, char *sendcookie, char *savecookie, char **body) { size_t l, res; int fd, i, p; char h[BUFSIZ] = ""; char c[COOKIEMAX] = ""; - char t[BUFSIZ]; - char *t2; char m[256]; + char *headpos; + size_t headsize; + char headline[BUFSIZ] = ""; + char *buf; + char *cur, *pos; if((fd = dial(host, "80")) == -1) return 0; @@ -66,40 +69,42 @@ int get(char *host, char *path, char *sendcookie, char *savecookie, char **buf) " (not mozilla)\r\nHost: %s%s\r\n\r\n", path, host, c); if(!send(fd, h, strlen(h), 0)) return 0; - *buf = NULL; + /* download everything into buf */ l = 0; - h[0] = 0; - snprintf(m, 256, "Set-Cookie: %%%ds;", COOKIEMAX-1); + buf = malloc(sizeof(char *) * BUFSIZ); + for(; buf != NULL && (res = recv(fd, buf+l, BUFSIZ, 0)) > 0; l+=res) + buf = realloc(buf, sizeof(char *) * (l+BUFSIZ)); - while((res = recv(fd, t, BUFSIZ, 0)) > 0) { - strncat(h, t, BUFSIZ - strlen(h) - 1); - if((t2 = strstr(t, "\r\n\r\n")) != NULL && (t2 - t) < (signed)res) { - /* end of header, save rest to buffer */ - t2+=4; - l = res - (t2 - t); - *buf = malloc(sizeof(char *) * l); - memcpy(*buf, t2, l); - break; - } - } + /* strstr to find end of header */ + if((headpos = strstr(buf, "\r\n\r\n")) == NULL) + return 0; + headpos += 4; + headsize = headpos - buf; - if(sscanf(h, "HTTP/%d.%d %d", &i, &i, &p) == 3 && p != 200) { - if(p == 403) - fprintf(stderr, "403 forbidden: your IP address may be temporarily blocked\n"); + /* memcopy from there into a large enough buf */ + if((*body = malloc(sizeof(char *) * (l - headsize))) == NULL) return 0; - } - t2 = h; - if(savecookie != NULL) { - while((t2 = strstr(t2, "Set-Cookie: ")) && sscanf(t2, m, c)) { + memcpy(*body, headpos, sizeof(char *) * (l - headsize)); + + /* parse header as needed */ + snprintf(m, 256, "Set-Cookie: %%%ds;", COOKIEMAX-1); + cur = buf; + while((pos = strstr(cur, "\r\n")) != NULL && cur < (headpos - 4)) { + strncpy(headline, cur, pos - cur); + headline[pos - cur] = '\0'; + cur = pos + 2; + + if(sscanf(headline, "HTTP/%d.%d %d", &i, &i, &p) == 3 && p != 200) { + if(p == 403) + fprintf(stderr, "403 forbidden: your IP address may be temporarily blocked\n"); + return 0; + } + + if(savecookie != NULL && sscanf(headline, m, c)) { strncat(savecookie, c, COOKIEMAX - strlen(savecookie) - 1); - t2++; } } - *buf = realloc(*buf, sizeof(char *) * (l+BUFSIZ)); - for(; buf != NULL && (res = recv(fd, *buf+l, BUFSIZ, 0)) > 0; l+=res) - *buf = realloc(*buf, sizeof(char *) * (l+BUFSIZ)); - return l; } @@ -129,12 +134,16 @@ int gettofile(char *host, char *url, char *sendcookie, char *savecookie, char *s return 0; } -int post(char *host, char *path, char *data, char **buf) { +/* TODO: merge this with get(); almost all code is the same */ +int post(char *host, char *path, char *data, char **body) { size_t l, res; int fd, i, p; char h[BUFSIZ] = ""; - char t[BUFSIZ]; - char *t2; + char *headpos; + size_t headsize; + char headline[BUFSIZ] = ""; + char *buf; + char *cur, *pos; if((fd = dial(host, "80")) == -1) return 0; @@ -145,25 +154,37 @@ int post(char *host, char *path, char *data, char **buf) { path, (int)strlen(data), host, data); if(!send(fd, h, strlen(h), 0)) return 0; - *buf = NULL; + /* download everything into buf */ l = 0; - while((res = recv(fd, t, BUFSIZ, 0)) > 0) { - if(sscanf(t, "HTTP/%d.%d %d", &i, &i, &p) == 3 && p != 200) + buf = malloc(sizeof(char *) * BUFSIZ); + for(; buf != NULL && (res = recv(fd, buf+l, BUFSIZ, 0)) > 0; l+=res) + buf = realloc(buf, sizeof(char *) * (l+BUFSIZ)); + + /* strstr to find end of header */ + if((headpos = strstr(buf, "\r\n\r\n")) == NULL) + return 0; + headpos += 4; + headsize = headpos - buf; + + /* memcopy from there into a large enough buf */ + if((*body = malloc(sizeof(char *) * (l - headsize))) == NULL) + return 0; + memcpy(*body, headpos, sizeof(char *) * (l - headsize)); + + /* parse header as needed */ + cur = buf; + while((pos = strstr(cur, "\r\n")) != NULL && cur < (headpos - 4)) { + strncpy(headline, cur, pos - cur); + headline[pos - cur] = '\0'; + cur = pos + 2; + + if(sscanf(headline, "HTTP/%d.%d %d", &i, &i, &p) == 3 && p != 200) { + if(p == 403) + fprintf(stderr, "403 forbidden: your IP address may be temporarily blocked\n"); return 0; - t2 = t; - if((t2 = strstr(t, "\r\n\r\n")) != NULL && (t2 - t) < (signed)res) { - t2+=4; - l = res - (t2 - t); - *buf = malloc(sizeof(char *) * l); - memcpy(*buf, t2, l); - break; } } - *buf = realloc(*buf, sizeof(char *) * (l+BUFSIZ)); - for(; (res = recv(fd, *buf+l, BUFSIZ, 0)) > 0; l+=res) - *buf = realloc(*buf, sizeof(char *) * (l+BUFSIZ)); - return l; } -- cgit v1.2.3