summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNick White <hg@njw.me.uk>2011-08-07 12:46:52 +0100
committerNick White <hg@njw.me.uk>2011-08-07 12:46:52 +0100
commit62563596f477238d480fe4a701544413b6c722f5 (patch)
tree91379e6c654e9e9ff47793a892aa990e737e85dc
parent3d08e78700331588f6d43db725cc361f841c012d (diff)
Abide by google's robots.txt, and lay out legal issues
-rw-r--r--LEGAL27
-rw-r--r--getgbook.c4
2 files changed, 29 insertions, 2 deletions
diff --git a/LEGAL b/LEGAL
new file mode 100644
index 0000000..ec1a2c8
--- /dev/null
+++ b/LEGAL
@@ -0,0 +1,27 @@
+# Getgbook
+
+## TOS
+
+Google's terms of service forbid using anything but a browser
+to access their sites. This is absurd and ruinous.
+See section 5.3 of http://www.google.com/accounts/TOS.
+
+Thankfully, however, for Google Books one is only bound to it
+"for digital content you purchase through the Google Books
+service," which does not affect this program.
+See http://www.google.com/googlebooks/tos.html
+
+## robots.txt
+
+Google's robots.txt allows certain book pages, but disallows
+others.
+
+We use two types of URL:
+http://books.google.com/books?id=<bookid>&pg=<pgcode>&jscmd=click3
+http://books.google.com/books?id=<bookid>&pg=<pgcode>&img=1&zoom=3&hl=en&<sig>
+
+robots.txt disallows /books?*jscmd=* and /books?*pg=*. However,
+Google considers Allow statements to overrule Disallow statements
+if they are longer, and they happen to allow /books?*q=subject:*.
+So we append "&q=subject:a" to both URL types (it has no effect
+on them), and we are thereby obeying robots.txt.
diff --git a/getgbook.c b/getgbook.c
index b4a23af..5f0a0ae 100644
--- a/getgbook.c
+++ b/getgbook.c
@@ -29,7 +29,7 @@ Page *getpagedetail(char *bookid, char *pg, char *cookie)
char *c, *d, *p, *buf = NULL;
Page *page;
- snprintf(url, URLMAX, "/books?id=%s&pg=%s&jscmd=click3", bookid, pg);
+ snprintf(url, URLMAX, "/books?id=%s&pg=%s&jscmd=click3&q=subject:a", bookid, pg);
if(!get("books.google.com", url, cookie, NULL, &buf))
return NULL;
@@ -51,7 +51,7 @@ Page *getpagedetail(char *bookid, char *pg, char *cookie)
} else
*p = *d;
}
- *p = '\0';
+ strncpy(p, "&q=subject:a", 12);
} else
d=c;