diff options
author | Nick White <arch@njw.me.uk> | 2007-05-03 00:31:22 +0000 |
---|---|---|
committer | Nick White <arch@njw.me.uk> | 2007-05-03 00:31:22 +0000 |
commit | f811c2d4823f95d7e90f25e0e7a98e5c5abcf3e2 (patch) | |
tree | 190283d4cf1efdd3ae5ba8ecaa0b6c5b5837bec9 /src | |
parent | 1edf37e3b0ad7b0556ba0902b5880044933ced66 (diff) |
Added Autotools, changed dir structure, added docs
Added (perhaps somewhat shaky) autotools support
Added tagging rules to junkify files generated by autotools
Added a directory structure
Updated README & INSTALL files to reflect changes
Wrote a man page
Changed version numbers in preparation for a release
git-archimport-id: getht@sv.gnu.org/getht--mainline--0.1--patch-23
Diffstat (limited to 'src')
-rw-r--r-- | src/Makefile.am | 7 | ||||
-rw-r--r-- | src/config.c | 168 | ||||
-rw-r--r-- | src/download.c | 228 | ||||
-rw-r--r-- | src/getht.c | 394 | ||||
-rw-r--r-- | src/getht.h | 47 | ||||
-rw-r--r-- | src/issue.h | 77 | ||||
-rw-r--r-- | src/issuemem.c | 174 | ||||
-rw-r--r-- | src/mediarev.c | 150 | ||||
-rw-r--r-- | src/mediaxml.c | 246 | ||||
-rw-r--r-- | src/tocxml.c | 264 | ||||
-rw-r--r-- | src/version.h | 22 | ||||
-rw-r--r-- | src/xml.c | 62 |
12 files changed, 1839 insertions, 0 deletions
diff --git a/src/Makefile.am b/src/Makefile.am new file mode 100644 index 0000000..0bf3452 --- /dev/null +++ b/src/Makefile.am @@ -0,0 +1,7 @@ +## A simple file to process with Automake, to produce Makefile.in + +bin_PROGRAMS = getht + +getht_SOURCES = config.c download.c getht.c issuemem.c \ + mediarev.c mediaxml.c tocxml.c xml.c \ + getht.h issue.h version.h diff --git a/src/config.c b/src/config.c new file mode 100644 index 0000000..57807b1 --- /dev/null +++ b/src/config.c @@ -0,0 +1,168 @@ +/* + * Copyright 2006 Nick White + * + * This file is part of GetHT + * + * GetHT is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * GetHT is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GetHT; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include <stdio.h> +#include <string.h> + +#include "getht.h" + +extern proxytype proxy_type; +extern char proxy_addr[STR_MAX]; +extern long proxy_port; +extern proxyauth proxy_auth; +extern char proxy_user[STR_MAX]; +extern char proxy_pass[STR_MAX]; + +int loadconfig(char * htde_path, char * issue_path, int * update) +/* Loads variables from config file to extern and passed + * variables. */ +{ + FILE * config_file; + char filepath[STR_MAX]; + + snprintf(filepath, STR_MAX, "%s/config.ini", htde_path); + + if((config_file = fopen(filepath,"r")) == NULL) + { + fprintf(stderr,"Cannot open file %s for reading.\n",filepath); + return 1; + } + + char parameter[80], option[80]; + while(!feof(config_file)) + { + fscanf(config_file, "%s = %s", option, parameter); + + if(option[0] == '#'); /* ignore lines beginning with a hash */ + else if(!strcmp(option, "issuepath")) + strncpy(issue_path, parameter, STR_MAX); + else if(!strcmp(option, "startup_check")) + *update = atoi(parameter); + else if(!strcmp(option, "proxy_type")) + { + if(!strcmp(parameter, "http")) + proxy_type = HTTP; + else if(!strcmp(parameter, "socks4")) + proxy_type = SOCKS4; + else if(!strcmp(parameter, "socks5")) + proxy_type = SOCKS5; + else + fprintf(stderr, + "Proxy type %s not known, please use either http, socks4 or socks5", + parameter); + } + else if(!strcmp(option, "proxy_address")) + strncpy(proxy_addr, parameter, STR_MAX); + else if(!strcmp(option, "proxy_port")) + proxy_port = (long) atoi(parameter); + else if(!strcmp(option, "proxy_auth")) + { + if(!strcmp(parameter, "basic")) + proxy_auth = BASIC; + else if(!strcmp(parameter, "digest")) + proxy_auth = DIGEST; + else if(!strcmp(parameter, "ntlm")) + proxy_auth = NTLM; + else + fprintf(stderr, + "Proxy authentication method %s not known, please use basic, digest or ntlm", + parameter); + } + else if(!strcmp(option, "proxy_user")) + strncpy(proxy_user, parameter, STR_MAX); + else if(!strcmp(option, "proxy_pass")) + strncpy(proxy_pass, parameter, STR_MAX); + else + fprintf(stderr, "Option '%s' not recognised, ignoring\n", option); + } + + return 0; +} + +int writefreshconfig(char * htde_path, char * issue_path, int * update) +/* Write a new config file according to extern and passed variables. */ +{ + FILE * config_file; + char filepath[STR_MAX]; + + snprintf(filepath, STR_MAX, "%s/config.ini", htde_path); + + if((config_file = fopen(filepath,"w")) == NULL) + { + fprintf(stderr,"Cannot open file %s for writing.\n",filepath); + return 1; + } + else + fprintf(stdout,"Writing a fresh config file to %s.\n",filepath); + + if(issue_path[0]) + fprintf(config_file, "%s = %s\n", "issuepath", issue_path); + if(update) + fprintf(config_file, "%s = %i\n", "startup_check", *update); + if(proxy_type != NONE) + { + if(proxy_type = HTTP) + fprintf(config_file, "%s = %s\n", "proxy_type", "http"); + else if(proxy_type = SOCKS4) + fprintf(config_file, "%s = %s\n", "proxy_type", "socks4"); + else if(proxy_type = SOCKS5) + fprintf(config_file, "%s = %s\n", "proxy_type", "socks5"); + } + if(proxy_addr[0]) + fprintf(config_file, "%s = %s\n", "proxy_address", proxy_addr); + if(proxy_port) + fprintf(config_file, "%s = %i\n", "proxy_port", proxy_port); + if(proxy_auth != NOAUTH) + { + if(proxy_auth = BASIC) + fprintf(config_file, "%s = %s\n", "proxy_auth", "basic"); + else if(proxy_auth = DIGEST) + fprintf(config_file, "%s = %s\n", "proxy_auth", "digest"); + else if(proxy_auth = NTLM) + fprintf(config_file, "%s = %s\n", "proxy_auth", "ntlm"); + } + if(proxy_user[0]) + fprintf(config_file, "%s = %s\n", "proxy_user", proxy_user); + if(proxy_pass[0]) + fprintf(config_file, "%s = %s\n", "proxy_pass", proxy_pass); + + return 0; +} + +int updateconfig(char * htde_path, char * issue_path, int * update) +/* Read existing config file, and rewrite any variables which differ + * in memory. */ +{ + FILE * config_file; + char filepath[STR_MAX]; + + snprintf(filepath, STR_MAX, "%s/config.ini", htde_path); + + if((config_file = fopen(filepath,"rw")) == NULL) + { + fprintf(stderr,"Cannot open file %s for reading/writing.\n",filepath); + return 1; + } + + fprintf(stderr,"Not yet implemented.\n"); + + return 1; +} diff --git a/src/download.c b/src/download.c new file mode 100644 index 0000000..1108b6d --- /dev/null +++ b/src/download.c @@ -0,0 +1,228 @@ +/* + * Copyright 2006 Nick White + * + * This file is part of GetHT + * + * GetHT is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * GetHT is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GetHT; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include <stdio.h> +#include <string.h> +#include <sys/stat.h> +#include <curl/curl.h> +#include <curl/types.h> +#include <curl/easy.h> + +#include "getht.h" +#include "issue.h" + +int read_func(void *ptr, size_t size, size_t nmemb, FILE *stream) + { return fread(ptr, size, nmemb, stream); } +int write_func(void *ptr, size_t size, size_t nmemb, FILE *stream) + { return fwrite(ptr, size, nmemb, stream); } +int update_progress(void *data, double dltotal, double dlnow, + double ultotal, double ulnow); + +extern proxytype proxy_type; +extern char proxy_addr[STR_MAX]; +extern long proxy_port; +extern proxyauth proxy_auth; +extern char proxy_user[STR_MAX]; +extern char proxy_pass[STR_MAX]; +extern CURL *main_curl_handle; + +int save_file(CURL *curl_handle, char *url, char *filepath) +/* Save the file *url to *filepath */ +{ + printf("Downloading %s\n",url); + + if(curl_handle == NULL) + curl_handle = main_curl_handle; + + if(curl_handle) { + FILE *file; + if((file = fopen(filepath,"w")) == NULL) + { + fprintf(stderr,"Error: cannot open file %s for writing.\n",filepath); + return 1; + } + + curl_easy_setopt(curl_handle, CURLOPT_URL, url); + curl_easy_setopt(curl_handle, CURLOPT_WRITEFUNCTION, write_func); + curl_easy_setopt(curl_handle, CURLOPT_WRITEDATA, file); + curl_easy_setopt(curl_handle, CURLOPT_READFUNCTION, read_func); + + if(proxy_type != NONE) + { + if(proxy_type == HTTP) + curl_easy_setopt(curl_handle, CURLOPT_PROXYTYPE, CURLPROXY_HTTP); + else if(proxy_type == SOCKS4) + curl_easy_setopt(curl_handle, CURLOPT_PROXYTYPE, CURLPROXY_SOCKS4); + else if(proxy_type == SOCKS5) + curl_easy_setopt(curl_handle, CURLOPT_PROXYTYPE, CURLPROXY_SOCKS5); + + curl_easy_setopt(curl_handle, CURLOPT_PROXY, proxy_addr); + + if(proxy_port) + curl_easy_setopt(curl_handle, CURLOPT_PROXYPORT, proxy_port); + + if(proxy_auth != NOAUTH) + { + if(proxy_auth == BASIC) + curl_easy_setopt(curl_handle, CURLOPT_PROXYAUTH, CURLAUTH_BASIC); + else if(proxy_auth == DIGEST) + curl_easy_setopt(curl_handle, CURLOPT_PROXYAUTH, CURLAUTH_DIGEST); + else if(proxy_auth == NTLM) + curl_easy_setopt(curl_handle, CURLOPT_PROXYAUTH, CURLAUTH_NTLM); + + if(proxy_user[0] && proxy_pass[0]) + { + char userpass[STR_MAX]; + snprintf(userpass, STR_MAX, "%s:%s", proxy_user, proxy_pass); + curl_easy_setopt(curl_handle, CURLOPT_PROXYUSERPWD, userpass); + } + } + } + + curl_easy_setopt(curl_handle, CURLOPT_NOPROGRESS, 0); + curl_easy_setopt(curl_handle, CURLOPT_PROGRESSFUNCTION, update_progress); + + if(curl_easy_perform(curl_handle)) + { + remove(filepath); + fprintf(stderr,"Error, could not download %s\n",url); + return 1; + } + +/* double d; + curl_easy_getinfo(curl_handle, CURLINFO_SIZE_DOWNLOAD, &d); + printf("Total downloaded: %lf\n",d); + + curl_easy_getinfo(curl_handle, CURLINFO_SPEED_DOWNLOAD, &d); + printf("Average speed downloaded: %lf\n",d); + + curl_easy_getinfo(curl_handle, CURLINFO_CONTENT_LENGTH_DOWNLOAD, &d); + printf("Content Length: %lf\n",d); +*/ + fclose(file); + + printf("\n"); + } + else { + fprintf(stderr,"Error: curl failed to initialise.\n"); + printf("Could not download %s\n",url); + return 1; + } + return 0; +} + +int update_progress(void *data, double dltotal, double dlnow, + double ultotal, double ulnow) +/* Print status information */ +{ + double frac; + frac = 100 * dlnow / dltotal; + printf("\rDownload progress: %3.0lf%% ", frac); + fflush(stdout); + + return 0; +} + +void downloadissue(CURL *curl_handle, char * directory, iss * issue, int force) +/* Download issue pointed to */ +{ + sec * cur_section; + char newdir[STR_MAX]; + char filename[STR_MAX]; + FILE * testfile; + + snprintf(newdir,STR_MAX,"%s/%i_%i-%i",directory, + issue->date.year,issue->date.firstmonth,issue->date.lastmonth); + + printf("Downloading issue entitled '%s' to '%s'\n",issue->title,newdir); + + if(!opendir(newdir)) + if(mkdir(newdir, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH)) + { + printf("Please enter the path of a directory to save issues in: "); + scanf("%s", newdir); /* TODO: incorporate tab-completion */ + } + + int count; + for(count = 0; count <= issue->no_of_sections; count++) + { + cur_section = issue->section[count]; + + snprintf(filename,STR_MAX,"%s/section_%i.pdf", newdir, cur_section->number); + if(!force){ + testfile = fopen(filename, "r"); + if(!testfile) + save_file(curl_handle, cur_section->uri, filename); + else + { + fclose(testfile); + printf("Skipping download of section %i\n", cur_section->number); + } + } + else + save_file(curl_handle, cur_section->uri, filename); + } +} + +void downloadmedia(CURL * curl_handle, char * directory, med * media, int force) +/* Download media pointed to. */ +{ + char newdir[STR_MAX], filename[STR_MAX], fn[STR_MAX]; + FILE * testfile; + + /* for the moment just save to down/media, in future save to issue directories + * (regardless of if they're occupied) */ + snprintf(newdir,STR_MAX,"%s/%s",directory,"media"); + + if(!opendir(newdir)) + if(mkdir(newdir, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH)) + { + printf("Please enter the path of a directory to save media in: "); + scanf("%s", newdir); /* TODO: incorporate GETTEXT */ + } + + /* get the position of the final forwardslash of the uri */ + int slashpos; + for(slashpos=strlen(media->uri); + slashpos>=0 && media->uri[slashpos] != '/'; + slashpos--); + + /* copy from there to fn */ + int tmp; + for(tmp=0, slashpos++; media->uri[slashpos]; tmp++, slashpos++) + fn[tmp] = media->uri[slashpos]; + fn[tmp] = '\0'; + + snprintf(filename,STR_MAX,"%s/%s", newdir, fn); + + if(!force){ + testfile = fopen(filename, "r"); + if(!testfile) + save_file(curl_handle, media->uri, filename); + else + { + fclose(testfile); + printf("Skipping download of media item %s\n", media->title); + } + } + else + save_file(curl_handle, media->uri, filename); +} diff --git a/src/getht.c b/src/getht.c new file mode 100644 index 0000000..5d2c39c --- /dev/null +++ b/src/getht.c @@ -0,0 +1,394 @@ +/* + * Copyright 2006 Nick White + * + * This file is part of GetHT + * + * GetHT is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * GetHT is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GetHT; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <getopt.h> +#include <string.h> +#include <sys/stat.h> +#include <curl/curl.h> + +#include "version.h" +#include "issue.h" +#include "getht.h" + +int update_contents_files(); + +med * findnewestmed(iss ** issue, int no_of_issues); + +void show_iss_struct(iss ** issue, int no_of_issues); + +void showusage(); + +proxytype proxy_type; char proxy_addr[STR_MAX]; long proxy_port; +proxyauth proxy_auth; +char proxy_user[STR_MAX]; char proxy_pass[STR_MAX]; +char issue_xml[STR_MAX]; char media_xml[STR_MAX]; char media_rev[STR_MAX]; +CURL *main_curl_handle; + +int main(int argc, char *argv[]) +{ + char getht_path[STR_MAX]; + char save_path[STR_MAX]; + + /* Define & set up paths */ + snprintf(getht_path,STR_MAX,"%s/.getht",getenv("HOME")); + + if(!opendir(getht_path)) + if(mkdir(getht_path, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH)) + { + fprintf(stderr,"Cannot open/create directory %s\n",getht_path); + printf("Please enter the path of a directory to save settings in: "); + scanf("%s", getht_path); + } + + snprintf(issue_xml,STR_MAX,"%s/%s",getht_path,ISS_XML_FILE); + snprintf(media_xml,STR_MAX, "%s/%s", getht_path, MED_XML_FILE); + snprintf(media_rev,STR_MAX,"%s/%s",getht_path,MED_REVGZ_FILE); + + snprintf(save_path,STR_MAX,"%s/hinduism_today",getenv("HOME")); + + int downall = 0, downlatest = 0; + int downallmedia = 0, downlatestmedia = 0; + int downissue = 0, downmedia = 0; + int force = 0, update = 0, showstr = 0; + int option = 0; + + proxy_type = NONE; + proxy_port = 0; + proxy_addr[0] = '\0'; + proxy_auth = NOAUTH; + proxy_user[0] = '\0'; + proxy_pass[0] = '\0'; + + if(loadconfig(getht_path, &save_path, &update) != 0) + writefreshconfig(getht_path, &save_path, &update); + + if(!opendir(save_path)) + if(mkdir(save_path, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH)) + { + fprintf(stderr,"Cannot open/create directory %s",save_path); + printf("Please enter the path of a directory to save issues in: "); + scanf("%s", save_path); + + updateconfig(getht_path, &save_path, NULL); + } + + /* Parse command line options */ + char c; + static struct option long_opts[] = + { + {"download-all", no_argument, 0, 'a'}, + {"download-latest", no_argument, 0, 'd'}, + {"download-all-media", no_argument, 0, 'm'}, + {"download-latest-media", no_argument, 0, 'n'}, + {"force", no_argument, 0, 'f'}, + {"update", no_argument, 0, 'u'}, + {"tocfile", required_argument, 0, 't'}, + {"mediatocfile", required_argument, 0, 'x'}, + {"help", no_argument, 0, 'h'}, + {"version", no_argument, 0, 'v'}, + {0, 0, 0, 0} + }; + while((c = getopt_long(argc, argv, "adfhmnsuvx:t:", long_opts, NULL)) != -1) { + switch(c) { + case 'a': + downall = 1; + downissue = 1; + option = 1; + break; + case 'd': + downlatest = 1; + downissue = 1; + option = 1; + break; + case 'm': + downallmedia = 1; + downmedia = 1; + option = 1; + break; + case 'n': + downlatestmedia = 1; + downmedia = 1; + option = 1; + break; + case 'f': + force = 1; + option = 1; + break; + case 'u': + update = 1; + option = 1; + break; + case 's': + showstr = 1; + option = 1; + break; + case 't': + strcpy(issue_xml, strdup(optarg)); + option = 1; + break; + case 'x': + strcpy(media_xml, strdup(optarg)); + option = 1; + break; + case 'h': + showusage(); + return 0; + break; + case 'v': + printf("GetHT version: %s\n",VERSION); + option = 1; + return 0; + break; + } + } + + if(!option) + { + showusage(); + return 0; + } + + main_curl_handle = curl_easy_init(); + + if(update) + { + if(update_contents_files()) + fprintf(stderr,"Could not update contents files\n"); + } + + /* Parse TOC, filling issue structure */ + + iss **issue; + int no_of_issues = -1; + int latest_index = -1; + int i; + + if(downissue || showstr) + { + issue = parsetoc(issue_xml, &no_of_issues, &latest_index); + + if(!issue) + { + if(!update) + { + printf("Cannot open contents file, trying to update contents\n"); + if(update_contents_files()) + return 1; + issue = parsetoc(issue_xml, &no_of_issues, &latest_index); + } + else + { + printf("Cannot open contents file, try running `getht --update`\n"); + return 1; + } + } + + if(latest_index == -1) + { + fprintf(stderr, "Error: Cannot ascertain latest issue. "); + fprintf(stderr, "Defaulting to first issue in contents file\n"); + latest_index = 0; + } + + if(downall) + { + for(i = 0; i < no_of_issues; i++) + downloadissue(NULL, save_path, issue[i], force); + } + + if(downlatest) + downloadissue(NULL, save_path, issue[latest_index], force); + } + + if(downmedia || showstr) + { + int newest; + + issue = parsemedia(media_xml, issue, &no_of_issues); + + if(!issue) + { + if(!update) + { + printf("Cannot open media contents file, trying to update contents\n"); + if(update_contents_files()) + return 1; + issue = parsemedia(media_xml, issue, &no_of_issues); + } + else + { + printf("Cannot open contents file, try running `getht --update`\n"); + return 1; + } + } + + if(downlatestmedia) + { + newest = findnewestiss(issue, no_of_issues); + for(i = 0; i <= issue[newest]->no_of_media; i++) + downloadmedia(NULL, save_path, issue[newest]->media[i], force); + } + + if(downallmedia) + { + for(i = 0; i <= no_of_issues; i++) + { + for(newest = 0; newest <= issue[i]->no_of_media; newest++) + downloadmedia(NULL, save_path, issue[i]->media[newest], force); + } + } + } + + if(showstr) + show_iss_struct(issue, no_of_issues); + + /* Ensure curl cleans itself up */ + curl_easy_cleanup(main_curl_handle); + + return 0; +} + +int update_contents_files() +/* Returns 0 on success, 1 on failure */ +{ + if(save_file(NULL, XML_TOC_URL, issue_xml)) + return 1; + + char isstitle[STR_MAX]; + issdates date; + + /* see if current issue's media toc has already + been written to the xml, if not do so */ + if(access(issue_xml, R_OK) == 0) + { + if(cur_identifiers(issue_xml, isstitle, &date)) + return 1; + } + else + return 1; + + if(media_accounted_for(media_xml, &date)) + { + if(save_file(NULL, MEDIA_TOC_URL, media_rev)) + return 1; + + med ** temp_med; + int med_no = -1; + + temp_med = parsemediagz(media_rev, &med_no); + + unlink(media_rev); + + addmediaissue(media_xml, isstitle, &date, temp_med, med_no); + } + + return 0; +} + +void show_iss_struct(iss ** issue, int no_of_issues) +/* Prints issue information */ +{ + int iss_no, sec_no, med_no, it_no; + printf("%i Issues\n",no_of_issues); + for(iss_no=0;iss_no<=no_of_issues;iss_no++) + { + printf("-Issue %i-\n", (iss_no+1)); + printf("Title:\t'%s'\n", issue[iss_no]->title); + printf("Preview URI:\t'%s'\n", issue[iss_no]->preview_uri); + printf("Months:\t'%i' - '%i'\n",issue[iss_no]->date.firstmonth,issue[iss_no]->date.lastmonth); + printf("Year:\t'%i'\n",issue[iss_no]->date.year); + printf("Number of Sections:\t'%i'\n",issue[iss_no]->no_of_sections); + + for(sec_no=0; sec_no <= (issue[iss_no]->no_of_sections); sec_no++) + { + printf("\t-Section %i-\n", (sec_no)); + printf("\tTitle:\t'%s'\n", issue[iss_no]->section[sec_no]->title); + printf("\tURI:\t'%s'\n", issue[iss_no]->section[sec_no]->uri); + printf("\tNo. of Items:\t'%i'\n", issue[iss_no]->section[sec_no]->no_of_items); + + for(it_no=0; it_no <= issue[iss_no]->section[sec_no]->no_of_items; it_no++) + { + printf("\t\t-Item-\n"); + printf("\t\tTitle:\t'%s'\n",issue[iss_no]->section[sec_no]->item[it_no]->title); + printf("\t\tFirst page:\t'%i'",issue[iss_no]->section[sec_no]->item[it_no]->firstpage); + printf("\tLast page:\t'%i'\n",issue[iss_no]->section[sec_no]->item[it_no]->lastpage); + } + it_no = 0; + } + sec_no = 0; + + printf("Number of Media:\t'%i'\n",issue[iss_no]->no_of_media); + + for(med_no=0; med_no <= (issue[iss_no]->no_of_media); med_no++) + { + printf("\t-Media %i-\n", (med_no)); + printf("\tTitle:\t'%s'\n", issue[iss_no]->media[med_no]->title); + printf("\tURI:\t'%s'\n", issue[iss_no]->media[med_no]->uri); + printf("\tComment:\t'%s'\n", issue[iss_no]->media[med_no]->comment); + printf("\tPreview URI:\t'%s'\n", issue[iss_no]->media[med_no]->preview_uri); + } + med_no = 0; + } +} + +int findnewestiss(iss ** issue, int no_of_issues) +/* returns newest issue indice */ +{ + iss * tmp_issue; issdates newest; + int new_iss; + + new_iss = -1; + newest.year = 0; newest.firstmonth = 0; newest.lastmonth = 0; + + int i; + + for(i = 0; i <= no_of_issues; i++) + { + if(issue[i]->date.year > newest.year || + (issue[i]->date.year == newest.year && issue[i]->date.firstmonth > newest.firstmonth)) + { + newest.year = issue[i]->date.year; + newest.firstmonth = issue[i]->date.firstmonth; + newest.lastmonth = issue[i]->date.lastmonth; + new_iss = i; + } + } + + return new_iss; +} + +void showusage() +{ + printf("Usage: getht -u -a -d -m -n -f [-t tocfile] -h -v\n"); + printf("-u | --update Update contents files\n"); + printf("-a | --download-all Download all issues\n"); + printf("-d | --download-latest Download latest issue\n"); + printf("-m | --download-all-media Download all media\n"); + printf("-n | --download-latest-media Download latest issue's media\n"); + printf("-f | --force Force re-download of existing files\n"); + printf("-t | --tocfile file Use alternative contents xml file\n"); + printf("-x | --mediatocfile file Use alternative media contents xml file\n"); + printf("-h | --help Print this help message\n"); + printf("-v | --version Print version information\n"); +} diff --git a/src/getht.h b/src/getht.h new file mode 100644 index 0000000..df2b1b2 --- /dev/null +++ b/src/getht.h @@ -0,0 +1,47 @@ +/* + * Copyright 2006 Nick White + * + * This file is part of GetHT + * + * GetHT is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * GetHT is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GetHT; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#define XML_TOC_URL "http://www.hinduismtoday.com/digital/htde_toc.xml" +#define MEDIA_TOC_URL "http://www.hinduismtoday.com/digital/htde_media-player.rev.gz" + +#define MED_XML_FILE "media_toc.xml" +#define ISS_XML_FILE "htde_toc.xml" +#define MED_REVGZ_FILE "htde_media-player.rev.gz" + +#define STR_MAX 512 + +#define MAX_ISS 10 + +typedef enum +{ + NONE, + HTTP, + SOCKS4, + SOCKS5 +} proxytype; + +typedef enum +{ + NOAUTH, + BASIC, + DIGEST, + NTLM, +} proxyauth; diff --git a/src/issue.h b/src/issue.h new file mode 100644 index 0000000..d224dcf --- /dev/null +++ b/src/issue.h @@ -0,0 +1,77 @@ +/* + * Copyright 2006 Nick White + * + * This file is part of GetHT + * + * GetHT is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * GetHT is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GetHT; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* all sizes are in bytes and are child-inclusive */ + +typedef struct +{ + int year; + int firstmonth; + int lastmonth; +} issdates; + +typedef struct +{ + int firstpage; + int lastpage; + char * title; +} it; + +typedef struct +{ + char uri[512]; + char title[512]; + int number; + int size; + it ** item; + int no_of_items; +} sec; + +typedef struct +{ + char uri[512]; + char title[512]; + char comment[512]; + char preview_uri[512]; + int size; +} med; + +typedef struct +{ + char preview_uri[512]; + char title[512]; + int size; + issdates date; + sec ** section; + int no_of_sections; + med ** media; + int no_of_media; +} iss; + +iss ** parsetoc(char *filepath, int * iss_no, int * latest); +iss ** parsemedia(char * filepath, iss ** issue, int * no_of_issues); + +med ** parsemediagz(char * media_path, int * no_of_media); + +iss ** assignnew_iss(iss ** issue, int *no_of_issues); +sec ** assignnew_sec(sec ** section, int * no_of_sections); +it ** assignnew_it(it ** item, int * no_of_items); +med ** assignnew_med(med ** media, int * no_of_media); diff --git a/src/issuemem.c b/src/issuemem.c new file mode 100644 index 0000000..3a40d7e --- /dev/null +++ b/src/issuemem.c @@ -0,0 +1,174 @@ +/* + * Copyright 2006 Nick White + * + * This file is part of GetHT + * + * GetHT is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * GetHT is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GetHT; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "issue.h" + +void nogo_mem() +/* called if memory assignation fails + * TODO: handle freeing of memory to avoid leaks */ +{ + fprintf(stderr, "Could not assign memory, exitting\n"); + exit(1); +} + +iss ** assignnew_iss(iss ** issue, int *no_of_issues) +/* assign memory for new issue */ +{ + iss ** tmp = NULL; + + if(*no_of_issues < 0) + { /* make issue** a new array of issue pointers */ + if( (tmp = malloc(sizeof(iss *))) == NULL ) + nogo_mem(); + } + else + { /* add a new pointer to issue pointer list */ + if( (tmp = realloc(issue, sizeof(iss *) + (((*no_of_issues)+1) * sizeof(iss *)))) == NULL ) + nogo_mem(); + } + + (*no_of_issues)++; + + /* make new array item a pointer to issue */ + if( (tmp[*no_of_issues] = malloc(sizeof(iss))) == NULL ) + nogo_mem(); + + return tmp; +} + +sec ** assignnew_sec(sec ** section, int *no_of_sections) +/* assign memory for new section */ +{ + sec ** tmp = NULL; + + if(*no_of_sections < 0) + { /* make **section a new array of section pointers */ + if( (tmp = malloc(sizeof(sec *))) == NULL ) + nogo_mem(); + } + else + { /* add a new pointer to section pointer list */ + if( (tmp = realloc(section, sizeof(sec *) + (((*no_of_sections)+1) * sizeof(sec *)))) == NULL ) + nogo_mem(); + } + + (*no_of_sections)++; + + /* make new array item a pointer to issue */ + if( (tmp[*no_of_sections] = malloc(sizeof(sec))) == NULL ) + nogo_mem(); + + return tmp; +} + +it ** assignnew_it(it ** item, int * no_of_items) +{ + it ** tmp = NULL; + + if(*no_of_items < 0) + { /* make **item a new array of item pointers */ + if( (tmp = malloc(sizeof(it *))) == NULL ) + nogo_mem(); + } + else + { /* add a new pointer to item pointer list */ + if( (tmp = realloc(item, sizeof(it *) + (((*no_of_items)+1) * sizeof(it *)))) == NULL ) + nogo_mem(); + } + + (*no_of_items)++; + + /* make new array item a pointer to item */ + if( (tmp[*no_of_items] = malloc(sizeof(it))) == NULL ) + nogo_mem(); + + return tmp; +} + +med ** assignnew_med(med ** media, int * no_of_media) +{ + med ** tmp = NULL; + + if(*no_of_media < 0) + { /* make **section a new array of section pointers */ + if( (tmp = malloc(sizeof(med *))) == NULL ) + nogo_mem(); + } + else + { /* add a new pointer to media pointer list */ + if( (tmp = realloc(media, sizeof(med *) + (((*no_of_media)+1) * sizeof(med *)))) == NULL ) + nogo_mem(); + } + + (*no_of_media)++; + + /* make new array item a pointer to issue */ + if( (tmp[*no_of_media] = malloc(sizeof(med))) == NULL ) + nogo_mem(); + + return tmp; +} + +int issuesort(iss ** issue, int no_of_issues) +/* does a basic bubble sort, by date, returning sorted issue */ +{ + int sortindex[no_of_issues]; + + int count1, count2, temp; + + for(count1 = 0; count1 <= no_of_issues; count1++) + sortindex[count1] = count1; + + /* find correct order of issues using a bubble sort */ + for(count1 = 0; count1 <=no_of_issues; count1++) + { + for(count2 = 0; count2 < no_of_issues; count2++) + { + if(issue[sortindex[count2]]->date.year < issue[sortindex[count2+1]]->date.year) + { + temp = sortindex[count2]; + sortindex[count2] = sortindex[count2+1]; + sortindex[count2+1] = temp; + } + else if((issue[sortindex[count2]]->date.year == issue[sortindex[count2+1]]->date.year) && + (issue[sortindex[count2]]->date.firstmonth < issue[sortindex[count2+1]]->date.firstmonth)) + { + temp = sortindex[count2]; + sortindex[count2] = sortindex[count2+1]; + sortindex[count2+1] = temp; + } + } + } + + iss * sortedissue[no_of_issues]; + + for(count1 = 0; count1 <= no_of_issues; count1++) + sortedissue[count1] = issue[sortindex[count1]]; + + for(count1 = 0; count1 <= no_of_issues; count1++) + issue[count1] = sortedissue[count1]; + + return 0; +} diff --git a/src/mediarev.c b/src/mediarev.c new file mode 100644 index 0000000..e8dd3e6 --- /dev/null +++ b/src/mediarev.c @@ -0,0 +1,150 @@ +/* + * Copyright 2006 Nick White + * + * This mediagz is part of GetHT + * + * GetHT is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * GetHT is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GetHT; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include <stdio.h> +#include <string.h> +#include <zlib.h> + +#include "getht.h" +#include "issue.h" + +int smilurl(char * smilurl, med * cur_media); +void getquote(char * input, char * label); +void removeleadingspace(char * cur_line); + +med ** parsemediagz(char * media_path, int * no_of_media) +/* Parses gzipped adobe pagemaker files for media urls and metadata, + * filling media with the information. */ +{ + char cur_line[STR_MAX]; + gzFile mediagz; + + med ** media = NULL; + + strcpy(cur_line,""); /* reset cur_line */ + + mediagz = gzopen(media_path, "r"); + + med * cur_media; + + while(gzeof(mediagz)==0) + { + gzgets(mediagz, cur_line, STR_MAX); + cur_line[strlen(cur_line)-1] = '\0'; /* get rid of trailing newline */ + + if(!strcmp(cur_line,"on mouseUp")) + { + strcpy(cur_line,""); /* reset cur_line */ + + /* assign memory for the new media */ + media = assignnew_med(media, no_of_media); + + cur_media = media[*no_of_media]; + + /* setup media globals */ + cur_media->uri[0] = '\0'; + cur_media->title[0] = '\0'; + cur_media->comment[0] = '\0'; + cur_media->preview_uri[0] = '\0'; + cur_media->size = 0; + + /* process rev file */ + while(strcmp(cur_line,"end mouseUp") && gzeof(mediagz)==0) + { + strcpy(cur_line,""); /* reset cur_line */ + gzgets(mediagz, cur_line, STR_MAX); + cur_line[strlen(cur_line)-1] = '\0'; /* remove trailing newline */ + + removeleadingspace(cur_line); + + if(!strncmp(cur_line,"set the filename of player \"player1\" to \"",41)) + { + /* todo: check if smil, if so follow to find uri */ + sscanf(cur_line,"set the filename of player \"player1\" to \"%s\"",cur_media->uri); + cur_media->uri[strlen(cur_media->uri)-1] = '\0'; /* workaround extra character */ + } + else if(!strncmp(cur_line,"set the label of this stack to \"",32)) + { + getquote(cur_line,cur_media->title); + } + else if(!strncmp(cur_line,"statusMsg \(\"",12)) + { + getquote(cur_line,cur_media->comment); + } + } + } + strcpy(cur_line,""); /* reset cur_line */ + } + + return media; +} + +int smilurl(char * smilurl, med * cur_media) +/* Extracts url and other data from remote smil file, storing + * the info in the cur_media structure. */ +{ + return 0; +} + +void getquote(char * input, char * quote) +/* sets quote from a line of the format: + * `something "quote" something' */ +{ + char * cur_pos; + cur_pos = quote; + + /* advance until " character is reached */ + while(*input != '"' && *input) + input++; + + input++; + + /* copy characters in until next '"' */ + while(*input != '"' && *input) + { + *cur_pos = *input; + cur_pos++; + input++; + } + + *cur_pos = '\0'; +} + +void removeleadingspace(char * cur_line) +{ + int tmp, newpos; + + char temp_str[STR_MAX]; + + /* advance past whitespace */ + tmp = 0; + while (cur_line[tmp] == ' ' || cur_line[tmp] == '\t') + tmp++; + + /* copy from there to temp_str */ + for(newpos = 0; cur_line[tmp]; tmp++, newpos++) + temp_str[newpos] = cur_line[tmp]; + + temp_str[newpos] = '\0'; + + /* copy temp_str to cur_line */ + strncpy(cur_line, temp_str, sizeof(temp_str)); +} diff --git a/src/mediaxml.c b/src/mediaxml.c new file mode 100644 index 0000000..bcb2da1 --- /dev/null +++ b/src/mediaxml.c @@ -0,0 +1,246 @@ +/* + * Copyright 2006 Nick White + * + * This file is part of GetHT + * + * GetHT is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * GetHT is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GetHT; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include <stdio.h> +#include <string.h> +#include <libxml/xmlmemory.h> +#include <libxml/parser.h> + +#include "getht.h" +#include "issue.h" + +int media_accounted_for(char * filepath, issdates * date) +/* checks if media for issue is found */ +{ + xmlDocPtr media_file; + xmlNodePtr node; + + if(ready_xml(filepath, "media", &media_file, &node)) + return 1; + + *node = *node->xmlChildrenNode; + + issdates curdate; + int found = 1; + + while (node != NULL) + { + if(!xmlStrcmp(node->name,(char *) "issue")) + { + curdate.year = atoi( (char *) xmlGetProp(node, "year")); + curdate.firstmonth = atoi( (char *) xmlGetProp(node, "firstmonth")); + curdate.lastmonth = atoi( (char *) xmlGetProp(node, "lastmonth")); + } + + if( curdate.year == date->year && + curdate.firstmonth == date->firstmonth && + curdate.lastmonth == date->lastmonth ) + { + found = 0; + break; + } + + node = node->next; + } + + xmlFreeDoc(media_file); + + return found; +} + +int addmediaissue(char * filepath, char * title, issdates * date, med ** media, int med_no) +/* Appends data from media structures to xml file. */ +{ + xmlDocPtr media_file; + xmlNodePtr node; + + /* if xml file doesn't exist */ + if(ready_xml(filepath, "media", &media_file, &node)) + { + /* set up fresh xml file */ + media_file = xmlNewDoc(NULL); + node = xmlNewNode(NULL, "media"); + xmlDocSetRootElement(media_file, node); + } + + xmlNodePtr curissue; + char tmp[5]; + + /* set up issue node */ + curissue = xmlNewTextChild(node, NULL, "issue", NULL); + + xmlNewProp(curissue, "title", title); + + snprintf(tmp,5,"%i", date->year); + xmlNewProp(curissue, "year", tmp); + + snprintf(tmp,5,"%i",date->firstmonth); + xmlNewProp(curissue, "firstmonth", tmp); + + snprintf(tmp,5,"%i",date->lastmonth); + xmlNewProp(curissue, "lastmonth", tmp); + + xmlNodePtr curitem; + + int count; + for(count = 0; count <= med_no; count++) + { + curitem = xmlNewTextChild(curissue, NULL, "item", media[count]->title); + + xmlNewProp(curitem, "uri", media[count]->uri); + + if(media[count]->comment) + xmlNewProp(curitem, "comment", media[count]->comment); + if(media[count]->preview_uri) + xmlNewProp(curitem, "preview_uri", media[count]->preview_uri); + } + + xmlKeepBlanksDefault(0); + + xmlSaveFormatFile(filepath, media_file, 1); + + xmlFreeDoc(media_file); + + return 0; +} + +iss ** parsemedia(char * filepath, iss ** issue, int * no_of_issues) +/* Fills issues with relevant info from media xml, creating new + ones where necessary. */ +{ + issdates tmpdate; + + iss * cur_issue; med * cur_media; + + xmlDocPtr media_file; + xmlNodePtr node, itnode; + + if(ready_xml(filepath, "media", &media_file, &node)) + return NULL; + + *node = *node->xmlChildrenNode; + + int issue_there = 0; + + char title[STR_MAX]; + issdates curdate; + int tmp; + + while (node != NULL) + { + if(!xmlStrcmp(node->name,(char *) "issue")) + { + /* check if issue with title already exists */ + for(tmp = 0; tmp < *no_of_issues; tmp++) + { + curdate.year = atoi( (char *) xmlGetProp(node, "year")); + curdate.firstmonth = atoi( (char *) xmlGetProp(node, "firstmonth")); + curdate.lastmonth = atoi( (char *) xmlGetProp(node, "lastmonth")); + + if( curdate.year == issue[tmp]->date.year && + curdate.firstmonth == issue[tmp]->date.firstmonth && + curdate.lastmonth == issue[tmp]->date.lastmonth ) + { + issue_there = 1; + break; + } + } + + if(!issue_there) + { /* advance to the next free issue */ + iss ** tmpiss = NULL; + if(*no_of_issues == -1) + { /* make issue** a new array of issue pointers */ + if( (tmpiss = malloc(sizeof(iss *))) == NULL ) + nogo_mem(); + } + else + { /* add a new pointer to issue pointer list */ + if( (tmpiss = realloc(issue, sizeof(iss *) + (*no_of_issues * sizeof(iss *)))) == NULL ) + nogo_mem(); + } + + (*no_of_issues)++; + + /* make new array item a pointer to issue */ + if( (tmpiss[*no_of_issues] = malloc(sizeof(iss))) == NULL ) + nogo_mem(); + + issue = tmpiss; + + issue[*no_of_issues]->date.year = atoi( (char *) xmlGetProp(node, "year")); + issue[*no_of_issues]->date.firstmonth = atoi( (char *) xmlGetProp(node, "firstmonth")); + issue[*no_of_issues]->date.lastmonth = atoi( (char *) xmlGetProp(node, "lastmonth")); + + strncpy(issue[*no_of_issues]->title, (char *) xmlGetProp(node, "title"), STR_MAX); + + tmp = *no_of_issues; + } + + iss * cur_issue = issue[tmp]; + + issue[tmp]->no_of_media = -1; + + itnode = node->xmlChildrenNode; + + while (itnode != NULL) + { + + if(!xmlStrcmp(itnode->name,(char *) "item")) + { + /* assign memory for the new media */ + cur_issue->media = assignnew_med(cur_issue->media, &(cur_issue->no_of_media)); + + /* setup media globals */ + cur_media = cur_issue->media[cur_issue->no_of_media]; + + cur_media->uri[0] = '\0'; + cur_media->title[0] = '\0'; + cur_media->comment[0] = '\0'; + cur_media->preview_uri[0] = '\0'; + cur_media->size = 0; + + /* add media info to cur_media */ + if(xmlGetProp(itnode, "uri")) + strncpy(cur_media->uri, (char *) xmlGetProp(itnode, "uri"), STR_MAX); + + if(xmlGetProp(itnode, "comment")) + strncpy(cur_media->comment, (char *) xmlGetProp(itnode, "comment"), STR_MAX); + + if(xmlGetProp(itnode, "preview")) + strncpy(cur_media->preview_uri, (char *) xmlGetProp(itnode, "preview_uri"), STR_MAX); + + strncpy(cur_media->title, (char *) xmlNodeListGetString(media_file, itnode->xmlChildrenNode, 1), STR_MAX); + } + + itnode = itnode->next; + } + } + + node = node->next; + } + + xmlFreeDoc(media_file); + + issuesort(issue, no_of_issues); + + return issue; +} diff --git a/src/tocxml.c b/src/tocxml.c new file mode 100644 index 0000000..3740326 --- /dev/null +++ b/src/tocxml.c @@ -0,0 +1,264 @@ +/* + * Copyright 2006 Nick White + * + * This file is part of GetHT + * + * GetHT is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * GetHT is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GetHT; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <libxml/xmlmemory.h> +#include <libxml/parser.h> + +#include "issue.h" +#include "getht.h" + +iss ** parsetoc(char *filepath, int * iss_no, int * latest); +int parseissue(xmlDocPtr file, xmlNodePtr node, iss * cur_issue, int * latest); +void parsesection(xmlDocPtr file, xmlNodePtr node, sec * cur_section); + +void tokenise_hyphons(char to_token[10], int * first, int * last); + +int no_of_issues; + +iss ** parsetoc(char *filepath, int * iss_no, int * latest) +/* starts parsing of xml to issue structure */ +{ + xmlDocPtr file; + xmlNodePtr node; + + if(ready_xml(filepath, "issues", &file, &node)) + return NULL; + + *node = *node->xmlChildrenNode; + + no_of_issues = -1; + + iss ** issue = NULL; + + int year; + + xmlNodePtr cnode; + + while(node != NULL) + { + if(!xmlStrncmp(node->name,(char *) "year",4)) + { + cnode = node->children; + while(cnode != NULL) + { + if(!xmlStrncmp(cnode->name,(char *) "issue",5)) + { + /* assign memory for the new issue */ + issue = assignnew_iss(issue, &no_of_issues); + + /* setup issue globals */ + issue[no_of_issues]->no_of_media = -1; + issue[no_of_issues]->no_of_sections = -1; + issue[no_of_issues]->date.year = + atoi( (const char *)(xmlStrsub(node->name,5,4)) ); + tokenise_hyphons( + xmlStrsub(cnode->name,6,5), + &(issue[no_of_issues]->date.firstmonth), + &(issue[no_of_issues]->date.lastmonth)); + + /* parse the issue */ + parseissue(file, cnode, issue[no_of_issues], latest); + } + cnode = cnode->next; + } + } + node = node->next; + } + + xmlFreeDoc(file); + + *iss_no = no_of_issues; + + issuesort(issue, no_of_issues); + + return issue; +} + +int parseissue(xmlDocPtr file, xmlNodePtr node, iss * cur_issue, int * latest) +/* parses issue from xml, saving in cur_issue structure */ +{ + strncpy(cur_issue->title, (char *) xmlGetProp(node, "title"), STR_MAX); + strncpy(cur_issue->preview_uri, (char *) xmlGetProp(node, "coverlink"), STR_MAX); + + if(xmlGetProp(node, "current") && *latest==-1) + *latest = no_of_issues; + + node = node->xmlChildrenNode; + + while(node != NULL){ + if(!xmlStrncmp(node->name, (char *) "section",7) || + !xmlStrcmp(node->name, (const xmlChar *) "cover")) + { + /* assign memory for new section */ + cur_issue->section = + assignnew_sec(cur_issue->section, &(cur_issue->no_of_sections)); + + /* setup section globals */ + cur_issue->section[cur_issue->no_of_sections]->no_of_items = -1; + + /* parse the section */ + parsesection(file, node, cur_issue->section[cur_issue->no_of_sections]); + } + node = node->next; + } + + return 0; +} + +void parsesection(xmlDocPtr file, xmlNodePtr node, sec * cur_section) +/* parses section xml, filling cur_section structure */ +{ + it * cur_item = NULL; + + strncpy(cur_section->uri, (char *) xmlGetProp(node, "pdflink"), STR_MAX); + strncpy(cur_section->title, (char *) xmlGetProp(node, "title"), STR_MAX); + + if(!xmlStrcmp(node->name, (const xmlChar *) "cover")) + cur_section->number = 0; + else + cur_section->number = atoi( (const char *)(xmlStrsub(node->name,8,1)) ); + + node = node->xmlChildrenNode; + + char * pagenums; + + it ** tmp = NULL; + + while(node != NULL) + { + if(!xmlStrcmp(node->name, (const xmlChar *) "item")) + { + if(xmlNodeListGetString(file, node->xmlChildrenNode, 1) != NULL) + /* ignore items without titles */ + { + /* assign memory for new item */ + cur_section->item = + assignnew_it( cur_section->item, &(cur_section->no_of_items)); + + cur_item = cur_section->item[cur_section->no_of_items]; + + /* parse item */ + cur_item->title = xmlNodeListGetString(file, node->xmlChildrenNode, 1); + if(pagenums = (char *) xmlGetProp(node, "pages")) + tokenise_hyphons(pagenums, &(cur_item->firstpage), &(cur_item->lastpage)); + else + { + cur_item->firstpage = 0; + cur_item->lastpage = 0; + } + } + } + node = node->next; + } +} + +void tokenise_hyphons(char to_token[10], int * first, int * last) +/* splits string to_token, filling positions passed */ +{ + char token[10]; + char * tmp; + + tmp = token; + + while(*to_token != '-' && *to_token) + { + *tmp = *to_token; + to_token++; tmp++; + } + + *first = atoi(token); + + if(!*to_token) + *last = *first; + else + { + to_token++; /* advance past '-' */ + tmp = token; /* reset tmp */ + while(*to_token) + { + *tmp = *to_token; + tmp++; to_token++; + } + *last = atoi(token); + } +} + +int cur_identifiers(char * filepath, char * title, issdates * date) +/* parses xml file to ascertain current issue title and date */ +{ + xmlDocPtr file; + + if((file = xmlParseFile(filepath)) == NULL) + { + return 1; + } + + xmlNodePtr node,cnode; + + node = xmlDocGetRootElement(file); + + if(node == NULL) + { + fprintf(stderr,"Error: xml file %s has no root element",filepath); + xmlFreeDoc(file); + return 1; + } + + if(xmlStrcmp(node->name, (const xmlChar *) "issues")) + { + fprintf(stderr,"Document of the wrong type, root node is '%s' (expected 'issues').\n",(char *) node->name); + fprintf(stderr,"Continuing regardless...\n"); + } + + /* Now that's all sorted, let's do some work */ + + node = node->xmlChildrenNode; + + xmlChar *temp; + while(node != NULL) + { + if(!xmlStrncmp(node->name,(char *) "year",4)) + { + cnode = node->children; + while(cnode != NULL) + { + if(!xmlStrncmp(cnode->name,(char *) "issue",5)) + { + temp = xmlGetProp(cnode, "current"); + if(temp) + { + strncpy(title, (char *) xmlGetProp(cnode, "title"), STR_MAX); + date->year = atoi( (const char *)(xmlStrsub(node->name,5,4)) ); + tokenise_hyphons(xmlStrsub(cnode->name,6,5), &(date->firstmonth), &(date->lastmonth)); + return 0; + } + } + cnode = cnode->next; + } + } + node = node->next; + } + + return 0; +} diff --git a/src/version.h b/src/version.h new file mode 100644 index 0000000..0713569 --- /dev/null +++ b/src/version.h @@ -0,0 +1,22 @@ +/* + * Copyright 2006 Nick White + * + * This file is part of GetHT + * + * GetHT is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * GetHT is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GetHT; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#define VERSION "0.0.1" diff --git a/src/xml.c b/src/xml.c new file mode 100644 index 0000000..1a3ea3c --- /dev/null +++ b/src/xml.c @@ -0,0 +1,62 @@ +/* + * Copyright 2006 Nick White + * + * This file is part of GetHT + * + * GetHT is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * GetHT is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GetHT; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include <stdio.h> +#include <string.h> +#include <unistd.h> +#include <libxml/xmlmemory.h> +#include <libxml/parser.h> + +#include "getht.h" +#include "issue.h" + +int ready_xml(char * filepath, char * rootnode, xmlDocPtr * file, xmlNodePtr * node) +/* Opens filepath, filling nec pointers with essential information. */ +{ + if(access(filepath, R_OK) != 0) + return 1; + + if((*file = xmlParseFile(filepath)) == NULL) + { + fprintf(stderr, "Error: cannot open xml file %s\n", filepath); + return 1; + } + + *node = xmlDocGetRootElement(*file); + + if(*node == NULL) + { + fprintf(stderr,"Error: xml file %s has no root element",filepath); + xmlFreeDoc(*file); + return 1; + } + + char * test; + test = (char *) (*node)->name; + + if(xmlStrcmp((*node)->name, (const xmlChar *) rootnode)) + { + fprintf(stderr,"Document of the wrong type, root node is '%s' (expected '%s').\n",(char *) (*node)->name, rootnode); + fprintf(stderr,"Continuing regardless...\n"); + } + + return 0; +} |