diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/Makefile.am | 7 | ||||
| -rw-r--r-- | src/config.c | 168 | ||||
| -rw-r--r-- | src/download.c | 228 | ||||
| -rw-r--r-- | src/getht.c | 394 | ||||
| -rw-r--r-- | src/getht.h | 47 | ||||
| -rw-r--r-- | src/issue.h | 77 | ||||
| -rw-r--r-- | src/issuemem.c | 174 | ||||
| -rw-r--r-- | src/mediarev.c | 150 | ||||
| -rw-r--r-- | src/mediaxml.c | 246 | ||||
| -rw-r--r-- | src/tocxml.c | 264 | ||||
| -rw-r--r-- | src/version.h | 22 | ||||
| -rw-r--r-- | src/xml.c | 62 | 
12 files changed, 1839 insertions, 0 deletions
| diff --git a/src/Makefile.am b/src/Makefile.am new file mode 100644 index 0000000..0bf3452 --- /dev/null +++ b/src/Makefile.am @@ -0,0 +1,7 @@ +## A simple file to process with Automake, to produce Makefile.in + +bin_PROGRAMS = getht + +getht_SOURCES = config.c download.c getht.c issuemem.c	\ +		mediarev.c mediaxml.c tocxml.c xml.c	\ +		getht.h issue.h version.h diff --git a/src/config.c b/src/config.c new file mode 100644 index 0000000..57807b1 --- /dev/null +++ b/src/config.c @@ -0,0 +1,168 @@ +/* + * Copyright 2006 Nick White + * + * This file is part of GetHT + *  + * GetHT is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * GetHT is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GetHT; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA + * + */ + +#include <stdio.h> +#include <string.h> + +#include "getht.h" + +extern proxytype proxy_type; +extern char proxy_addr[STR_MAX]; +extern long proxy_port; +extern proxyauth proxy_auth; +extern char proxy_user[STR_MAX]; +extern char proxy_pass[STR_MAX]; + +int loadconfig(char * htde_path, char * issue_path, int * update) +/*	Loads variables from config file to extern and passed + *	variables. */ +{ +	FILE * config_file; +	char filepath[STR_MAX]; + +	snprintf(filepath, STR_MAX, "%s/config.ini", htde_path); + +	if((config_file = fopen(filepath,"r")) == NULL) +	{ +		fprintf(stderr,"Cannot open file %s for reading.\n",filepath); +		return 1; +	} + +	char parameter[80], option[80]; +	while(!feof(config_file)) +	{ +		fscanf(config_file, "%s = %s", option, parameter); + +		if(option[0] == '#');	/* ignore lines beginning with a hash */ +		else if(!strcmp(option, "issuepath")) +			strncpy(issue_path, parameter, STR_MAX); +		else if(!strcmp(option, "startup_check")) +			*update = atoi(parameter); +		else if(!strcmp(option, "proxy_type")) +		{ +			if(!strcmp(parameter, "http")) +				proxy_type = HTTP; +			else if(!strcmp(parameter, "socks4")) +				proxy_type = SOCKS4; +			else if(!strcmp(parameter, "socks5")) +				proxy_type = SOCKS5; +			else +				fprintf(stderr, +					"Proxy type %s not known, please use either http, socks4 or socks5", +					parameter); +		} +		else if(!strcmp(option, "proxy_address")) +			strncpy(proxy_addr, parameter, STR_MAX); +		else if(!strcmp(option, "proxy_port")) +			proxy_port = (long) atoi(parameter); +		else if(!strcmp(option, "proxy_auth")) +		{ +			if(!strcmp(parameter, "basic")) +				proxy_auth = BASIC; +			else if(!strcmp(parameter, "digest")) +				proxy_auth = DIGEST; +			else if(!strcmp(parameter, "ntlm")) +				proxy_auth = NTLM; +			else +				fprintf(stderr, +					"Proxy authentication method %s not known, please use basic, digest or ntlm", +					parameter); +		} +		else if(!strcmp(option, "proxy_user")) +			strncpy(proxy_user, parameter, STR_MAX); +		else if(!strcmp(option, "proxy_pass")) +			strncpy(proxy_pass, parameter, STR_MAX); +		else +			fprintf(stderr, "Option '%s' not recognised, ignoring\n", option); +	} + +	return 0; +} + +int writefreshconfig(char * htde_path, char * issue_path, int * update) +/*	Write a new config file according to extern and passed variables. */ +{ +	FILE * config_file; +	char filepath[STR_MAX]; + +	snprintf(filepath, STR_MAX, "%s/config.ini", htde_path); + +	if((config_file = fopen(filepath,"w")) == NULL) +	{ +		fprintf(stderr,"Cannot open file %s for writing.\n",filepath); +		return 1; +	} +	else +		fprintf(stdout,"Writing a fresh config file to %s.\n",filepath); + +	if(issue_path[0]) +		fprintf(config_file, "%s = %s\n", "issuepath", issue_path); +	if(update) +		fprintf(config_file, "%s = %i\n", "startup_check", *update); +	if(proxy_type != NONE) +	{ +		if(proxy_type = HTTP) +			fprintf(config_file, "%s = %s\n", "proxy_type", "http"); +		else if(proxy_type = SOCKS4) +			fprintf(config_file, "%s = %s\n", "proxy_type", "socks4"); +		else if(proxy_type = SOCKS5) +			fprintf(config_file, "%s = %s\n", "proxy_type", "socks5"); +	} +	if(proxy_addr[0]) +		fprintf(config_file, "%s = %s\n", "proxy_address", proxy_addr); +	if(proxy_port) +		fprintf(config_file, "%s = %i\n", "proxy_port", proxy_port); +	if(proxy_auth != NOAUTH) +	{ +		if(proxy_auth = BASIC) +			fprintf(config_file, "%s = %s\n", "proxy_auth", "basic"); +		else if(proxy_auth = DIGEST) +			fprintf(config_file, "%s = %s\n", "proxy_auth", "digest"); +		else if(proxy_auth = NTLM) +			fprintf(config_file, "%s = %s\n", "proxy_auth", "ntlm"); +	} +	if(proxy_user[0]) +		fprintf(config_file, "%s = %s\n", "proxy_user", proxy_user); +	if(proxy_pass[0]) +		fprintf(config_file, "%s = %s\n", "proxy_pass", proxy_pass); +		 +	return 0; +} + +int updateconfig(char * htde_path, char * issue_path, int * update) +/*	Read existing config file, and rewrite any variables which differ + *	in memory. */ +{ +	FILE * config_file; +	char filepath[STR_MAX]; + +	snprintf(filepath, STR_MAX, "%s/config.ini", htde_path); + +	if((config_file = fopen(filepath,"rw")) == NULL) +	{ +		fprintf(stderr,"Cannot open file %s for reading/writing.\n",filepath); +		return 1; +	} + +	fprintf(stderr,"Not yet implemented.\n"); + +	return 1; +} diff --git a/src/download.c b/src/download.c new file mode 100644 index 0000000..1108b6d --- /dev/null +++ b/src/download.c @@ -0,0 +1,228 @@ +/* + * Copyright 2006 Nick White + * + * This file is part of GetHT + * + * GetHT is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * GetHT is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GetHT; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA + * + */ + +#include <stdio.h> +#include <string.h> +#include <sys/stat.h> +#include <curl/curl.h> +#include <curl/types.h> +#include <curl/easy.h> + +#include "getht.h" +#include "issue.h" + +int read_func(void *ptr, size_t size, size_t nmemb, FILE *stream) +	{ return fread(ptr, size, nmemb, stream); } +int write_func(void *ptr, size_t size, size_t nmemb, FILE *stream) +	{ return fwrite(ptr, size, nmemb, stream); } +int update_progress(void *data, double dltotal, double dlnow, +					double ultotal, double ulnow); + +extern proxytype proxy_type; +extern char proxy_addr[STR_MAX]; +extern long proxy_port; +extern proxyauth proxy_auth; +extern char proxy_user[STR_MAX]; +extern char proxy_pass[STR_MAX]; +extern CURL *main_curl_handle; + +int save_file(CURL *curl_handle, char *url, char *filepath) +/*	Save the file *url to *filepath */ +{ +	printf("Downloading %s\n",url); + +	if(curl_handle == NULL) +		curl_handle = main_curl_handle; + +	if(curl_handle) { +		FILE *file; +		if((file = fopen(filepath,"w")) == NULL) +		{ +			fprintf(stderr,"Error: cannot open file %s for writing.\n",filepath); +			return 1; +		} + +		curl_easy_setopt(curl_handle, CURLOPT_URL, url); +		curl_easy_setopt(curl_handle, CURLOPT_WRITEFUNCTION, write_func); +		curl_easy_setopt(curl_handle, CURLOPT_WRITEDATA, file); +		curl_easy_setopt(curl_handle, CURLOPT_READFUNCTION, read_func); + +		if(proxy_type != NONE) +		{ +			if(proxy_type == HTTP) +				curl_easy_setopt(curl_handle, CURLOPT_PROXYTYPE, CURLPROXY_HTTP); +			else if(proxy_type == SOCKS4) +				curl_easy_setopt(curl_handle, CURLOPT_PROXYTYPE, CURLPROXY_SOCKS4); +			else if(proxy_type == SOCKS5) +				curl_easy_setopt(curl_handle, CURLOPT_PROXYTYPE, CURLPROXY_SOCKS5); + +			curl_easy_setopt(curl_handle, CURLOPT_PROXY, proxy_addr); + +			if(proxy_port) +				curl_easy_setopt(curl_handle, CURLOPT_PROXYPORT, proxy_port); + +			if(proxy_auth != NOAUTH) +			{ +				if(proxy_auth == BASIC) +					curl_easy_setopt(curl_handle, CURLOPT_PROXYAUTH, CURLAUTH_BASIC); +				else if(proxy_auth == DIGEST) +					curl_easy_setopt(curl_handle, CURLOPT_PROXYAUTH, CURLAUTH_DIGEST); +				else if(proxy_auth == NTLM) +					curl_easy_setopt(curl_handle, CURLOPT_PROXYAUTH, CURLAUTH_NTLM); + +				if(proxy_user[0] && proxy_pass[0]) +				{ +					char userpass[STR_MAX]; +					snprintf(userpass, STR_MAX, "%s:%s", proxy_user, proxy_pass); +					curl_easy_setopt(curl_handle, CURLOPT_PROXYUSERPWD, userpass); +				} +			} +		} + +		curl_easy_setopt(curl_handle, CURLOPT_NOPROGRESS, 0); +		curl_easy_setopt(curl_handle, CURLOPT_PROGRESSFUNCTION, update_progress); + +		if(curl_easy_perform(curl_handle)) +		{ +			remove(filepath); +			fprintf(stderr,"Error, could not download %s\n",url); +			return 1; +		} + +/*		double d; +		curl_easy_getinfo(curl_handle, CURLINFO_SIZE_DOWNLOAD, &d); +		printf("Total downloaded: %lf\n",d); + +		curl_easy_getinfo(curl_handle, CURLINFO_SPEED_DOWNLOAD, &d); +		printf("Average speed downloaded: %lf\n",d); + +		curl_easy_getinfo(curl_handle, CURLINFO_CONTENT_LENGTH_DOWNLOAD, &d); +		printf("Content Length: %lf\n",d); +*/ +		fclose(file); + +		printf("\n"); +	} +	else { +		fprintf(stderr,"Error: curl failed to initialise.\n"); +		printf("Could not download %s\n",url); +		return 1; +	} +	return 0; +} + +int update_progress(void *data, double dltotal, double dlnow, +				double ultotal, double ulnow) +/*	Print status information */ +{ +	double frac; +	frac = 100 * dlnow / dltotal; +	printf("\rDownload progress: %3.0lf%% ", frac); +	fflush(stdout); + +	return 0; +} + +void downloadissue(CURL *curl_handle, char * directory, iss * issue, int force) +/*	Download issue pointed to */ +{ +	sec * cur_section; +	char newdir[STR_MAX]; +	char filename[STR_MAX]; +	FILE * testfile; + +	snprintf(newdir,STR_MAX,"%s/%i_%i-%i",directory, +		issue->date.year,issue->date.firstmonth,issue->date.lastmonth); + +	printf("Downloading issue entitled '%s' to '%s'\n",issue->title,newdir); + +	if(!opendir(newdir)) +		if(mkdir(newdir, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH)) +		{ +			printf("Please enter the path of a directory to save issues in: "); +			scanf("%s", newdir); /* TODO: incorporate tab-completion */ +		} + +	int count; +	for(count = 0; count <= issue->no_of_sections; count++) +	{ +		cur_section = issue->section[count]; + +		snprintf(filename,STR_MAX,"%s/section_%i.pdf", newdir, cur_section->number); +		if(!force){ +			testfile = fopen(filename, "r"); +			if(!testfile) +				save_file(curl_handle, cur_section->uri, filename); +			else +			{ +				fclose(testfile); +				printf("Skipping download of section %i\n", cur_section->number); +			} +		} +		else +			save_file(curl_handle, cur_section->uri, filename); +	} +} + +void downloadmedia(CURL * curl_handle, char * directory, med * media, int force) +/*	Download media pointed to. */ +{ +	char newdir[STR_MAX], filename[STR_MAX], fn[STR_MAX]; +	FILE * testfile; + +	/* for the moment just save to down/media, in future save to issue directories +	 * (regardless of if they're occupied) */ +	snprintf(newdir,STR_MAX,"%s/%s",directory,"media");  + +	if(!opendir(newdir)) +		if(mkdir(newdir, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH)) +		{ +			printf("Please enter the path of a directory to save media in: "); +			scanf("%s", newdir); /* TODO: incorporate GETTEXT */ +		} + +	/* get the position of the final forwardslash of the uri */ +	int slashpos; +	for(slashpos=strlen(media->uri); +		slashpos>=0 && media->uri[slashpos] != '/'; +		slashpos--); + +	/* copy from there to fn */ +	int tmp; +	for(tmp=0, slashpos++; media->uri[slashpos]; tmp++, slashpos++) +		fn[tmp] = media->uri[slashpos]; +	fn[tmp] = '\0'; + +	snprintf(filename,STR_MAX,"%s/%s", newdir, fn); + +	if(!force){ +		testfile = fopen(filename, "r"); +		if(!testfile) +			save_file(curl_handle, media->uri, filename); +		else +		{ +			fclose(testfile); +			printf("Skipping download of media item %s\n", media->title); +		} +	} +	else +		save_file(curl_handle, media->uri, filename); +} diff --git a/src/getht.c b/src/getht.c new file mode 100644 index 0000000..5d2c39c --- /dev/null +++ b/src/getht.c @@ -0,0 +1,394 @@ +/* + * Copyright 2006 Nick White + * + * This file is part of GetHT + *  + * GetHT is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * GetHT is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GetHT; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA + * + */ + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <getopt.h> +#include <string.h> +#include <sys/stat.h> +#include <curl/curl.h> + +#include "version.h" +#include "issue.h" +#include "getht.h" + +int update_contents_files(); + +med * findnewestmed(iss ** issue, int no_of_issues); + +void show_iss_struct(iss ** issue, int no_of_issues); + +void showusage(); + +proxytype proxy_type; char proxy_addr[STR_MAX]; long proxy_port; +proxyauth proxy_auth;  +char proxy_user[STR_MAX]; char proxy_pass[STR_MAX]; +char issue_xml[STR_MAX]; char media_xml[STR_MAX]; char media_rev[STR_MAX]; +CURL *main_curl_handle;  + +int main(int argc, char *argv[]) +{ +	char getht_path[STR_MAX]; +	char save_path[STR_MAX]; + +	/* Define & set up paths */ +	snprintf(getht_path,STR_MAX,"%s/.getht",getenv("HOME")); + +	if(!opendir(getht_path)) +		if(mkdir(getht_path, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH)) +		{ +			fprintf(stderr,"Cannot open/create directory %s\n",getht_path); +			printf("Please enter the path of a directory to save settings in: "); +			scanf("%s", getht_path); +		} + +	snprintf(issue_xml,STR_MAX,"%s/%s",getht_path,ISS_XML_FILE); +	snprintf(media_xml,STR_MAX, "%s/%s", getht_path, MED_XML_FILE); +	snprintf(media_rev,STR_MAX,"%s/%s",getht_path,MED_REVGZ_FILE); + +	snprintf(save_path,STR_MAX,"%s/hinduism_today",getenv("HOME")); + +	int downall = 0, downlatest = 0; +	int downallmedia = 0, downlatestmedia = 0; +	int downissue = 0, downmedia = 0; +	int force = 0, update = 0, showstr = 0; +	int option = 0; + +	proxy_type = NONE; +	proxy_port = 0; +	proxy_addr[0] = '\0'; +	proxy_auth = NOAUTH; +	proxy_user[0] = '\0'; +	proxy_pass[0] = '\0'; + +	if(loadconfig(getht_path, &save_path, &update) != 0) +		writefreshconfig(getht_path, &save_path, &update); + +	if(!opendir(save_path)) +		if(mkdir(save_path, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH)) +		{ +			fprintf(stderr,"Cannot open/create directory %s",save_path); +			printf("Please enter the path of a directory to save issues in: "); +			scanf("%s", save_path); + +			updateconfig(getht_path, &save_path, NULL); +		} + +	/* Parse command line options */ +	char c; +	static struct option long_opts[] = +	{ +		{"download-all", no_argument, 0, 'a'}, +		{"download-latest", no_argument, 0, 'd'}, +		{"download-all-media", no_argument, 0, 'm'}, +		{"download-latest-media", no_argument, 0, 'n'}, +		{"force", no_argument, 0, 'f'}, +		{"update", no_argument, 0, 'u'}, +		{"tocfile", required_argument, 0, 't'}, +		{"mediatocfile", required_argument, 0, 'x'}, +		{"help", no_argument, 0, 'h'}, +		{"version", no_argument, 0, 'v'}, +		{0, 0, 0, 0} +	}; +	while((c = getopt_long(argc, argv, "adfhmnsuvx:t:", long_opts, NULL)) != -1) { +		switch(c) { +			case 'a': +				downall = 1; +				downissue = 1; +				option = 1; +				break; +			case 'd': +				downlatest = 1; +				downissue = 1; +				option = 1; +				break; +			case 'm': +				downallmedia = 1; +				downmedia = 1; +				option = 1; +				break; +			case 'n': +				downlatestmedia = 1; +				downmedia = 1; +				option = 1; +				break; +			case 'f': +				force = 1; +				option = 1; +				break; +			case 'u': +				update = 1; +				option = 1; +				break; +			case 's': +				showstr = 1; +				option = 1; +				break; +			case 't': +				strcpy(issue_xml, strdup(optarg)); +				option = 1; +				break; +			case 'x': +				strcpy(media_xml, strdup(optarg)); +				option = 1; +				break; +			case 'h': +				showusage(); +				return 0; +				break; +			case 'v': +				printf("GetHT version: %s\n",VERSION); +				option = 1; +				return 0; +				break; +		} +	} + +	if(!option) +	{ +		showusage(); +		return 0; +	} + +	main_curl_handle = curl_easy_init(); + +	if(update) +	{ +		if(update_contents_files()) +			fprintf(stderr,"Could not update contents files\n"); +	} + +	/* Parse TOC, filling issue structure */ + +	iss **issue; +	int no_of_issues = -1; +	int latest_index = -1; +	int i; + +	if(downissue || showstr) +	{ +		issue = parsetoc(issue_xml, &no_of_issues, &latest_index); + +		if(!issue) +		{ +			if(!update) +			{ +				printf("Cannot open contents file, trying to update contents\n"); +				if(update_contents_files()) +					return 1; +				issue = parsetoc(issue_xml, &no_of_issues, &latest_index); +			} +			else +			{ +				printf("Cannot open contents file, try running `getht --update`\n"); +				return 1; +			} +		} + +		if(latest_index == -1) +		{ +			fprintf(stderr, "Error: Cannot ascertain latest issue. "); +			fprintf(stderr, "Defaulting to first issue in contents file\n"); +			latest_index = 0; +		} + +		if(downall) +		{ +			for(i = 0; i < no_of_issues; i++) +				downloadissue(NULL, save_path, issue[i], force); +		} +	 +		if(downlatest) +			downloadissue(NULL, save_path, issue[latest_index], force); +	} +	 +	if(downmedia || showstr) +	{ +		int newest; + +		issue = parsemedia(media_xml, issue, &no_of_issues); + +		if(!issue) +		{ +			if(!update) +			{ +				printf("Cannot open media contents file, trying to update contents\n"); +				if(update_contents_files()) +					return 1; +				issue = parsemedia(media_xml, issue, &no_of_issues); +			} +			else +			{ +				printf("Cannot open contents file, try running `getht --update`\n"); +				return 1; +			} +		} + +		if(downlatestmedia) +		{ +			newest = findnewestiss(issue, no_of_issues); +			for(i = 0; i <= issue[newest]->no_of_media; i++) +				downloadmedia(NULL, save_path, issue[newest]->media[i], force); +		} + +		if(downallmedia) +		{ +			for(i = 0; i <= no_of_issues; i++) +			{ +				for(newest = 0; newest <= issue[i]->no_of_media; newest++) +					downloadmedia(NULL, save_path, issue[i]->media[newest], force); +			} +		} +	} + +	if(showstr) +		show_iss_struct(issue, no_of_issues); + +	/* Ensure curl cleans itself up */ +	curl_easy_cleanup(main_curl_handle); + +	return 0; +} + +int update_contents_files() +/* Returns 0 on success, 1 on failure */ +{ +	if(save_file(NULL, XML_TOC_URL, issue_xml)) +		return 1; +	 +	char isstitle[STR_MAX]; +	issdates date; + +	/*	see if current issue's media toc has already +		been written to the xml, if not do so */ +	if(access(issue_xml, R_OK) == 0) +	{ +		if(cur_identifiers(issue_xml, isstitle, &date)) +			return 1; +	} +	else +		return 1; + +	if(media_accounted_for(media_xml, &date)) +	{ +		if(save_file(NULL, MEDIA_TOC_URL, media_rev)) +			return 1; +	 +		med ** temp_med; +		int med_no = -1; + +		temp_med = parsemediagz(media_rev, &med_no); + +		unlink(media_rev); + +		addmediaissue(media_xml, isstitle, &date, temp_med, med_no); +	} + +	return 0; +} + +void show_iss_struct(iss ** issue, int no_of_issues) +/*	Prints issue information */ +{ +	int iss_no, sec_no, med_no, it_no; +	printf("%i Issues\n",no_of_issues); +	for(iss_no=0;iss_no<=no_of_issues;iss_no++) +	{ +		printf("-Issue %i-\n", (iss_no+1)); +		printf("Title:\t'%s'\n", issue[iss_no]->title); +		printf("Preview URI:\t'%s'\n", issue[iss_no]->preview_uri); +		printf("Months:\t'%i' - '%i'\n",issue[iss_no]->date.firstmonth,issue[iss_no]->date.lastmonth); +		printf("Year:\t'%i'\n",issue[iss_no]->date.year); +		printf("Number of Sections:\t'%i'\n",issue[iss_no]->no_of_sections); + +		for(sec_no=0; sec_no <= (issue[iss_no]->no_of_sections); sec_no++) +		{ +			printf("\t-Section %i-\n", (sec_no)); +			printf("\tTitle:\t'%s'\n", issue[iss_no]->section[sec_no]->title); +			printf("\tURI:\t'%s'\n", issue[iss_no]->section[sec_no]->uri); +			printf("\tNo. of Items:\t'%i'\n", issue[iss_no]->section[sec_no]->no_of_items); + +			for(it_no=0; it_no <= issue[iss_no]->section[sec_no]->no_of_items; it_no++) +			{ +				printf("\t\t-Item-\n"); +				printf("\t\tTitle:\t'%s'\n",issue[iss_no]->section[sec_no]->item[it_no]->title); +				printf("\t\tFirst page:\t'%i'",issue[iss_no]->section[sec_no]->item[it_no]->firstpage); +				printf("\tLast page:\t'%i'\n",issue[iss_no]->section[sec_no]->item[it_no]->lastpage); +			} +			it_no = 0; +		} +		sec_no = 0; + +		printf("Number of Media:\t'%i'\n",issue[iss_no]->no_of_media); + +		for(med_no=0; med_no <= (issue[iss_no]->no_of_media); med_no++) +		{ +			printf("\t-Media %i-\n", (med_no)); +			printf("\tTitle:\t'%s'\n", issue[iss_no]->media[med_no]->title); +			printf("\tURI:\t'%s'\n", issue[iss_no]->media[med_no]->uri); +			printf("\tComment:\t'%s'\n", issue[iss_no]->media[med_no]->comment); +			printf("\tPreview URI:\t'%s'\n", issue[iss_no]->media[med_no]->preview_uri); +		} +		med_no = 0; +	} +} + +int findnewestiss(iss ** issue, int no_of_issues) +/*	returns newest issue indice */ +{ +	iss * tmp_issue; issdates newest; +	int new_iss; + +	new_iss = -1; +	newest.year = 0; newest.firstmonth = 0; newest.lastmonth = 0; + +	int i; + +	for(i = 0; i <= no_of_issues; i++) +	{ +		if(issue[i]->date.year > newest.year || +			(issue[i]->date.year == newest.year && issue[i]->date.firstmonth > newest.firstmonth)) +		{ +			newest.year = issue[i]->date.year; +			newest.firstmonth = issue[i]->date.firstmonth; +			newest.lastmonth = issue[i]->date.lastmonth; +			new_iss = i; +		} +	} + +	return new_iss; +} + +void showusage() +{ +	printf("Usage: getht -u -a -d -m -n -f [-t tocfile] -h -v\n"); +	printf("-u | --update                 Update contents files\n"); +	printf("-a | --download-all           Download all issues\n"); +	printf("-d | --download-latest        Download latest issue\n"); +	printf("-m | --download-all-media     Download all media\n"); +	printf("-n | --download-latest-media  Download latest issue's media\n"); +	printf("-f | --force                  Force re-download of existing files\n"); +	printf("-t | --tocfile file           Use alternative contents xml file\n"); +	printf("-x | --mediatocfile file      Use alternative media contents xml file\n"); +	printf("-h | --help                   Print this help message\n"); +	printf("-v | --version                Print version information\n"); +} diff --git a/src/getht.h b/src/getht.h new file mode 100644 index 0000000..df2b1b2 --- /dev/null +++ b/src/getht.h @@ -0,0 +1,47 @@ +/* + * Copyright 2006 Nick White + * + * This file is part of GetHT + *  + * GetHT is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * GetHT is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GetHT; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA + * + */ + +#define XML_TOC_URL		"http://www.hinduismtoday.com/digital/htde_toc.xml" +#define MEDIA_TOC_URL	"http://www.hinduismtoday.com/digital/htde_media-player.rev.gz" + +#define MED_XML_FILE	"media_toc.xml" +#define ISS_XML_FILE	"htde_toc.xml" +#define MED_REVGZ_FILE	"htde_media-player.rev.gz" + +#define STR_MAX		512 + +#define MAX_ISS		10 + +typedef enum +{ +	NONE, +	HTTP, +	SOCKS4, +	SOCKS5 +} proxytype; + +typedef enum +{ +	NOAUTH, +	BASIC, +	DIGEST, +	NTLM, +} proxyauth; diff --git a/src/issue.h b/src/issue.h new file mode 100644 index 0000000..d224dcf --- /dev/null +++ b/src/issue.h @@ -0,0 +1,77 @@ +/* + * Copyright 2006 Nick White + * + * This file is part of GetHT + *  + * GetHT is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * GetHT is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GetHT; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA + * + */ + +/* all sizes are in bytes and are child-inclusive */ + +typedef struct +{ +	int year; +	int firstmonth; +	int lastmonth; +} issdates; + +typedef struct +{ +	int firstpage; +	int lastpage; +	char * title; +} it; + +typedef struct +{ +	char uri[512]; +	char title[512]; +	int number; +	int size; +	it ** item; +	int no_of_items; +} sec; + +typedef struct +{ +	char uri[512]; +	char title[512]; +	char comment[512]; +	char preview_uri[512]; +	int size; +} med; + +typedef struct +{ +	char preview_uri[512]; +	char title[512]; +	int size; +	issdates date; +	sec ** section; +	int no_of_sections; +	med ** media; +	int no_of_media; +} iss; + +iss ** parsetoc(char *filepath, int * iss_no, int * latest); +iss ** parsemedia(char * filepath, iss ** issue, int * no_of_issues); + +med ** parsemediagz(char * media_path, int * no_of_media); + +iss ** assignnew_iss(iss ** issue, int *no_of_issues); +sec ** assignnew_sec(sec ** section, int * no_of_sections); +it ** assignnew_it(it ** item, int * no_of_items); +med ** assignnew_med(med ** media, int * no_of_media); diff --git a/src/issuemem.c b/src/issuemem.c new file mode 100644 index 0000000..3a40d7e --- /dev/null +++ b/src/issuemem.c @@ -0,0 +1,174 @@ +/* + * Copyright 2006 Nick White + * + * This file is part of GetHT + *  + * GetHT is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * GetHT is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GetHT; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA + * + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "issue.h" + +void nogo_mem() +/*	called if memory assignation fails + *	TODO: handle freeing of memory to avoid leaks */ +{ +	fprintf(stderr, "Could not assign memory, exitting\n"); +	exit(1); +} + +iss ** assignnew_iss(iss ** issue, int *no_of_issues) +/* assign memory for new issue */ +{ +	iss ** tmp = NULL; + +	if(*no_of_issues < 0) +	{       /* make issue** a new array of issue pointers */ +		if( (tmp = malloc(sizeof(iss *))) == NULL ) +			nogo_mem(); +	} +	else +	{       /* add a new pointer to issue pointer list */ +		if( (tmp = realloc(issue, sizeof(iss *) + (((*no_of_issues)+1) * sizeof(iss *)))) == NULL ) +			nogo_mem(); +	} + +	(*no_of_issues)++; +	 +	/* make new array item a pointer to issue */ +	if( (tmp[*no_of_issues] = malloc(sizeof(iss))) == NULL ) +		nogo_mem(); + +	return tmp; +} + +sec ** assignnew_sec(sec ** section, int *no_of_sections) +/* assign memory for new section */ +{ +	sec ** tmp = NULL; + +	if(*no_of_sections < 0) +	{       /* make **section a new array of section pointers */ +		if( (tmp = malloc(sizeof(sec *))) == NULL ) +			nogo_mem(); +	} +	else +	{       /* add a new pointer to section pointer list */ +		if( (tmp = realloc(section, sizeof(sec *) + (((*no_of_sections)+1) * sizeof(sec *)))) == NULL ) +			nogo_mem(); +	} +	 +	(*no_of_sections)++; + +	/* make new array item a pointer to issue */ +	if( (tmp[*no_of_sections] = malloc(sizeof(sec))) == NULL ) +		nogo_mem(); + +	return tmp; +} + +it ** assignnew_it(it ** item, int * no_of_items) +{ +	it ** tmp = NULL; + +	if(*no_of_items < 0) +	{       /* make **item a new array of item pointers */ +		if( (tmp = malloc(sizeof(it *))) == NULL ) +			nogo_mem(); +										                        } +	else +	{       /* add a new pointer to item pointer list */ +		if( (tmp = realloc(item, sizeof(it *) + (((*no_of_items)+1) * sizeof(it *)))) == NULL ) +			nogo_mem(); +	} + +	(*no_of_items)++; + +	/* make new array item a pointer to item */ +	if( (tmp[*no_of_items] = malloc(sizeof(it))) == NULL ) +		nogo_mem(); + +	return tmp; +} + +med ** assignnew_med(med ** media, int * no_of_media) +{ +	med ** tmp = NULL; + +	if(*no_of_media < 0) +	{       /* make **section a new array of section pointers */ +		if( (tmp = malloc(sizeof(med *))) == NULL ) +			nogo_mem(); +	} +	else +	{       /* add a new pointer to media pointer list */ +		if( (tmp = realloc(media, sizeof(med *) + (((*no_of_media)+1) * sizeof(med *)))) == NULL ) +			nogo_mem(); +	} + +	(*no_of_media)++;                                          + +	/* make new array item a pointer to issue */ +	if( (tmp[*no_of_media] = malloc(sizeof(med))) == NULL ) +		nogo_mem(); + +	return tmp; +} + +int issuesort(iss ** issue, int no_of_issues) +/* does a basic bubble sort, by date, returning sorted issue */ +{ +	int sortindex[no_of_issues]; + +	int count1, count2, temp; +	 +	for(count1 = 0; count1 <= no_of_issues; count1++) +		sortindex[count1] = count1; + +	/* find correct order of issues using a bubble sort */ +	for(count1 = 0; count1 <=no_of_issues; count1++) +	{ +		for(count2 = 0; count2 < no_of_issues; count2++) +		{ +			if(issue[sortindex[count2]]->date.year < issue[sortindex[count2+1]]->date.year) +			{ +				temp = sortindex[count2]; +				sortindex[count2] = sortindex[count2+1]; +				sortindex[count2+1] = temp; +			} +			else if((issue[sortindex[count2]]->date.year == issue[sortindex[count2+1]]->date.year) && +				(issue[sortindex[count2]]->date.firstmonth < issue[sortindex[count2+1]]->date.firstmonth)) +			{ +				temp = sortindex[count2]; +				sortindex[count2] = sortindex[count2+1]; +				sortindex[count2+1] = temp; +			} +		} +	} + +	iss * sortedissue[no_of_issues]; + +	for(count1 = 0; count1 <= no_of_issues; count1++) +		sortedissue[count1] = issue[sortindex[count1]]; + +	for(count1 = 0; count1 <= no_of_issues; count1++) +		issue[count1] = sortedissue[count1]; + +	return 0; +} diff --git a/src/mediarev.c b/src/mediarev.c new file mode 100644 index 0000000..e8dd3e6 --- /dev/null +++ b/src/mediarev.c @@ -0,0 +1,150 @@ +/* + * Copyright 2006 Nick White + * + * This mediagz is part of GetHT + * + * GetHT is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * GetHT is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GetHT; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA + * + */ + +#include <stdio.h> +#include <string.h> +#include <zlib.h> + +#include "getht.h" +#include "issue.h" + +int smilurl(char * smilurl, med * cur_media); +void getquote(char * input, char * label); +void removeleadingspace(char * cur_line); + +med ** parsemediagz(char * media_path, int * no_of_media) +/*	Parses gzipped adobe pagemaker files for media urls and metadata, + *	filling media with the information. */ +{ +	char cur_line[STR_MAX]; +	gzFile mediagz; + +	med ** media = NULL; + +	strcpy(cur_line,""); /* reset cur_line */ + +	mediagz = gzopen(media_path, "r"); + +	med * cur_media; + +	while(gzeof(mediagz)==0) +	{ +		gzgets(mediagz, cur_line, STR_MAX); +		cur_line[strlen(cur_line)-1] = '\0'; /* get rid of trailing newline */ + +		if(!strcmp(cur_line,"on mouseUp")) +		{ +			strcpy(cur_line,""); /* reset cur_line */ + +			/* assign memory for the new media */ +			media = assignnew_med(media, no_of_media); + +			cur_media = media[*no_of_media]; + +			/* setup media globals */ +			cur_media->uri[0] = '\0'; +			cur_media->title[0] = '\0'; +			cur_media->comment[0] = '\0'; +			cur_media->preview_uri[0] = '\0'; +			cur_media->size = 0; + +			/* process rev file */ +			while(strcmp(cur_line,"end mouseUp") && gzeof(mediagz)==0) +			{ +				strcpy(cur_line,""); /* reset cur_line */ +				gzgets(mediagz, cur_line, STR_MAX); +				cur_line[strlen(cur_line)-1] = '\0'; /* remove trailing newline */ + +				removeleadingspace(cur_line); + +				if(!strncmp(cur_line,"set the filename of player \"player1\" to \"",41)) +				{ +					/* todo: check if smil, if so follow to find uri */ +					sscanf(cur_line,"set the filename of player \"player1\" to \"%s\"",cur_media->uri); +					cur_media->uri[strlen(cur_media->uri)-1] = '\0'; /* workaround extra character */ +				} +				else if(!strncmp(cur_line,"set the label of this stack to \"",32)) +				{ +					getquote(cur_line,cur_media->title); +				} +				else if(!strncmp(cur_line,"statusMsg \(\"",12)) +				{ +					getquote(cur_line,cur_media->comment); +				} +			} +		} +		strcpy(cur_line,""); /* reset cur_line */ +	} + +	return media; +} + +int smilurl(char * smilurl, med * cur_media) +/*	Extracts url and other data from remote smil file, storing + *	the info in the cur_media structure. */ +{ +	return 0; +} + +void getquote(char * input, char * quote) +/*	sets quote from a line of the format: + *	`something "quote" something' */ +{ +	char * cur_pos; +	cur_pos = quote; + +	/* advance until " character is reached */ +	while(*input != '"' && *input)	 +		input++; + +	input++; + +	/* copy characters in until next '"' */ +	while(*input != '"' && *input) +	{ +		*cur_pos = *input; +		cur_pos++; +		input++; +	} + +	*cur_pos = '\0'; +} + +void removeleadingspace(char * cur_line) +{ +	int tmp, newpos; + +	char temp_str[STR_MAX]; + +	/* advance past whitespace */ +	tmp = 0; +	while (cur_line[tmp] == ' ' || cur_line[tmp] == '\t') +		tmp++; + +	/* copy from there to temp_str */ +	for(newpos = 0; cur_line[tmp]; tmp++, newpos++) +		temp_str[newpos] = cur_line[tmp]; + +	temp_str[newpos] = '\0'; + +	/* copy temp_str to cur_line */ +	strncpy(cur_line, temp_str, sizeof(temp_str)); +} diff --git a/src/mediaxml.c b/src/mediaxml.c new file mode 100644 index 0000000..bcb2da1 --- /dev/null +++ b/src/mediaxml.c @@ -0,0 +1,246 @@ +/* + * Copyright 2006 Nick White + * + * This file is part of GetHT + *  + * GetHT is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * GetHT is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GetHT; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA + * + */ + +#include <stdio.h> +#include <string.h> +#include <libxml/xmlmemory.h> +#include <libxml/parser.h> + +#include "getht.h" +#include "issue.h" + +int media_accounted_for(char * filepath, issdates * date) +/*	checks if media for issue is found */ +{ +	xmlDocPtr media_file; +	xmlNodePtr node; + +	if(ready_xml(filepath, "media", &media_file, &node)) +		return 1; + +	*node = *node->xmlChildrenNode; + +	issdates curdate; +	int found = 1; + +	while (node != NULL) +	{ +		if(!xmlStrcmp(node->name,(char *) "issue")) +		{ +			curdate.year = atoi( (char *) xmlGetProp(node, "year")); +			curdate.firstmonth = atoi( (char *) xmlGetProp(node, "firstmonth")); +			curdate.lastmonth = atoi( (char *) xmlGetProp(node, "lastmonth")); +		} + +		if( curdate.year == date->year && +			curdate.firstmonth == date->firstmonth && +			curdate.lastmonth == date->lastmonth ) +		{ +			found = 0; +			break; +		} + +		node = node->next; +	} + +	xmlFreeDoc(media_file); + +	return found; +} + +int addmediaissue(char * filepath, char * title, issdates * date, med ** media, int med_no) +/*	Appends data from media structures to xml file. */ +{ +	xmlDocPtr media_file; +	xmlNodePtr node; + +	/* if xml file doesn't exist */ +	if(ready_xml(filepath, "media", &media_file, &node)) +	{ +		/* set up fresh xml file */ +		media_file = xmlNewDoc(NULL); +		node = xmlNewNode(NULL, "media"); +		xmlDocSetRootElement(media_file, node); +	} + +	xmlNodePtr curissue; +	char tmp[5]; + +	/* set up issue node */ +	curissue = xmlNewTextChild(node, NULL, "issue", NULL); + +	xmlNewProp(curissue, "title", title); + +	snprintf(tmp,5,"%i", date->year); +	xmlNewProp(curissue, "year", tmp); + +	snprintf(tmp,5,"%i",date->firstmonth); +	xmlNewProp(curissue, "firstmonth", tmp); + +	snprintf(tmp,5,"%i",date->lastmonth); +	xmlNewProp(curissue, "lastmonth", tmp); + +	xmlNodePtr curitem; + +	int count; +	for(count = 0; count <= med_no; count++) +	{ +		curitem = xmlNewTextChild(curissue, NULL, "item", media[count]->title); + +		xmlNewProp(curitem, "uri", media[count]->uri); + +		if(media[count]->comment) +			xmlNewProp(curitem, "comment", media[count]->comment); +		if(media[count]->preview_uri) +			xmlNewProp(curitem, "preview_uri", media[count]->preview_uri); +	} + +	xmlKeepBlanksDefault(0); + +	xmlSaveFormatFile(filepath, media_file, 1); + +	xmlFreeDoc(media_file); + +	return 0; +} + +iss ** parsemedia(char * filepath, iss ** issue, int * no_of_issues) +/*	Fills issues with relevant info from media xml, creating new +	ones where necessary. */ +{ +	issdates tmpdate; + +	iss * cur_issue; med * cur_media; + +	xmlDocPtr media_file; +	xmlNodePtr node, itnode; + +	if(ready_xml(filepath, "media", &media_file, &node)) +		return NULL; + +	*node = *node->xmlChildrenNode; + +	int issue_there = 0; + +	char title[STR_MAX]; +	issdates curdate; +	int tmp; + +	while (node != NULL) +	{ +		if(!xmlStrcmp(node->name,(char *) "issue")) +		{ +			/* check if issue with title already exists */ +			for(tmp = 0; tmp < *no_of_issues; tmp++) +			{ +				curdate.year = atoi( (char *) xmlGetProp(node, "year")); +				curdate.firstmonth = atoi( (char *) xmlGetProp(node, "firstmonth")); +				curdate.lastmonth = atoi( (char *) xmlGetProp(node, "lastmonth")); + +				if( curdate.year == issue[tmp]->date.year && +				curdate.firstmonth == issue[tmp]->date.firstmonth && +				curdate.lastmonth == issue[tmp]->date.lastmonth ) +				{ +					issue_there = 1; +					break; +				} +			} + +			if(!issue_there) +			{	/* advance to the next free issue */ +				iss ** tmpiss = NULL; +				if(*no_of_issues == -1) +				{	/* make issue** a new array of issue pointers */ +					if( (tmpiss = malloc(sizeof(iss *))) == NULL ) +						nogo_mem(); +				} +				else +				{	/* add a new pointer to issue pointer list */ +					if( (tmpiss = realloc(issue, sizeof(iss *) + (*no_of_issues * sizeof(iss *)))) == NULL ) +					nogo_mem(); +				} + +				(*no_of_issues)++; + +				/* make new array item a pointer to issue */ +				if( (tmpiss[*no_of_issues] = malloc(sizeof(iss))) == NULL ) +					nogo_mem(); + +				issue = tmpiss; + +				issue[*no_of_issues]->date.year = atoi( (char *) xmlGetProp(node, "year")); +				issue[*no_of_issues]->date.firstmonth = atoi( (char *) xmlGetProp(node, "firstmonth")); +				issue[*no_of_issues]->date.lastmonth = atoi( (char *) xmlGetProp(node, "lastmonth")); + +				strncpy(issue[*no_of_issues]->title, (char *) xmlGetProp(node, "title"), STR_MAX); + +				tmp = *no_of_issues; +			} + +			iss * cur_issue = issue[tmp]; + +			issue[tmp]->no_of_media = -1; + +			itnode = node->xmlChildrenNode; + +			while (itnode != NULL) +			{ + +				if(!xmlStrcmp(itnode->name,(char *) "item")) +				{ +					/* assign memory for the new media */ +					cur_issue->media = assignnew_med(cur_issue->media, &(cur_issue->no_of_media)); + +					/* setup media globals */ +					cur_media = cur_issue->media[cur_issue->no_of_media]; +					 +					cur_media->uri[0] = '\0'; +					cur_media->title[0] = '\0'; +					cur_media->comment[0] = '\0'; +					cur_media->preview_uri[0] = '\0'; +					cur_media->size = 0; + +					/* add media info to cur_media */ +					if(xmlGetProp(itnode, "uri")) +						strncpy(cur_media->uri, (char *) xmlGetProp(itnode, "uri"), STR_MAX); + +					if(xmlGetProp(itnode, "comment")) +						strncpy(cur_media->comment, (char *) xmlGetProp(itnode, "comment"), STR_MAX); + +					if(xmlGetProp(itnode, "preview")) +						strncpy(cur_media->preview_uri, (char *) xmlGetProp(itnode, "preview_uri"), STR_MAX); + +					strncpy(cur_media->title, (char *) xmlNodeListGetString(media_file, itnode->xmlChildrenNode, 1), STR_MAX); +				} +				 +				itnode = itnode->next; +			} +		} + +		node = node->next; +	} + +	xmlFreeDoc(media_file); + +	issuesort(issue, no_of_issues); + +	return issue; +} diff --git a/src/tocxml.c b/src/tocxml.c new file mode 100644 index 0000000..3740326 --- /dev/null +++ b/src/tocxml.c @@ -0,0 +1,264 @@ +/* + * Copyright 2006 Nick White + * + * This file is part of GetHT + *  + * GetHT is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * GetHT is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GetHT; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA + * + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <libxml/xmlmemory.h> +#include <libxml/parser.h> + +#include "issue.h" +#include "getht.h" + +iss ** parsetoc(char *filepath, int * iss_no, int * latest); +int parseissue(xmlDocPtr file, xmlNodePtr node, iss * cur_issue, int * latest); +void parsesection(xmlDocPtr file, xmlNodePtr node, sec * cur_section); + +void tokenise_hyphons(char to_token[10], int * first, int * last); + +int no_of_issues; + +iss ** parsetoc(char *filepath, int * iss_no, int * latest) +/*	starts parsing of xml to issue structure	*/ +{ +	xmlDocPtr file; +	xmlNodePtr node; + +	if(ready_xml(filepath, "issues", &file, &node)) +		return NULL; + +	*node = *node->xmlChildrenNode; + +	no_of_issues = -1; + +	iss ** issue = NULL; + +	int year; + +	xmlNodePtr cnode; + +	while(node != NULL) +	{ +		if(!xmlStrncmp(node->name,(char *) "year",4)) +		{ +			cnode = node->children; +			while(cnode != NULL) +			{ +    				if(!xmlStrncmp(cnode->name,(char *) "issue",5)) +				{ +					/* assign memory for the new issue */ +					issue = assignnew_iss(issue, &no_of_issues); + +					/* setup issue globals */ +					issue[no_of_issues]->no_of_media = -1; +					issue[no_of_issues]->no_of_sections = -1; +					issue[no_of_issues]->date.year = +						atoi( (const char *)(xmlStrsub(node->name,5,4)) ); +					tokenise_hyphons( +							xmlStrsub(cnode->name,6,5), +							&(issue[no_of_issues]->date.firstmonth), +							&(issue[no_of_issues]->date.lastmonth)); + +					/* parse the issue */ +					parseissue(file, cnode, issue[no_of_issues], latest); +				} +				cnode = cnode->next; +			} +		} +		node = node->next; +	} + +	xmlFreeDoc(file); + +	*iss_no = no_of_issues; + +	issuesort(issue, no_of_issues); + +	return issue; +} + +int parseissue(xmlDocPtr file, xmlNodePtr node, iss * cur_issue, int * latest) +/*	parses issue from xml, saving in cur_issue structure	*/ +{ +	strncpy(cur_issue->title, (char *) xmlGetProp(node, "title"), STR_MAX); +	strncpy(cur_issue->preview_uri, (char *) xmlGetProp(node, "coverlink"), STR_MAX); + +	if(xmlGetProp(node, "current") && *latest==-1) +		*latest = no_of_issues; + +	node = node->xmlChildrenNode; + +	while(node != NULL){ +		if(!xmlStrncmp(node->name, (char *) "section",7) || +			!xmlStrcmp(node->name, (const xmlChar *) "cover")) +		{ +			/* assign memory for new section */ +			cur_issue->section =  +				assignnew_sec(cur_issue->section, &(cur_issue->no_of_sections)); + +			/* setup section globals */ +			cur_issue->section[cur_issue->no_of_sections]->no_of_items = -1; + +			/* parse the section */ +			parsesection(file, node, cur_issue->section[cur_issue->no_of_sections]); +		} +		node = node->next; +	} + +	return 0; +} + +void parsesection(xmlDocPtr file, xmlNodePtr node, sec * cur_section) +/*	parses section xml, filling cur_section structure */ +{ +	it * cur_item = NULL; + +	strncpy(cur_section->uri, (char *) xmlGetProp(node, "pdflink"), STR_MAX); +	strncpy(cur_section->title, (char *) xmlGetProp(node, "title"), STR_MAX); + +	if(!xmlStrcmp(node->name, (const xmlChar *) "cover")) +		cur_section->number = 0; +	else +		cur_section->number = atoi( (const char *)(xmlStrsub(node->name,8,1)) ); + +	node = node->xmlChildrenNode; + +	char * pagenums; + +	it ** tmp = NULL; + +	while(node != NULL) +	{ +		if(!xmlStrcmp(node->name, (const xmlChar *) "item")) +		{ +			if(xmlNodeListGetString(file, node->xmlChildrenNode, 1) != NULL) +			/* ignore items without titles */ +			{ +				/* assign memory for new item */ +				cur_section->item = +					assignnew_it( cur_section->item, &(cur_section->no_of_items)); + +				cur_item = cur_section->item[cur_section->no_of_items]; + +				/* parse item */ +				cur_item->title = xmlNodeListGetString(file, node->xmlChildrenNode, 1); +				if(pagenums = (char *) xmlGetProp(node, "pages")) +					tokenise_hyphons(pagenums, &(cur_item->firstpage), &(cur_item->lastpage)); +				else +				{ +					cur_item->firstpage = 0; +					cur_item->lastpage = 0; +				} +			} +		} +	node = node->next;	 +	} +} + +void tokenise_hyphons(char to_token[10], int * first, int * last) +/*	splits string to_token, filling positions passed */ +{ +	char token[10]; +	char * tmp; + +	tmp = token; + +	while(*to_token != '-' && *to_token) +	{ +		*tmp = *to_token; +		to_token++; tmp++; +	} + +	*first = atoi(token); + +	if(!*to_token) +		*last = *first; +	else +	{ +		to_token++; /* advance past '-' */ +		tmp = token; /* reset tmp */ +		while(*to_token) +		{ +			*tmp = *to_token; +			tmp++; to_token++; +		} +		*last = atoi(token); +	} +} + +int cur_identifiers(char * filepath, char * title, issdates * date) +/*	parses xml file to ascertain current issue title and date */ +{ +	xmlDocPtr file; + +	if((file = xmlParseFile(filepath)) == NULL) +	{ +		return 1; +	} + +	xmlNodePtr node,cnode; + +	node = xmlDocGetRootElement(file); + +	if(node == NULL) +	{ +		fprintf(stderr,"Error: xml file %s has no root element",filepath); +		xmlFreeDoc(file); +		return 1; +	} + +	if(xmlStrcmp(node->name, (const xmlChar *) "issues")) +	{ +		fprintf(stderr,"Document of the wrong type, root node is '%s' (expected 'issues').\n",(char *) node->name); +		fprintf(stderr,"Continuing regardless...\n"); +	} + +	/* Now that's all sorted, let's do some work */ + +	node = node->xmlChildrenNode; + +	xmlChar *temp; +	while(node != NULL) +	{ +		if(!xmlStrncmp(node->name,(char *) "year",4)) +		{ +			cnode = node->children; +			while(cnode != NULL) +			{ +	   		    if(!xmlStrncmp(cnode->name,(char *) "issue",5)) +				{ +					temp = xmlGetProp(cnode, "current"); +        			if(temp) +        			{ +        			    strncpy(title, (char *) xmlGetProp(cnode, "title"), STR_MAX); +						date->year = atoi( (const char *)(xmlStrsub(node->name,5,4)) ); +						tokenise_hyphons(xmlStrsub(cnode->name,6,5), &(date->firstmonth), &(date->lastmonth)); +						return 0; +        			} +				} +				cnode = cnode->next; +			} +		} +		node = node->next; +	} + +	return 0; +} diff --git a/src/version.h b/src/version.h new file mode 100644 index 0000000..0713569 --- /dev/null +++ b/src/version.h @@ -0,0 +1,22 @@ +/* + * Copyright 2006 Nick White + * + * This file is part of GetHT + *  + * GetHT is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * GetHT is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GetHT; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA + * + */ + +#define VERSION "0.0.1" diff --git a/src/xml.c b/src/xml.c new file mode 100644 index 0000000..1a3ea3c --- /dev/null +++ b/src/xml.c @@ -0,0 +1,62 @@ +/* + * Copyright 2006 Nick White + * + * This file is part of GetHT + *  + * GetHT is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * GetHT is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GetHT; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA + * + */ + +#include <stdio.h> +#include <string.h> +#include <unistd.h> +#include <libxml/xmlmemory.h> +#include <libxml/parser.h> + +#include "getht.h" +#include "issue.h" + +int ready_xml(char * filepath, char * rootnode, xmlDocPtr * file, xmlNodePtr * node) +/*	Opens filepath, filling nec pointers with essential information. */ +{ +	if(access(filepath, R_OK) != 0) +		return 1; + +	if((*file = xmlParseFile(filepath)) == NULL) +	{ +		fprintf(stderr, "Error: cannot open xml file %s\n", filepath); +		return 1; +	} + +	*node = xmlDocGetRootElement(*file); + +	if(*node == NULL) +	{ +		fprintf(stderr,"Error: xml file %s has no root element",filepath); +		xmlFreeDoc(*file); +		return 1; +	} + +	char * test; +	test = (char *) (*node)->name; + +	if(xmlStrcmp((*node)->name, (const xmlChar *) rootnode)) +	{ +		fprintf(stderr,"Document of the wrong type, root node is '%s' (expected '%s').\n",(char *) (*node)->name, rootnode); +		fprintf(stderr,"Continuing regardless...\n"); +	} + +	return 0; +} | 
