diff options
| author | Nick White <git@njw.me.uk> | 2010-05-19 17:15:41 +0100 | 
|---|---|---|
| committer | Nick White <git@njw.me.uk> | 2010-05-19 17:15:41 +0100 | 
| commit | e91ec0d87aa31cb465fe8cf934d405ad56342eba (patch) | |
| tree | 1709e437c0d8f144bec479975a777c6ae8580eba /tocxml.c | |
| parent | 6cf58b46037cf04915fa507813ab7419db2e45ec (diff) | |
Switched to simpler build system, and fixed bugs
Now there's a proper build system in place, which is actually simple
enough to understand.
I also fixed plenty of warnings about the code (reminding me how badly I
knew C when I wrote this).
Hinduism Today aren't indexing their new issues using the index file I
was sourcing any more, so I don't expect to fix any more bugs or improve
this much.
Diffstat (limited to 'tocxml.c')
| -rw-r--r-- | tocxml.c | 241 | 
1 files changed, 241 insertions, 0 deletions
| diff --git a/tocxml.c b/tocxml.c new file mode 100644 index 0000000..ac75ec7 --- /dev/null +++ b/tocxml.c @@ -0,0 +1,241 @@ +/* + * This file is part of GetHT + * + * See COPYING file for copyright, license and warranty details. + * + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <libxml/xmlmemory.h> +#include <libxml/parser.h> + +#include "getht.h" + +iss ** parsetoc(char *filepath, int * iss_no); +int parseissue(xmlDocPtr file, xmlNodePtr node, iss * cur_issue); +void parsesection(xmlDocPtr file, xmlNodePtr node, sec * cur_section); + +void tokenise_hyphons(char to_token[10], int * first, int * last); + +int no_of_issues; + +iss ** parsetoc(char *filepath, int * iss_no) +/*	starts parsing of xml to issue structure	*/ +{ +	xmlDocPtr file; +	xmlNodePtr node; + +	if(ready_xml(filepath, "issues", &file, &node)) +		return NULL; + +	*node = *node->xmlChildrenNode; + +	no_of_issues = -1; + +	iss ** issue = NULL; + +	xmlNodePtr cnode; + +	while(node != NULL) +	{ +		if(!xmlStrncmp(node->name,(unsigned char *) "year",4)) +		{ +			cnode = node->children; +			while(cnode != NULL) +			{ +    				if(!xmlStrncmp(cnode->name,(unsigned char *) "issue",5)) +				{ +					/* assign memory for the new issue */ +					issue = assignnew_iss(issue, &no_of_issues); + +					/* setup issue globals */ +					issue[no_of_issues]->no_of_sections = -1; +					issue[no_of_issues]->date.year = +						atoi( (const char *)(xmlStrsub(node->name,5,4)) ); +					tokenise_hyphons( +							(char *) xmlStrsub(cnode->name,6,5), +							&(issue[no_of_issues]->date.firstmonth), +							&(issue[no_of_issues]->date.lastmonth)); + +					/* parse the issue */ +					parseissue(file, cnode, issue[no_of_issues]); +				} +				cnode = cnode->next; +			} +		} +		node = node->next; +	} + +	xmlFreeDoc(file); + +	*iss_no = no_of_issues; + +	issuesort(issue, no_of_issues); + +	return issue; +} + +int parseissue(xmlDocPtr file, xmlNodePtr node, iss * cur_issue) +/*	parses issue from xml, 
saving in cur_issue structure	*/ +{ +	strncpy(cur_issue->title, (char *) xmlGetProp(node, (unsigned char *)"title"), STR_MAX); +	strncpy(cur_issue->preview_uri, (char *) xmlGetProp(node, (unsigned char *) "coverlink"), STR_MAX); + +	node = node->xmlChildrenNode; + +	while(node != NULL){ +		if(!xmlStrncmp(node->name, (unsigned char *) "section",7) || +			!xmlStrcmp(node->name, (const xmlChar *) "cover")) +		{ +			/* assign memory for new section */ +			cur_issue->section =  +				assignnew_sec(cur_issue->section, &(cur_issue->no_of_sections)); + +			/* setup section globals */ +			cur_issue->section[cur_issue->no_of_sections]->no_of_items = -1; + +			/* parse the section */ +			parsesection(file, node, cur_issue->section[cur_issue->no_of_sections]); +		} +		node = node->next; +	} + +	return 0; +} + +void parsesection(xmlDocPtr file, xmlNodePtr node, sec * cur_section) +/*	parses section xml, filling cur_section structure */ +{ +	it * cur_item = NULL; + +	strncpy(cur_section->uri, (char *) xmlGetProp(node, (unsigned char *) "pdflink"), STR_MAX); +	strncpy(cur_section->title, (char *) xmlGetProp(node, (unsigned char *) "title"), STR_MAX); + +	if(!xmlStrcmp(node->name, (const xmlChar *) "cover")) +		cur_section->number = 0; +	else +		cur_section->number = atoi( (const char *)(xmlStrsub(node->name,8,1)) ); + +	node = node->xmlChildrenNode; + +	char * pagenums; + +	while(node != NULL) +	{ +		if(!xmlStrcmp(node->name, (const xmlChar *) "item")) +		{ +			if(xmlNodeListGetString(file, node->xmlChildrenNode, 1) != NULL) +			/* ignore items without titles */ +			{ +				/* assign memory for new item */ +				cur_section->item = +					assignnew_it( cur_section->item, &(cur_section->no_of_items)); + +				cur_item = cur_section->item[cur_section->no_of_items]; + +				/* parse item */ +				cur_item->title = (char *) xmlNodeListGetString(file, node->xmlChildrenNode, 1); +				if(pagenums == (char *) xmlGetProp(node, (unsigned char *) "pages")) +					tokenise_hyphons(pagenums, 
&(cur_item->firstpage), &(cur_item->lastpage)); +				else +				{ +					cur_item->firstpage = 0; +					cur_item->lastpage = 0; +				} +			} +		} +	node = node->next;	 +	} +} + +void tokenise_hyphons(char to_token[10], int * first, int * last) +/*	splits string to_token, filling positions passed */ +{ +	char token[10]; +	char * tmp; + +	tmp = token; + +	while(*to_token != '-' && *to_token) +	{ +		*tmp = *to_token; +		to_token++; tmp++; +	} + +	*first = atoi(token); + +	if(!*to_token) +		*last = *first; +	else +	{ +		to_token++; /* advance past '-' */ +		tmp = token; /* reset tmp */ +		while(*to_token) +		{ +			*tmp = *to_token; +			tmp++; to_token++; +		} +		*last = atoi(token); +	} +} + +int cur_identifiers(char * filepath, char * title, issdates * date) +/*	parses xml file to ascertain current issue title and date */ +{ +	xmlDocPtr file; + +	if((file =  xmlReadFile(filepath, "ISO-8859-1", 0)) == NULL) +	{ +		return 1; +	} + +	xmlNodePtr node,cnode; + +	node = xmlDocGetRootElement(file); + +	if(node == NULL) +	{ +		fprintf(stderr,"Error: xml file %s has no root element",filepath); +		xmlFreeDoc(file); +		return 1; +	} + +	if(xmlStrcmp(node->name, (const xmlChar *) "issues")) +	{ +		fprintf(stderr,"Document of the wrong type, root node is '%s' (expected 'issues').\n",(char *) node->name); +		fprintf(stderr,"Continuing regardless...\n"); +	} + +	/* Now that's all sorted, let's do some work */ + +	node = node->xmlChildrenNode; + +	xmlChar *temp; +	while(node != NULL) +	{ +		if(!xmlStrncmp(node->name,(unsigned char *) "year",4)) +		{ +			cnode = node->children; +			while(cnode != NULL) +			{ +	   		    if(!xmlStrncmp(cnode->name,(unsigned char *) "issue",5)) +				{ +					temp = xmlGetProp(cnode, (unsigned char *) "current"); +        			if(temp) +        			{ +        			    strncpy(title, (char *) xmlGetProp(cnode, (unsigned char *) "title"), STR_MAX); +						date->year = atoi( (const char *)(xmlStrsub(node->name,5,4)) ); +						tokenise_hyphons((char 
*)xmlStrsub(cnode->name,6,5), &(date->firstmonth), &(date->lastmonth)); +						return 0; +        			} +				} +				cnode = cnode->next; +			} +		} +		node = node->next; +	} + +	return 0; +} | 
