From f811c2d4823f95d7e90f25e0e7a98e5c5abcf3e2 Mon Sep 17 00:00:00 2001 From: Nick White Date: Thu, 3 May 2007 00:31:22 +0000 Subject: Added Autotools, changed dir structure, added docs Added (perhaps somewhat shaky) autotools support Added tagging rules to junkify files generated by autotools Added a directory structure Updated README & INSTALL files to reflect changes Wrote a man page Changed version numbers in preparation for a release git-archimport-id: getht@sv.gnu.org/getht--mainline--0.1--patch-23 --- src/tocxml.c | 264 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 264 insertions(+) create mode 100644 src/tocxml.c (limited to 'src/tocxml.c') diff --git a/src/tocxml.c b/src/tocxml.c new file mode 100644 index 0000000..3740326 --- /dev/null +++ b/src/tocxml.c @@ -0,0 +1,264 @@ +/* + * Copyright 2006 Nick White + * + * This file is part of GetHT + * + * GetHT is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * GetHT is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GetHT; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include +#include +#include +#include +#include + +#include "issue.h" +#include "getht.h" + +iss ** parsetoc(char *filepath, int * iss_no, int * latest); +int parseissue(xmlDocPtr file, xmlNodePtr node, iss * cur_issue, int * latest); +void parsesection(xmlDocPtr file, xmlNodePtr node, sec * cur_section); + +void tokenise_hyphons(char to_token[10], int * first, int * last); + +int no_of_issues; + +iss ** parsetoc(char *filepath, int * iss_no, int * latest) +/* starts parsing of xml to issue structure */ +{ + xmlDocPtr file; + xmlNodePtr node; + + if(ready_xml(filepath, "issues", &file, &node)) + return NULL; + + *node = *node->xmlChildrenNode; + + no_of_issues = -1; + + iss ** issue = NULL; + + int year; + + xmlNodePtr cnode; + + while(node != NULL) + { + if(!xmlStrncmp(node->name,(char *) "year",4)) + { + cnode = node->children; + while(cnode != NULL) + { + if(!xmlStrncmp(cnode->name,(char *) "issue",5)) + { + /* assign memory for the new issue */ + issue = assignnew_iss(issue, &no_of_issues); + + /* setup issue globals */ + issue[no_of_issues]->no_of_media = -1; + issue[no_of_issues]->no_of_sections = -1; + issue[no_of_issues]->date.year = + atoi( (const char *)(xmlStrsub(node->name,5,4)) ); + tokenise_hyphons( + xmlStrsub(cnode->name,6,5), + &(issue[no_of_issues]->date.firstmonth), + &(issue[no_of_issues]->date.lastmonth)); + + /* parse the issue */ + parseissue(file, cnode, issue[no_of_issues], latest); + } + cnode = cnode->next; + } + } + node = node->next; + } + + xmlFreeDoc(file); + + *iss_no = no_of_issues; + + issuesort(issue, no_of_issues); + + return issue; +} + +int parseissue(xmlDocPtr file, xmlNodePtr node, iss * cur_issue, int * latest) +/* parses issue from xml, saving in cur_issue structure */ +{ + strncpy(cur_issue->title, (char *) xmlGetProp(node, "title"), STR_MAX); + strncpy(cur_issue->preview_uri, (char *) xmlGetProp(node, "coverlink"), STR_MAX); + + if(xmlGetProp(node, "current") && *latest==-1) + *latest = no_of_issues; + + node = node->xmlChildrenNode; + + while(node != NULL){ + if(!xmlStrncmp(node->name, (char *) "section",7) || + !xmlStrcmp(node->name, (const xmlChar *) "cover")) + { + /* assign memory for new section */ + cur_issue->section = + assignnew_sec(cur_issue->section, &(cur_issue->no_of_sections)); + + /* setup section globals */ + cur_issue->section[cur_issue->no_of_sections]->no_of_items = -1; + + /* parse the section */ + parsesection(file, node, cur_issue->section[cur_issue->no_of_sections]); + } + node = node->next; + } + + return 0; +} + +void parsesection(xmlDocPtr file, xmlNodePtr node, sec * cur_section) +/* parses section xml, filling cur_section structure */ +{ + it * cur_item = NULL; + + strncpy(cur_section->uri, (char *) xmlGetProp(node, "pdflink"), STR_MAX); + strncpy(cur_section->title, (char *) xmlGetProp(node, "title"), STR_MAX); + + if(!xmlStrcmp(node->name, (const xmlChar *) "cover")) + cur_section->number = 0; + else + cur_section->number = atoi( (const char *)(xmlStrsub(node->name,8,1)) ); + + node = node->xmlChildrenNode; + + char * pagenums; + + it ** tmp = NULL; + + while(node != NULL) + { + if(!xmlStrcmp(node->name, (const xmlChar *) "item")) + { + if(xmlNodeListGetString(file, node->xmlChildrenNode, 1) != NULL) + /* ignore items without titles */ + { + /* assign memory for new item */ + cur_section->item = + assignnew_it( cur_section->item, &(cur_section->no_of_items)); + + cur_item = cur_section->item[cur_section->no_of_items]; + + /* parse item */ + cur_item->title = xmlNodeListGetString(file, node->xmlChildrenNode, 1); + if(pagenums = (char *) xmlGetProp(node, "pages")) + tokenise_hyphons(pagenums, &(cur_item->firstpage), &(cur_item->lastpage)); + else + { + cur_item->firstpage = 0; + cur_item->lastpage = 0; + } + } + } + node = node->next; + } +} + +void tokenise_hyphons(char to_token[10], int * first, int * last) +/* splits string to_token, filling positions passed */ +{ + char token[10]; + char * tmp; + + tmp = token; + + while(*to_token != '-' && *to_token) + { + *tmp = *to_token; + to_token++; tmp++; + } + + *first = atoi(token); + + if(!*to_token) + *last = *first; + else + { + to_token++; /* advance past '-' */ + tmp = token; /* reset tmp */ + while(*to_token) + { + *tmp = *to_token; + tmp++; to_token++; + } + *last = atoi(token); + } +} + +int cur_identifiers(char * filepath, char * title, issdates * date) +/* parses xml file to ascertain current issue title and date */ +{ + xmlDocPtr file; + + if((file = xmlParseFile(filepath)) == NULL) + { + return 1; + } + + xmlNodePtr node,cnode; + + node = xmlDocGetRootElement(file); + + if(node == NULL) + { + fprintf(stderr,"Error: xml file %s has no root element",filepath); + xmlFreeDoc(file); + return 1; + } + + if(xmlStrcmp(node->name, (const xmlChar *) "issues")) + { + fprintf(stderr,"Document of the wrong type, root node is '%s' (expected 'issues').\n",(char *) node->name); + fprintf(stderr,"Continuing regardless...\n"); + } + + /* Now that's all sorted, let's do some work */ + + node = node->xmlChildrenNode; + + xmlChar *temp; + while(node != NULL) + { + if(!xmlStrncmp(node->name,(char *) "year",4)) + { + cnode = node->children; + while(cnode != NULL) + { + if(!xmlStrncmp(cnode->name,(char *) "issue",5)) + { + temp = xmlGetProp(cnode, "current"); + if(temp) + { + strncpy(title, (char *) xmlGetProp(cnode, "title"), STR_MAX); + date->year = atoi( (const char *)(xmlStrsub(node->name,5,4)) ); + tokenise_hyphons(xmlStrsub(cnode->name,6,5), &(date->firstmonth), &(date->lastmonth)); + return 0; + } + } + cnode = cnode->next; + } + } + node = node->next; + } + + return 0; +} -- cgit v1.2.3