From 54160445284875eee9ed85d513791b193a076d0b Mon Sep 17 00:00:00 2001
From: Nick White <arch@njw.me.uk>
Date: Wed, 2 Apr 2008 22:20:24 +0000
Subject: Fix for media xml crash bugs

Formatting
Check for empty item titles
Created function to check if any part of a string contains a phrase
Added some basic error checking to getquote
Note this does not completely fix all problems

git-archimport-id: getht@sv.gnu.org/getht--mainline--0.1--patch-31
---
 src/config.c   |   2 +-
 src/download.c |   5 +-
 src/getht.h    |   2 +-
 src/issuemem.c |   2 +-
 src/mediarev.c | 141 ++++++++++++++++++++++++++++++++++-----------------------
 src/mediaxml.c |   5 +-
 6 files changed, 95 insertions(+), 62 deletions(-)

(limited to 'src')

diff --git a/src/config.c b/src/config.c
index 8279f54..a963cc7 100644
--- a/src/config.c
+++ b/src/config.c
@@ -72,7 +72,7 @@ int loadconfig(char * htde_path, char * issue_path, int * update)
 				proxy_type = SOCKS5;
 			else
 				fprintf(stderr,
-					"Proxy type %s not known, please use either http, socks4 or socks5",
+					"Proxy type %s not known, please use either http, socks4 or socks5\n",
 					parameter);
 		}
 		else if(!strcmp(option, "proxy_address"))
diff --git a/src/download.c b/src/download.c
index 1108b6d..cddf0de 100644
--- a/src/download.c
+++ b/src/download.c
@@ -134,7 +134,10 @@ int update_progress(void *data, double dltotal, double dlnow,
 /*	Print status information */
 {
 	double frac;
-	frac = 100 * dlnow / dltotal;
+	if(dlnow > 0)
+		frac = 100 * dlnow / dltotal;
+	else
+		frac = 0;
 	printf("\rDownload progress: %3.0lf%% ", frac);
 	fflush(stdout);
 
diff --git a/src/getht.h b/src/getht.h
index df2b1b2..9e397dc 100644
--- a/src/getht.h
+++ b/src/getht.h
@@ -26,7 +26,7 @@
 #define ISS_XML_FILE	"htde_toc.xml"
 #define MED_REVGZ_FILE	"htde_media-player.rev.gz"
 
-#define STR_MAX		512
+#define STR_MAX		1024
 
 #define MAX_ISS		10
 
diff --git a/src/issuemem.c b/src/issuemem.c
index 3a40d7e..63fe5b4 100644
--- a/src/issuemem.c
+++ b/src/issuemem.c
@@ -125,7 +125,7 @@ med ** assignnew_med(med ** media, int * no_of_media)
 	(*no_of_media)++;                                         
 
 	/* make new array item a pointer to issue */
-	if( (tmp[*no_of_media] = malloc(sizeof(med))) == NULL )
+	if( ( tmp[*no_of_media] = malloc(sizeof(med)) ) == NULL )
 		nogo_mem();
 
 	return tmp;
diff --git a/src/mediarev.c b/src/mediarev.c
index b81d8c6..975c971 100644
--- a/src/mediarev.c
+++ b/src/mediarev.c
@@ -26,17 +26,20 @@
 #include "getht.h"
 #include "issue.h"
 
-int smilurl(char * smilurl, med * cur_media);
-void getquote(char * input, char * label);
-void removeleadingspace(char * cur_line);
+int gzgetstr(char * newstr, gzFile * gzfile);
+int getquote(char * input, char * quote, int number);
+int strcontains(char * source, char * comparison);
 
 med ** parsemediagz(char * media_path, int * no_of_media)
 /*	Parses gzipped adobe pagemaker files for media urls and metadata,
  *	filling media with the information. */
 {
+	char c;
 	char cur_line[STR_MAX];
 	gzFile mediagz;
 
+	*no_of_media = -1;
+
 	med ** media = NULL;
 
 	strcpy(cur_line,""); /* reset cur_line */
@@ -47,13 +50,10 @@ med ** parsemediagz(char * media_path, int * no_of_media)
 
 	while(gzeof(mediagz)==0)
 	{
-		gzgets(mediagz, cur_line, STR_MAX);
-		cur_line[strlen(cur_line)-1] = '\0'; /* get rid of trailing newline */
+		gzgetstr(cur_line, mediagz);
 
-		if(!strcmp(cur_line,"on mouseUp"))
+		if(strcontains(cur_line,"on mouseUp") == 0)
 		{
-			strcpy(cur_line,""); /* reset cur_line */
-
 			/* assign memory for the new media */
 			media = assignnew_med(media, no_of_media);
 
@@ -67,30 +67,22 @@ med ** parsemediagz(char * media_path, int * no_of_media)
 			cur_media->size = 0;
 
 			/* process rev file */
-			while(strcmp(cur_line,"end mouseUp") && gzeof(mediagz)==0)
+			while(strcontains(cur_line,"end mouseUp") && gzeof(mediagz)==0)
 			{
 				strcpy(cur_line,""); /* reset cur_line */
-				gzgets(mediagz, cur_line, STR_MAX);
-				cur_line[strlen(cur_line)-1] = '\0'; /* remove trailing newline */
-
-				removeleadingspace(cur_line);
-
-				if(!strncmp(cur_line,"set the filename of player \"", 28))
-				{
-					/* todo: check if smil, if so follow to find uri */
-					//sscanf(cur_line,"set the filename of player \"player1\" to \"%s\"",cur_media->uri);
-					sscanf(cur_line,"set the filename of player \"%[^\"] to \"%s\"",NULL,cur_media->uri);
-					cur_media->uri[strlen(cur_media->uri)-1] = '\0'; /* workaround extra character */
-				}
-				else if(!strncmp(cur_line,"set the label of this stack to \"",32))
-				{
-					getquote(cur_line,cur_media->title);
-				}
-				else if(!strncmp(cur_line,"statusMsg \(\"",12))
-				{
-					getquote(cur_line,cur_media->comment);
-				}
+				gzgetstr(cur_line, mediagz);
+
+				if(!strcontains(cur_line,"set the filename of player \"") && strcontains(cur_line,"empty"))
+					getquote(cur_line, cur_media->uri, 2);
+				else if(!strcontains(cur_line,"set the label of this stack to"))
+					getquote(cur_line, cur_media->title, 1);
+				else if(!strcontains(cur_line,"statusMsg\(\"") || !strcontains(cur_line,"StatusMsg \(\""))
+					getquote(cur_line, cur_media->comment, 1);
 			}
+
+			/* if it turns out that there was nothing useful there, remove the new media */
+			if(cur_media->uri[0] == '\0')
+				(*no_of_media)--;
 		}
 		strcpy(cur_line,""); /* reset cur_line */
 	}
@@ -98,19 +90,20 @@ med ** parsemediagz(char * media_path, int * no_of_media)
 	return media;
 }
 
-int smilurl(char * smilurl, med * cur_media)
-/*	Extracts url and other data from remote smil file, storing
- *	the info in the cur_media structure. */
+int getquote(char * input, char * quote, int number)
+/*	assigns the number-th quoted string (counting from 1) from a line of the format:
+ *	'something "quote" something' */
 {
-	return 0;
-}
+	int curnum;
 
-void getquote(char * input, char * quote)
-/*	sets quote from a line of the format:
- *	`something "quote" something' */
-{
-	char * cur_pos;
-	cur_pos = quote;
+	/* advance past earlier quotes */
+	for(curnum=0; curnum < ((number*2)-2); curnum++)
+	{
+		/* advance until " character is reached */
+		while(*input != '"' && *input)	
+			input++;
+		input++;
+	}
 
 	/* advance until " character is reached */
 	while(*input != '"' && *input)	
@@ -119,33 +112,67 @@ void getquote(char * input, char * quote)
 	input++;
 
 	/* copy characters in until next '"' */
-	while(*input != '"' && *input)
+	while(*input != '"')
 	{
-		*cur_pos = *input;
-		cur_pos++;
+		/* if end is found without closing quote,
+		 * exit with error */
+		if(! *input)
+			return 1;
+		*quote = *input;
+		quote++;
 		input++;
 	}
 
-	*cur_pos = '\0';
+	*quote = '\0';
+
+	return 0;
 }
 
-void removeleadingspace(char * cur_line)
+int strcontains(char * source, char * comparison)
 {
-	int tmp, newpos;
+	int srcpos = 0; /* position in source string */
+	int compos = 0; /* position in comparison string */
 
-	char temp_str[STR_MAX];
+	while(srcpos < strlen(source) && srcpos < STR_MAX)
+	{
+		/* if the chars match, move to the next in comparison,
+		 * otherwise only advance the source */
+		if(source[srcpos] == comparison[compos])
+			compos++;
+		else
+			compos = 0;
+
+		/* if we got to the end of comparison, there's a match */
+		if(compos == strlen(comparison))
+			return 0;
+
+		srcpos++;
+	}
 
-	/* advance past whitespace */
-	tmp = 0;
-	while (cur_line[tmp] == ' ' || cur_line[tmp] == '\t')
-		tmp++;
+	/* if we got all the way through the source, there's no match */
+	return 1;
+}
 
-	/* copy from there to temp_str */
-	for(newpos = 0; cur_line[tmp]; tmp++, newpos++)
-		temp_str[newpos] = cur_line[tmp];
+int gzgetstr(char * newstr, gzFile * gzfile)
+/*	a reimplementation of gzgets, which doesn't choke at odd characters
+ */
+{
+	strcpy(newstr,""); /* reset cur_line */
+	char c;
 
-	temp_str[newpos] = '\0';
+	while((c = gzgetc(gzfile)) != -1)
+	{
+		if(c == '\n')
+			break;
+
+		/* append the char if there's room in the str */
+		if(strlen(newstr)+1 < STR_MAX)
+			strncat(newstr,&c,1);
+		/* if the line is too long just break to pick up the 2nd half on next pass,
+		 * not perfect (will miss strs cut into 2 sections), but good enough for now */
+		else
+			break;
+	}
 
-	/* copy temp_str to cur_line */
-	strncpy(cur_line, temp_str, sizeof(temp_str));
+	return 0;
 }
diff --git a/src/mediaxml.c b/src/mediaxml.c
index 02c2cad..54ed3ae 100644
--- a/src/mediaxml.c
+++ b/src/mediaxml.c
@@ -228,7 +228,10 @@ iss ** parsemedia(char * filepath, iss ** issue, int * no_of_issues)
 					if(xmlGetProp(itnode, "preview"))
 						strncpy(cur_media->preview_uri, (char *) xmlGetProp(itnode, "preview_uri"), STR_MAX);
 
-					strncpy(cur_media->title, (char *) xmlNodeListGetString(media_file, itnode->xmlChildrenNode, 1), STR_MAX);
+					if((char *) xmlNodeListGetString(media_file, itnode->xmlChildrenNode, 1))
+						strncpy(cur_media->title, (char *) xmlNodeListGetString(media_file, itnode->xmlChildrenNode, 1), STR_MAX);
+					else
+						strncpy(cur_media->title, "untitled", STR_MAX);
 				}
 				
 				itnode = itnode->next;
-- 
cgit v1.2.3