7 files changed, 148 insertions, 67 deletions
diff --git a/BUGS b/BUGS
index bd23d3a..2e3ed2a 100644
--- a/BUGS
+++ b/BUGS
@@ -17,17 +17,65 @@ a few things to fix with this:
 			realloc the pointer array,
 			and -1 to no_of_media
 
-	read media xml:
-		should not segfault if it finds an empty item
-		prob needs to check that string is not blank before trying to copy info
 
 further progress:
 -	main obstacle is that "on mouseUp" no longer necessarily follows a newline - file now less organised still
-	therefore we'll have to scan each line (except when already in useful clause (check this) char by char for the occurance
-	best way to do this would be to scan char by char, and if one worthwhile is found to copy from the start to a new string for parsing
+	therefore we'll have to scan each line (except when already in useful clause (check this)) char by char for the occurrence
+	best way to do this would be to scan char by char, and if one worthwhile is found to copy from this point to a new string for parsing
 	worth spending a few minutes checking if there's a library function out there to do this, if not create a simple generic one, using pointers, roughly of the form:
 		int containstring(char * toparse, char * match), returning 1 if found and 0 otherwise
 -	change of scanning routine has been done, but is untested
 	consider changing for an improved getquote which can take the nth quote
 		this is due to it being best not to rely on end of lines at end of bit we're interested in
 -	2nd clause of above (if no url is found) is very important now, as majority are unuseful
+
+
+currently (16/08/2007):
+
+the strcontains does not appear to be working correctly; appears to accept any line as a match
+this then creates a bunch of structures, which eventually become confused
+	this may point to some niggling bug in the memory management, but for the moment focus on correct basic rev parsing
+
+currently (28/11/2007):
+
+strcontains works mostly - check the TODO
+the sscanf for extracting uri isn't working - best to just have a slightly more sophisticated getquote function
+next actions:
+	/no_of_media strangely changes where it's pointing to to be "out of bounds" half way through
+	/	always gets stuck on 'Geometry'
+	/	must have an overflow, prob in strcontains (only happens when it's used lots)
+	/	rewrite strcontains using array arithmetic - easier to maintain
+	/ensure whole lines are always parsed (not just first parts thereof) [should now be done]
+	/	to test search output for 1st reference to Publisher, and see if a few lines above is a successful on mouseUp recognition
+	/	get rid of ugly & unnecessary extra checks, which are put there just to ensure nothing bad happens in the delicate phase
+
+currently (09/12/2007)
+
+strcontains has been rewritten to use friendly arrays; appears to work fine (though not heavily tested)
+getting a str has been rewritten into a function: gzgetstr; appears to work fine, though also not well tested
+
+note - arch changes they've made:
+only one regular media item (in form of earlier)
+now a new "songs" section, with neat urls following "SongsURLs" (when not in on mouseUp section)
+	song titles follow, again on separate neat lines, but without any easy delimiter to hook on to
+there is also a video section in a "on menupick pItem" clause
+	different syntax to traditional
+	has to work out url using quality choices from pItem (high, med, low; given below)
+	won't be too hard, just somewhat tricky to get the different options (start with them hardcoded, i.e. on "put tSize & '.mov' after tURL" just make 3 items with high, med, & low qualities)
+there is also an interviews section, which looks very similar to Songs (InterviewsURLs)
+	note that interview names are encased in html
+there are also thumbnail jpgs, but they needn't be handled for a while
+
+next actions:
+	improve getquote to point where sscanf line is replaced [e.g. allow to only return quote n (and ignore prev)]
+	only include stack if it has a uri attached (otherwise throw away)
+
+18/12/07
+
+getquote has been rewritten to extract an arbitrary quote n
+empty uri strings are ignored
+
+TODO w/bugs
+generally:
+
+we should be able to read rev.gz / xml from hd, to save time & avoid hitting the server while testing
diff --git a/src/config.c b/src/config.c
index 8279f54..a963cc7 100644
--- a/src/config.c
+++ b/src/config.c
@@ -72,7 +72,7 @@ int loadconfig(char * htde_path, char * issue_path, int * update)
 				proxy_type = SOCKS5;
 			else
 				fprintf(stderr,
-					"Proxy type %s not known, please use either http, socks4 or socks5",
+					"Proxy type %s not known, please use either http, socks4 or socks5\n",
 					parameter);
 		}
 		else if(!strcmp(option, "proxy_address"))
diff --git a/src/download.c b/src/download.c
index 1108b6d..cddf0de 100644
--- a/src/download.c
+++ b/src/download.c
@@ -134,7 +134,10 @@ int update_progress(void *data, double dltotal, double dlnow,
 /*	Print status information */
 {
 	double frac;
-	frac = 100 * dlnow / dltotal;
+	if(dlnow > 0)
+		frac = 100 * dlnow / dltotal;
+	else
+		frac = 0;
 	printf("\rDownload progress: %3.0lf%% ", frac);
 	fflush(stdout);
 
diff --git a/src/getht.h b/src/getht.h
index df2b1b2..9e397dc 100644
--- a/src/getht.h
+++ b/src/getht.h
@@ -26,7 +26,7 @@
 #define ISS_XML_FILE	"htde_toc.xml"
 #define MED_REVGZ_FILE	"htde_media-player.rev.gz"
 
-#define STR_MAX		512
+#define STR_MAX		1024
 
 #define MAX_ISS		10
 
diff --git a/src/issuemem.c b/src/issuemem.c
index 3a40d7e..63fe5b4 100644
--- a/src/issuemem.c
+++ b/src/issuemem.c
@@ -125,7 +125,7 @@ med ** assignnew_med(med ** media, int * no_of_media)
 	(*no_of_media)++;                                         
 
 	/* make new array item a pointer to issue */
-	if( (tmp[*no_of_media] = malloc(sizeof(med))) == NULL )
+	if( ( tmp[*no_of_media] = malloc(sizeof(med)) ) == NULL )
 		nogo_mem();
 
 	return tmp;
diff --git a/src/mediarev.c b/src/mediarev.c
index b81d8c6..975c971 100644
--- a/src/mediarev.c
+++ b/src/mediarev.c
@@ -26,17 +26,20 @@
 #include "getht.h"
 #include "issue.h"
 
-int smilurl(char * smilurl, med * cur_media);
-void getquote(char * input, char * label);
-void removeleadingspace(char * cur_line);
+int gzgetstr(char * newstr, gzFile * gzfile);
+int getquote(char * input, char * quote, int number);
+int strcontains(char * source, char * comparison);
 
 med ** parsemediagz(char * media_path, int * no_of_media)
 /*	Parses gzipped adobe pagemaker files for media urls and metadata,
  *	filling media with the information. */
 {
+	char c;
 	char cur_line[STR_MAX];
 	gzFile mediagz;
 
+	*no_of_media = -1;
+
 	med ** media = NULL;
 
 	strcpy(cur_line,""); /* reset cur_line */
@@ -47,13 +50,10 @@ med ** parsemediagz(char * media_path, int * no_of_media)
 
 	while(gzeof(mediagz)==0)
 	{
-		gzgets(mediagz, cur_line, STR_MAX);
-		cur_line[strlen(cur_line)-1] = '\0'; /* get rid of trailing newline */
+		gzgetstr(cur_line, mediagz);
 
-		if(!strcmp(cur_line,"on mouseUp"))
+		if(strcontains(cur_line,"on mouseUp") == 0)
 		{
-			strcpy(cur_line,""); /* reset cur_line */
-
 			/* assign memory for the new media */
 			media = assignnew_med(media, no_of_media);
 
@@ -67,30 +67,22 @@ med ** parsemediagz(char * media_path, int * no_of_media)
 			cur_media->size = 0;
 
 			/* process rev file */
-			while(strcmp(cur_line,"end mouseUp") && gzeof(mediagz)==0)
+			while(strcontains(cur_line,"end mouseUp") && gzeof(mediagz)==0)
 			{
 				strcpy(cur_line,""); /* reset cur_line */
-				gzgets(mediagz, cur_line, STR_MAX);
-				cur_line[strlen(cur_line)-1] = '\0'; /* remove trailing newline */
-
-				removeleadingspace(cur_line);
-
-				if(!strncmp(cur_line,"set the filename of player \"", 28))
-				{
-					/* todo: check if smil, if so follow to find uri */
-					//sscanf(cur_line,"set the filename of player \"player1\" to \"%s\"",cur_media->uri);
-					sscanf(cur_line,"set the filename of player \"%[^\"] to \"%s\"",NULL,cur_media->uri);
-					cur_media->uri[strlen(cur_media->uri)-1] = '\0'; /* workaround extra character */
-				}
-				else if(!strncmp(cur_line,"set the label of this stack to \"",32))
-				{
-					getquote(cur_line,cur_media->title);
-				}
-				else if(!strncmp(cur_line,"statusMsg \(\"",12))
-				{
-					getquote(cur_line,cur_media->comment);
-				}
+				gzgetstr(cur_line, mediagz);
+
+				if(!strcontains(cur_line,"set the filename of player \"") && strcontains(cur_line,"empty"))
+					getquote(cur_line, cur_media->uri, 2);
+				else if(!strcontains(cur_line,"set the label of this stack to"))
+					getquote(cur_line, cur_media->title, 1);
+				else if(!strcontains(cur_line,"statusMsg\(\"") || !strcontains(cur_line,"StatusMsg \(\""))
+					getquote(cur_line, cur_media->comment, 1);
 			}
+
+			/* if it turns out that there was nothing useful there, remove the new media */
+			if(cur_media->uri[0] == '\0')
+				(*no_of_media)--;
 		}
 		strcpy(cur_line,""); /* reset cur_line */
 	}
@@ -98,19 +90,20 @@ med ** parsemediagz(char * media_path, int * no_of_media)
 	return media;
 }
 
-int smilurl(char * smilurl, med * cur_media)
-/*	Extracts url and other data from remote smil file, storing
- *	the info in the cur_media structure. */
+int getquote(char * input, char * quote, int number)
+/*	assigns quote string from a line of the format:
+ *	'something "quote" something' */
 {
-	return 0;
-}
+	int curnum;
 
-void getquote(char * input, char * quote)
-/*	sets quote from a line of the format:
- *	`something "quote" something' */
-{
-	char * cur_pos;
-	cur_pos = quote;
+	/* advance past earlier quotes */
+	for(curnum=0; curnum < ((number*2)-2); curnum++)
+	{
+		/* advance until " character is reached */
+		while(*input != '"' && *input)	
+			input++;
+		input++;
+	}
 
 	/* advance until " character is reached */
 	while(*input != '"' && *input)	
@@ -119,33 +112,67 @@ void getquote(char * input, char * quote)
 	input++;
 
 	/* copy characters in until next '"' */
-	while(*input != '"' && *input)
+	while(*input != '"')
 	{
-		*cur_pos = *input;
-		cur_pos++;
+		/* if end is found without closing quote,
+		 * exit with error */
+		if(! *input)
+			return 1;
+		*quote = *input;
+		quote++;
 		input++;
 	}
 
-	*cur_pos = '\0';
+	*quote = '\0';
+
+	return 0;
 }
 
-void removeleadingspace(char * cur_line)
+int strcontains(char * source, char * comparison)
 {
-	int tmp, newpos;
+	int srcpos = 0; /* position in source string */
+	int compos = 0; /* position in comparison string */
 
-	char temp_str[STR_MAX];
+	while(srcpos < strlen(source) && srcpos < STR_MAX)
+	{
+		/* if the chars match, move to the next in comparison,
+		 * otherwise only advance the source */
+		if(source[srcpos] == comparison[compos])
+			compos++;
+		else
+			compos = 0;
+
+		/* if we got to the end of comparison, there's a match */
+		if(compos == strlen(comparison))
+			return 0;
+
+		srcpos++;
+	}
 
-	/* advance past whitespace */
-	tmp = 0;
-	while (cur_line[tmp] == ' ' || cur_line[tmp] == '\t')
-		tmp++;
+	/* if we got all the way through the source, there's no match */
+	return 1;
+}
 
-	/* copy from there to temp_str */
-	for(newpos = 0; cur_line[tmp]; tmp++, newpos++)
-		temp_str[newpos] = cur_line[tmp];
+int gzgetstr(char * newstr, gzFile * gzfile)
+/*	a reimplementation of gzgets, which doesn't choke at odd characters
+ */
+{
+	strcpy(newstr,""); /* reset cur_line */
+	char c;
 
-	temp_str[newpos] = '\0';
+	while((c = gzgetc(gzfile)) != -1)
+	{
+		if(c == '\n')
+			break;
+
+		/* append the char if there's room in the str */
+		if(strlen(newstr)+1 < STR_MAX)
+			strncat(newstr,&c,1);
+		/* if the line is too long just break to pick up the 2nd half on next pass,
+		 * not perfect (will miss strs cut into 2 sections), but good enough for now */
+		else
+			break;
+	}
 
-	/* copy temp_str to cur_line */
-	strncpy(cur_line, temp_str, sizeof(temp_str));
+	return 0;
 }
diff --git a/src/mediaxml.c b/src/mediaxml.c
index 02c2cad..54ed3ae 100644
--- a/src/mediaxml.c
+++ b/src/mediaxml.c
@@ -228,7 +228,10 @@ iss ** parsemedia(char * filepath, iss ** issue, int * no_of_issues)
 					if(xmlGetProp(itnode, "preview"))
 						strncpy(cur_media->preview_uri, (char *) xmlGetProp(itnode, "preview_uri"), STR_MAX);
 
-					strncpy(cur_media->title, (char *) xmlNodeListGetString(media_file, itnode->xmlChildrenNode, 1), STR_MAX);
+					if((char *) xmlNodeListGetString(media_file, itnode->xmlChildrenNode, 1))
+						strncpy(cur_media->title, (char *) xmlNodeListGetString(media_file, itnode->xmlChildrenNode, 1), STR_MAX);
+					else
+						strncpy(cur_media->title, "untitled", STR_MAX);
 				}
 				
 				itnode = itnode->next;