summaryrefslogtreecommitdiff
path: root/htmtojargon.awk
blob: 323b7eea1f33eef685e964ec9f66510be36266d8 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
#!/usr/bin/awk -f
# dirty xml reading is more fun
# requires nawk / gawk (uses sub() and a user-defined function)

# Records are cut at "</hg>": $1 is the text before the first "</hg>",
# $2 is the text after it (up to the next "</hg>", if any).
BEGIN { FS = "</hg>" }

# For every input record, hand the two pieces to printdefs().
{
	printdefs($1, $2)
}

# printdefs(word, defs)
#   Print one ":word: definition" line for every non-empty definition.
#
#   word  headword text; its first "<i>" is replaced by a tab, splitting
#         the headword from the other information about the word. Meant
#         to be used in conjunction with dictfmt's --index-data-separator.
#   defs  definition text; individual definitions are delimited by "<def>".
#
#   n, i and array (after the extra spaces in the parameter list) are
#   locals. Declaring them as extra parameters is awk's only way to keep
#   them from leaking into, and clobbering, the global namespace.
function printdefs(word, defs,    n, i, array) {
	# word is a scalar passed by value, so the caller's field is untouched.
	sub("<i>", "\t", word);

	n = split(defs, array, "<def>");
	# split() fills array[1..n], so iteration starts at 1. Empty pieces
	# (e.g. the text before the first "<def>") are skipped.
	for (i = 1; i <= n; i++) {
		if (array[i] != "") {
			printf(":%s: %s\n", word, array[i]);
		}
	}
}