summaryrefslogtreecommitdiff
path: root/htmtojargon.awk
blob: 265c1c6d1a76f6633737c04fdb2621338aa5fda8 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
#!/usr/bin/awk -f
# dirty xml reading is more fun
# requires nawk / gawk for sub()

# Use the literal closing tag "</hg>" as the field separator, so that in
# each input record $1 is the text before the first "</hg>" and $2 is the
# text after it.
BEGIN { FS = "</hg>" }

# For every input record, hand the two fields to printdefs() as its
# `word` and `defs` arguments.
{
	printdefs($1, $2)
}

# printdefs(word, defs)
#   Print one line of the form ":<word>: <definition>" on standard output
#   for every non-empty piece obtained by splitting `defs` on "<def>".
#
#   word  - text taken from before "</hg>" in the record; the first "<i>"
#           in it is replaced by a tab before printing.
#   defs  - text taken from after "</hg>" in the record.
#
#   n, i and array are function-local scratch variables. awk declares
#   locals as extra parameters, so callers keep passing two arguments.
function printdefs(word, defs,    n, i, array) {
	# Split the headwords and other information about the word. This
	# ensures that e.g. 'n.' queries don't match every noun.
	# To be used in conjunction with an argument to dictfmt like this:
	#   --index-data-separator "	"
	sub("<i>", "	", word);

	n = split(defs, array, "<def>");
	# split() fills array[1] .. array[n]; index 0 is never populated, so
	# start at 1 instead of touching (and thereby creating) array[0].
	for (i = 1; i <= n; i++) {
		# Skip empty pieces, e.g. the one before a leading "<def>".
		if (array[i] != "") {
			printf(":%s: %s\n", word, array[i]);
		}
	}
}