summaryrefslogtreecommitdiff
path: root/htmtojargon.awk
blob: f67cd4990dd22e5f6071bae2f0b296f39f4bf009 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
#!/usr/bin/awk -f
# dirty xml reading is more fun
# requires nawk / gawk for sub()

# Fields are delimited by the closing </hg> tag, so on each record $1 is
# the text before the first </hg> and $2 is the text that follows it (up
# to the next </hg>, if there is one).
BEGIN { FS = "</hg>" }

# Runs for every input record: hand both fields to printdefs().
{
	printdefs($1, $2)
}

# printdefs(word, defs)
#   Print one line of the form ":word: definition" for every non-empty
#   piece of defs, where the pieces are separated by "<def>".
#
#   word - text printed between the two colons
#   defs - string holding zero or more "<def>"-separated definitions
#
#   The names after the extra spaces in the parameter list (n, parts, i)
#   are awk-style local variables; callers pass only word and defs.
function printdefs(word, defs,    n, parts, i) {
	# Split the headwords and other information about the word. This
	# ensures that e.g. 'n.' queries don't match every noun.
	# To be used in conjunction with an argument to dictfmt like this:
	#   --index-data-separator "	"
	# This is disabled at the moment as GoldenDict prior to 2c901d5
	# (2013-06-16) had a bug that would ignore any entry with this
	# extra information in the index.
	#sub("<i>", "	", word);

	n = split(defs, parts, "<def>");

	# split() stores the pieces in parts[1] .. parts[n], so start at 1;
	# touching parts[0] would only create a stray empty element.
	for (i = 1; i <= n; i++) {
		# Skip empty pieces, e.g. the text before a leading "<def>".
		if (parts[i] != "") {
			printf(":%s: %s\n", word, parts[i]);
		}
	}
}