summaryrefslogtreecommitdiff
path: root/htmtojargon.awk
diff options
context:
space:
mode:
Diffstat (limited to 'htmtojargon.awk')
-rw-r--r--htmtojargon.awk5
1 files changed, 5 insertions, 0 deletions
diff --git a/htmtojargon.awk b/htmtojargon.awk
index 16c5356..323b7ee 100644
--- a/htmtojargon.awk
+++ b/htmtojargon.awk
@@ -1,5 +1,6 @@
#!/usr/bin/awk -f
# dirty xml reading is more fun
+# requires nawk / gawk for sub()
BEGIN {
FS = "</hg>"
@@ -8,6 +9,10 @@ BEGIN {
{ printdefs($1, $2); }
function printdefs(word, defs) {
+ # split the headwords and other information about the word,
+ # to be used in conjunction with dictfmt's --index-data-separator
+ sub("<i>", " ", word);
+
n = split(defs, array, "<def>");
for(i=0; i<=n; i++) {
if(array[i] != "") {