summaryrefslogtreecommitdiff
path: root/removeetags.pl
diff options
context:
space:
mode:
authorNick White <git@njw.me.uk>2013-05-26 21:52:41 +0100
committerNick White <git@njw.me.uk>2013-05-26 21:52:41 +0100
commit5d0c5974e2655a6f66153bffb67c9346c2c4a589 (patch)
tree5f0f2d773dda4bb5f7078c0971cc92cf5c46000d /removeetags.pl
parentb0055fe870a04fbd8eaef669c1ccfb0febfe8bc3 (diff)
downloadoed2dict-5d0c5974e2655a6f66153bffb67c9346c2c4a589.tar.bz2
oed2dict-5d0c5974e2655a6f66153bffb67c9346c2c4a589.zip
Correct e tag removal, correct headword separation
Diffstat (limited to 'removeetags.pl')
-rw-r--r--removeetags.pl4
1 files changed, 4 insertions, 0 deletions
diff --git a/removeetags.pl b/removeetags.pl
new file mode 100644
index 0000000..8de7875
--- /dev/null
+++ b/removeetags.pl
@@ -0,0 +1,4 @@
+# <e> tags seem to duplicate other preceding tags, so remove them
+#
+# sed doesn't support non-greedy matching, so we're using perl
+s/<e>.*?<\/e>//g