blob: 5f4a5942d8ece77e188faef83daabfd355e57a34 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
|
#!/bin/sed -f
# Rewrites markup codes found in the input text into plain-text equivalents:
# a Latin-1 pound sign, &...; entities, <tag>-style markers and <xNNNN>
# codes are each replaced (or deleted) by the substitutions below.
# Every command carries the `g` flag, so all occurrences on a line are
# handled.  Commands run top to bottom on each line, so a later command
# sees the output of the earlier ones; keep the order when editing.
#
# unfortunately they use latin-1 rather than ascii...
# (0xA3 is the pound sign in latin-1)
# note \x is a gnu extension
s/\xA3/£/g
# seemingly unneeded control characters:
# "&ff" followed by a single digit and ";", plus the literal strings
# "xxff0" and "xxff3", are deleted outright
s/&ff[0-9];//g
s/xxff0//g
s/xxff3//g
# <q> corresponds to a quote; it is replaced by two newlines, i.e. the
# quote starts after a blank line
# NOTE(review): "\n" in the replacement text is not guaranteed by POSIX sed;
# like \x above it works with GNU sed
s/<q>/\n\n/g
# quote text start and end points become curly double quotes
s/<qt>/“/g
s/<\/qt>/”/g
# new paragraph: the "∥" marker becomes a blank line
s/∥/\n\n/g
# a date: the opening tag is dropped and the closing tag becomes ":",
# so the date text ends up followed by a colon
s/<d>//g
s/<\/d>/:/g
# brackets: entity names map to literal curly braces
s/&obr;/{/g
s/&cbr;/}/g
# space
# NOTE(review): as displayed, the pattern and the replacement both look like
# a plain space, which would make this command a no-op.  The pattern may
# really be a non-breaking space (or an entity lost in transit) - confirm
# the actual bytes before relying on or editing this line.
s/ / /g
# superscript: the opening tag becomes a space (separating the superscript
# text from what precedes it) and the closing tag is dropped
s/<sup>/ /g
s/<\/sup>//g
# xNNNN codes (unknown what the NNNN refers to; not unicode)
# each is mapped to an ASCII stand-in; the ".." replacement is two literal
# dots, since "." has no special meaning in replacement text
s/<x2145>/`/g
s/<x2146>/'/g
s/<x2150>/-/g
s/<xA043>/../g
|