summaryrefslogtreecommitdiff
path: root/xmlcleanup.sed
blob: 40c6bde9243ed53ad6c159fd840c06a13811278c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
#!/bin/sed -f
#
# xmlcleanup.sed -- replace markup tags and entity codes with plain text.
#
# Usage: sed -f xmlcleanup.sed INPUT > OUTPUT
#
# Every command below is a global substitution (s/.../.../g) that sed applies
# to each input line, in the order written. Tags and entities that are not
# listed here pass through unchanged.
#
# Portability note: a line break inside replacement text is written as a
# backslash followed by a real newline. That is the form POSIX specifies;
# the "\n" escape in a replacement is an extension that some sed
# implementations output as a literal "n".

# seemingly unneeded control characters: dropped entirely
s/&ff[0-9];//g
s/xxff0//g
s/xxff2//g
s/xxff3//g

# <q> corresponds to a quote: start it after a blank line
s/<q>/\
\
/g

# quote text start and end points become curly double quotes
s/<qt>/“/g
s/<\/qt>/”/g

# new paragraph: &par; becomes a blank line
s/&par;/\
\
/g

# a date: drop the opening tag, turn the closing tag into a colon
s/<d>//g
s/<\/d>/:/g

# brackets: entity codes become literal braces
s/&obr;/{/g
s/&cbr;/}/g

# space
s/&nbsp;/ /g

# superscript: a single space replaces the opening tag, the closing tag is
# dropped
s/<sup>/ /g
s/<\/sup>//g

# xNNNN codes (unknown what the NNNN refers to; not unicode)
s/<x2145>/`/g
s/<x2146>/'/g
s/<x2150>/-/g
s/<x2151>/—/g
s/<xA043>/../g