diff options
author | Nick White <git@njw.me.uk> | 2012-02-02 00:34:03 +0000 |
---|---|---|
committer | Nick White <git@njw.me.uk> | 2012-02-02 00:34:03 +0000 |
commit | e1fada376c4b883d53d6293babb899aa1c510b65 (patch) | |
tree | 2e2d1be58e561060215f10dbdc2455d213ee1280 /extras | |
parent | 3f408e55b4ce41f0e6dc2bab37384e3a29fe1808 (diff) |
Add djvu creation scripts
Diffstat (limited to 'extras')
-rw-r--r-- | extras/mkdjvu.sh | 24 | ||||
-rw-r--r-- | extras/mkocrdjvu.sh | 41 | ||||
-rwxr-xr-x | extras/mkpdf.sh | 1 |
3 files changed, 66 insertions, 0 deletions
# ======================================================================
# extras/mkdjvu.sh  (new file, mode 100644; blob 6e89235 in the commit,
# shown here re-formatted and revised)
# ======================================================================
#!/bin/sh
# See COPYING file for copyright and license details.
#
# Makes a DjVu
# Requires imagemagick and djvulibre
#
# Usage: mkdjvu.sh bookdir
#
# Encodes every regular file in bookdir as a single-page DjVu, bundles
# the pages (in glob order) into bookdir/book.djvu, removes the per-page
# files and prints the path of the finished book on stdout.
# Exits 1 on a usage error, if bookdir cannot be entered, if no page
# could be converted, or if djvm fails.

if test $# -ne 1; then
	echo "Usage: $0 bookdir" >&2
	exit 1
fi
bookdir=$1
cd "$bookdir" || exit 1

# The positional parameters are reused as the list of page files that
# were actually produced. It is the only list POSIX sh offers that
# keeps names containing whitespace intact.
set --

# Glob instead of parsing `ls`, whose output the shell would word-split.
for i in *
do
	# Skip directories, the unexpanded "*" of an empty directory, and
	# DjVu files left over from an earlier run (book.djvu included).
	test -f "$i" || continue
	case $i in
	*.djvu) continue ;;
	esac

	printf '%s\n' "$i"

	# Go through a temporary PPM, which is what gets handed to c44.
	if convert "$i" "$i.ppm" && c44 "$i.ppm" "$i.djvu"; then
		set -- "$@" "$i.djvu"
	else
		printf '%s: cannot convert %s, skipping\n' "$0" "$i" >&2
	fi

	rm -f -- "$i.ppm"
done

if test $# -eq 0; then
	printf '%s: no pages found in %s\n' "$0" "$bookdir" >&2
	exit 1
fi

# Bundle exactly the pages made above, so an old book.djvu can never be
# both an input and the output of djvm.
djvm -c book.djvu "$@" || exit 1

rm -f -- "$@"

printf '%s\n' "$bookdir/book.djvu"

# ======================================================================
# extras/mkocrdjvu.sh  (new file, mode 100644; blob c1207f5 in the
# commit, shown here re-formatted and revised)
# ======================================================================
#!/bin/sh
# See COPYING file for copyright and license details.
#
# Makes a DjVu with embedded text extracted by tesseract
# Requires imagemagick, djvulibre and tesseract 3
#
# Note that this doesn't use bounding box info, so that text
# reflows much better.
#
# Usage: mkocrdjvu.sh bookdir
#
# Works like mkdjvu.sh, and additionally stores the text recognised by
# tesseract in each page before the pages are bundled. A page whose OCR
# step fails is still bundled, just without text.

if test $# -ne 1; then
	echo "Usage: $0 bookdir" >&2
	exit 1
fi
bookdir=$1
cd "$bookdir" || exit 1

# Scratch file for the djvused text expression. Its name is kept free of
# whitespace because it is embedded, unquoted, in the djvused command
# string below.
sexpr=mkocrdjvu.$$.djvutxt

# Runs tesseract on the page image $1 and stores the recognised text in
# $1.djvu as a single page-sized text zone.
# Leaves $1.tif, $1.txt and $sexpr behind for the caller to remove.
# Returns non-zero if any step fails.
embed_text() {
	# Ask identify for the pixel size explicitly instead of picking a
	# column out of its default output, which shifts when the file
	# name contains a space.
	size=$(identify -format '%wx%h' "$1") || return 1
	width=${size%x*}
	height=${size#*x}

	# create a much bigger version of the page image, for better
	# tesseract accuracy
	convert "$1" -geometry "$((width * 4))x" "$1.tif" || return 1

	# tesseract writes its result to $1.txt; its banner line is noise.
	tesseract "$1.tif" "$1" 2>&1 |
		sed '/Tesseract Open Source OCR Engine/d'
	test -f "$1.txt" || return 1

	# convert tesseract output into djvused input
	# The text is wrapped in a double-quoted string, so backslashes and
	# double quotes are escaped. The earlier rule aimed at apostrophes
	# was dropped: in GNU sed a pattern of \' anchors at the end of the
	# pattern space rather than matching an apostrophe, and an
	# apostrophe cannot end a double-quoted string in any case.
	{
		printf '(page 0 0 %s %s "' "$width" "$height"
		sed 's/\\/\\\\/g; s/"/\\"/g' < "$1.txt"
		printf '")\n'
	} > "$sexpr"

	djvused "$1.djvu" -e "select 1; set-txt $sexpr" -s
}

# Positional parameters double as the list of pages produced (see the
# loop); embed_text gets its own set while it runs.
set --

for i in *
do
	# Skip directories, the unexpanded "*" of an empty directory, and
	# DjVu files left over from an earlier run (book.djvu included).
	test -f "$i" || continue
	case $i in
	*.djvu) continue ;;
	esac

	printf '%s\n' "$i"

	# create djvu compressed version
	if convert "$i" "$i.ppm" && c44 "$i.ppm" "$i.djvu"; then
		set -- "$@" "$i.djvu"
		embed_text "$i" ||
			printf '%s: no text embedded for %s\n' "$0" "$i" >&2
	else
		printf '%s: cannot convert %s, skipping\n' "$0" "$i" >&2
	fi

	rm -f -- "$i.ppm" "$i.tif" "$i.txt" "$sexpr"
done

if test $# -eq 0; then
	printf '%s: no pages found in %s\n' "$0" "$bookdir" >&2
	exit 1
fi

# Bundle exactly the pages made above, so an old book.djvu can never be
# both an input and the output of djvm.
djvm -c book.djvu "$@" || exit 1

rm -f -- "$@"

printf '%s\n' "$bookdir/book.djvu"

# ======================================================================
# extras/mkpdf.sh  (mode 100755, index a2ba2c7..6ea7e08,
# hunk @@ -1,6 +1,7 @@; the collapsed hunk text follows on the next line)
# ======================================================================
#!/bin/sh # See COPYING file for copyright and license details. # +# Makes a PDF # Requires imagemagick test $# -ne 1 && echo "Usage: $0 bookdir" && exit 1 |