diff options
-rw-r--r-- | Makefile | 2 | ||||
-rw-r--r-- | TODO | 2 | ||||
-rw-r--r-- | extras/mkdjvu.sh | 24 | ||||
-rw-r--r-- | extras/mkocrdjvu.sh | 41 | ||||
-rwxr-xr-x | extras/mkpdf.sh | 1 |
5 files changed, 67 insertions, 3 deletions
Patch summary: adds the DjVu conversion scripts extras/mkdjvu.sh and
extras/mkocrdjvu.sh, lists them in EXTRAS in the Makefile, removes the
"add djvu convert script" entry from TODO, and adds a one-line
description to the header of extras/mkpdf.sh.

Note: the Makefile and TODO file headers below carry no "index" line;
the blob ids for those two files are not recorded here.

diff --git a/Makefile b/Makefile
--- a/Makefile
+++ b/Makefile
@@ -7,7 +7,7 @@ SRC = getgbook.c getabook.c getbnbook.c
 LIB = util.o
 GUI = getxbookgui.tcl
 DOC = README COPYING INSTALL LEGAL
-EXTRAS = extras/mkpdf.sh extras/mkocrpdf.sh extras/mkocrtxt.sh
+EXTRAS = extras/mkpdf.sh extras/mkocrpdf.sh extras/mkdjvu.sh extras/mkocrtxt.sh extras/mkocrdjvu.sh
 
 BIN = $(SRC:.c=)
 MAN = $(SRC:.c=.1)
diff --git a/TODO b/TODO
--- a/TODO
+++ b/TODO
@@ -8,8 +8,6 @@ in getgbook, check that downloaded page doesn't match 'page not available' image
 
 package for osx - https://github.com/kennethreitz/osx-gcc-installer
 
-add djvu convert script
-
 use something smarter than update in gui to stop freezing
 
 add https support to get (getabook can use it everywhere, others cannot)
diff --git a/extras/mkdjvu.sh b/extras/mkdjvu.sh
new file mode 100644
index 0000000..6e89235
--- /dev/null
+++ b/extras/mkdjvu.sh
@@ -0,0 +1,24 @@
+#!/bin/sh
+# See COPYING file for copyright and license details.
+#
+# Makes a DjVu
+# Requires imagemagick and djvulibre
+
+test $# -ne 1 && echo "Usage: $0 bookdir" && exit 1
+cd "$1" || exit 1
+
+for i in `ls`
+do
+	echo "$i"
+
+	convert "$i" "$i.ppm"
+	c44 "$i.ppm" "$i.djvu"
+
+	rm -f "$i.ppm"
+done
+
+djvm -c book.djvu *.djvu
+
+rm -f [0-9]*djvu
+
+echo "$1/book.djvu"
diff --git a/extras/mkocrdjvu.sh b/extras/mkocrdjvu.sh
new file mode 100644
index 0000000..c1207f5
--- /dev/null
+++ b/extras/mkocrdjvu.sh
@@ -0,0 +1,41 @@
+#!/bin/sh
+# See COPYING file for copyright and license details.
+#
+# Makes a DjVu with embedded text extracted by tesseract
+# Requires imagemagick, djvulibre and tesseract 3
+#
+# Note that this doesn't use bounding box info, so that text
+# reflows much better.
+
+test $# -ne 1 && echo "Usage: $0 bookdir" && exit 1
+cd "$1" || exit 1
+
+for i in `ls`
+do
+	echo "$i"
+
+	# create djvu compressed version
+	convert "$i" "$i.ppm"
+	c44 "$i.ppm" "$i.djvu"
+
+	# create a much bigger version of the page image, for better
+	# tesseract accuracy
+	width=`identify "$i" |awk '{print $3}'|awk -F x '{print $1}'`
+	height=`identify "$i" |awk '{print $3}'|awk -F x '{print $2}'`
+	bigwidth=`expr $width \* 4`
+	convert "$i" -geometry ${bigwidth}x "$i.tif"
+
+	tesseract "$i.tif" "$i" 2>&1 | sed '/Tesseract Open Source OCR Engine/d'
+
+	# convert tesseract output into djvused input
+	(printf "(page 0 0 $width $height \"";sed 's/"/\\"/g;'"s/\'/\\\'/g" < "$i.txt";printf \"")\n") > "$i.djvutxt"
+	djvused "$i.djvu" -e "select 1; set-txt $i.djvutxt" -s
+
+	rm -f "$i.ppm" "$i.tif" "$i.txt" "$i.djvutxt"
+done
+
+djvm -c book.djvu *.djvu
+
+rm -f [0-9]*djvu
+
+echo "$1/book.djvu"
diff --git a/extras/mkpdf.sh b/extras/mkpdf.sh
index a2ba2c7..6ea7e08 100755
--- a/extras/mkpdf.sh
+++ b/extras/mkpdf.sh
@@ -1,6 +1,7 @@
 #!/bin/sh
 # See COPYING file for copyright and license details.
 #
+# Makes a PDF
 # Requires imagemagick
 
 test $# -ne 1 && echo "Usage: $0 bookdir" && exit 1