summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- Makefile 2
-rw-r--r-- TODO 2
-rw-r--r-- extras/mkdjvu.sh 24
-rw-r--r-- extras/mkocrdjvu.sh 41
-rwxr-xr-x extras/mkpdf.sh 1
5 files changed, 67 insertions, 3 deletions
diff --git a/Makefile b/Makefile
index 68a300c..b2c3979 100644
--- a/Makefile
+++ b/Makefile
@@ -7,7 +7,7 @@ SRC = getgbook.c getabook.c getbnbook.c
LIB = util.o
GUI = getxbookgui.tcl
DOC = README COPYING INSTALL LEGAL
-EXTRAS = extras/mkpdf.sh extras/mkocrpdf.sh extras/mkocrtxt.sh
+EXTRAS = extras/mkpdf.sh extras/mkocrpdf.sh extras/mkdjvu.sh extras/mkocrtxt.sh extras/mkocrdjvu.sh
BIN = $(SRC:.c=)
MAN = $(SRC:.c=.1)
diff --git a/TODO b/TODO
index 8f01eec..3c51448 100644
--- a/TODO
+++ b/TODO
@@ -8,8 +8,6 @@ in getgbook, check that downloaded page doesn't match 'page not available' image
package for osx - https://github.com/kennethreitz/osx-gcc-installer
-add djvu convert script
-
use something smarter than update in gui to stop freezing
add https support to get (getabook can use it everywhere, others cannot)
diff --git a/extras/mkdjvu.sh b/extras/mkdjvu.sh
new file mode 100644
index 0000000..6e89235
--- /dev/null
+++ b/extras/mkdjvu.sh
@@ -0,0 +1,24 @@
+#!/bin/sh
+# See COPYING file for copyright and license details.
+#
+# Makes a DjVu (book.djvu) from the page images in bookdir
+# Requires imagemagick and djvulibre

+test $# -ne 1 && echo "Usage: $0 bookdir" && exit 1
+cd "$1" || exit 1
+# remove the result of an earlier run so it is not picked up as a page
+rm -f book.djvu
+for i in *
+do
+ test -f "$i" || continue
+ echo "$i"
+ # render the page as a temporary PPM and encode that with c44
+ convert "$i" "$i.ppm" && c44 "$i.ppm" "$i.djvu"
+ rm -f "$i.ppm"
+done

+djvm -c book.djvu *.djvu

+rm -f [0-9]*djvu

+echo "$1/book.djvu"
diff --git a/extras/mkocrdjvu.sh b/extras/mkocrdjvu.sh
new file mode 100644
index 0000000..c1207f5
--- /dev/null
+++ b/extras/mkocrdjvu.sh
@@ -0,0 +1,41 @@
+#!/bin/sh
+# See COPYING file for copyright and license details.
+#
+# Makes a DjVu with embedded text extracted by tesseract
+# Requires imagemagick, djvulibre and tesseract 3
+#
+# Note that this doesn't use bounding box info, so that text
+# reflows much better.

+test $# -ne 1 && echo "Usage: $0 bookdir" && exit 1
+cd "$1" || exit 1
+rm -f book.djvu # a previous result must not be treated as a page
+for i in *
+do
+ test -f "$i" || continue
+ echo "$i"
+ # create djvu compressed version
+ convert "$i" "$i.ppm"
+ c44 "$i.ppm" "$i.djvu"

+ # create a much bigger version of the page image, for better
+ # tesseract accuracy; -format reads the size whatever the file name
+ width=`identify -format %w "$i"`
+ height=`identify -format %h "$i"`
+ bigwidth=`expr $width \* 4`
+ convert "$i" -geometry ${bigwidth}x "$i.tif"

+ tesseract "$i.tif" "$i" 2>&1 | sed '/Tesseract Open Source OCR Engine/d'

+ # convert tesseract output into djvused input (escape \ first, then " and ')
+ (printf '(page 0 0 %s %s "' "$width" "$height"; sed -e 's/\\/\\\\/g' -e 's/"/\\"/g' -e "s/'/\\\\'/g" < "$i.txt"; printf '")\n') > "$i.djvutxt"
+ djvused "$i.djvu" -e "select 1; set-txt $i.djvutxt" -s

+ rm -f "$i.ppm" "$i.tif" "$i.txt" "$i.djvutxt"
+done

+djvm -c book.djvu *.djvu

+rm -f [0-9]*djvu

+echo "$1/book.djvu"
diff --git a/extras/mkpdf.sh b/extras/mkpdf.sh
index a2ba2c7..6ea7e08 100755
--- a/extras/mkpdf.sh
+++ b/extras/mkpdf.sh
@@ -1,6 +1,7 @@
#!/bin/sh
# See COPYING file for copyright and license details.
#
+# Makes a PDF
# Requires imagemagick
test $# -ne 1 && echo "Usage: $0 bookdir" && exit 1