Commit cb6225a47553f56ff1a7086c544b0eccf14f61d9 (1 parent: bcbd6d9a)
Exists in master and in 1 other branch
Implementação do container Docker para provisionamento do ocr-server
Showing 3 changed files with 89 additions and 28 deletions
Show diff stats
Dockerfile
1 | -FROM ubuntu | ||
2 | 1 | ||
2 | +FROM ubuntu:14.04 | ||
3 | + | ||
4 | +# Cópia de arquivos do projeto OCR-SERVER | ||
3 | COPY usr/local/bin/ocr /usr/local/bin/ocr | 5 | COPY usr/local/bin/ocr /usr/local/bin/ocr |
4 | COPY etc/init.d/ocr-ubuntu /etc/init.d/ocr | 6 | COPY etc/init.d/ocr-ubuntu /etc/init.d/ocr |
5 | COPY entrypoint.sh /entrypoint.sh | 7 | COPY entrypoint.sh /entrypoint.sh |
6 | 8 | ||
7 | RUN useradd -m ocr | 9 | RUN useradd -m ocr |
8 | 10 | ||
9 | -RUN apt-get -y update && \ | ||
10 | - apt-get -y install libfile-find-rule-perl libfile-find-rule-perl-perl imagemagick tesseract-ocr \ | ||
11 | - gettext tesseract-ocr-por tesseract-ocr-eng pdftk poppler-utils unpaper git build-essential | 11 | +RUN apt-get -y update |
12 | +RUN apt-get -y install build-essential cmake libtool yasm pkg-config subversion git libgcj14 apt-utils | ||
13 | +RUN apt-get -y install curl | ||
14 | +RUN apt-get -y install libtiff-dev libpng-dev libopenjpeg-dev libjpeg8-dev libjpeg-turbo8-dev libjpeg-dev libgif-dev zlib1g-dev libicu-dev libpango1.0-dev libcairo2-dev libfontconfig1-dev libgettextpo-dev libnss3-dev | ||
15 | +#RUN apt-get -y install ttf-mscorefonts-installer | ||
16 | + | ||
17 | +RUN apt-get -y install wget cabextract xfonts-utils | ||
18 | +RUN wget -O mscorefonts.deb http://ftp.us.debian.org/debian/pool/contrib/m/msttcorefonts/ttf-mscorefonts-installer_3.4+nmu1_all.deb && \ | ||
19 | + dpkg -i mscorefonts.deb | ||
20 | + | ||
21 | +# Instalação do Perl 5.1 e demais módulos | ||
22 | +RUN apt-get -y install perl | ||
23 | +RUN perl -MCPAN -e 'install File::Touch' | ||
24 | +RUN perl -MCPAN -e 'install File::Find::Rule;' | ||
25 | +RUN perl -MCPAN -e 'install File::Touch;' | ||
26 | +RUN perl -MCPAN -e 'install Sys::Syslog;' | ||
27 | +RUN perl -MCPAN -e 'install IPC::Open3;' | ||
28 | +RUN perl -MCPAN -e 'install IO::Select;' | ||
29 | + | ||
30 | +# Tesseract-ocr 3.05, com dicionários inglês e português | ||
31 | +# Bibliotecas para o Tesseract: Leptonica | ||
32 | +RUN git clone https://github.com/DanBloomberg/leptonica.git | ||
33 | +RUN apt-get -y install automake | ||
34 | +RUN cd leptonica && ./autobuild && ./configure && make all install | ||
35 | + | ||
36 | +# Bibliotecas para o Tesseract: Libav | ||
37 | +RUN git clone https://github.com/libav/libav.git | ||
38 | +RUN export PKG_CONFIG_PATH=/usr/lib:/usr/local/lib:/usr/local/src/leptonica/ && \ | ||
39 | + cd libav && ./configure --enable-sram && make all install -w | ||
40 | + | ||
41 | +# Tesseract | ||
42 | +RUN git clone https://github.com/tesseract-ocr/tesseract.git | ||
43 | +RUN apt-get -y install autoconf-archive | ||
44 | +RUN cd tesseract && ./autogen.sh && ./configure && make all install | ||
45 | + | ||
46 | +#RUN git clone https://github.com/tesseract-ocr/tessdata.git | ||
47 | +#RUN cp -avR tessdata/* /usr/local/share/tessdata/ | ||
48 | +RUN wget https://github.com/tesseract-ocr/tessdata/blob/master/eng.traineddata?raw=true -O /usr/local/share/tessdata/eng.traineddata && \ | ||
49 | + wget https://github.com/tesseract-ocr/tessdata/blob/master/por.traineddata?raw=true -O /usr/local/share/tessdata/por.traineddata && \ | ||
50 | + wget https://github.com/tesseract-ocr/tessdata/blob/master/osd.traineddata?raw=true -O /usr/local/share/tessdata/osd.traineddata | ||
51 | + | ||
52 | +# export LIBCURL_LIBS=/usr/lib/x86_64-linux-gnu/ && \ | ||
53 | +RUN apt-get -y install libcurl4-gnutls-dev | ||
54 | +RUN git clone https://anongit.freedesktop.org/git/poppler/poppler.git && \ | ||
55 | + cd poppler && ./autogen.sh && ./configure --enable-cmyk --enable-libcurl && make all install && cd .. | ||
56 | + | ||
57 | +# pdftk, versão 2.02 ou superior | ||
58 | +RUN wget https://www.pdflabs.com/tools/pdftk-the-pdf-toolkit/pdftk-2.02-src.zip | ||
59 | +RUN apt-get -y install unzip libgcj14 | ||
60 | +RUN unzip pdftk-2.02-src.zip && rm -f pdftk-2.02-src.zip | ||
61 | + | ||
62 | +RUN apt-get -y update | ||
63 | +RUN apt-get -y install build-essential libfile-find-rule-perl libfile-find-rule-perl-perl imagemagick | ||
64 | +RUN apt-get -y install gettext unpaper git | ||
65 | +RUN apt-get -y install libtiff5 libpng12-0 libjpeg-turbo8 zlib1g libpango1.0-0 libcairo2 fontconfig libwebp5 | ||
66 | +RUN apt-get -y install libpango1.0-0 libcairo2 libfontconfig1 libgettextpo0 | ||
67 | +RUN apt-get -y install pkg-config libgcj14 libtiff-dev libpng-dev libopenjpeg-dev libjpeg8-dev libjpeg-turbo8-dev libjpeg-dev libgif-dev | ||
68 | +RUN apt-get -y install zlib1g-dev libicu-dev libpango1.0-dev libcairo2-dev libfontconfig1-dev libgettextpo-dev libnss3-dev | ||
69 | + | ||
70 | +RUN apt-get install -y gcc | ||
71 | +RUN apt-get install -y gcj-jdk | ||
72 | +RUN cd pdftk-2.02-dist/pdftk && make -f Makefile.Redhat all install | ||
73 | + | ||
74 | +# Ghostscript 9.18 ou superior | ||
75 | +RUN wget http://downloads.ghostscript.com/public/old-gs-releases/ghostscript-9.18.tar.gz | ||
76 | +RUN tar xvozf ghostscript-9.18.tar.gz && rm -f ghostscript-9.18.tar.gz | ||
77 | +RUN cd ghostscript-9.18 && ls && ./autogen.sh; ./configure && make all install | ||
12 | 78 | ||
13 | RUN git clone https://github.com/coherentgraphics/cpdf-binaries.git && \ | 79 | RUN git clone https://github.com/coherentgraphics/cpdf-binaries.git && \ |
14 | cp cpdf-binaries/Linux-Intel-64bit/cpdf /usr/bin | 80 | cp cpdf-binaries/Linux-Intel-64bit/cpdf /usr/bin |
15 | 81 | ||
16 | -RUN perl -MCPAN -e 'install File::Touch' && \ | ||
17 | - perl -MCPAN -e 'install File::Find::Rule;' && \ | ||
18 | - perl -MCPAN -e 'install File::Touch;' && \ | ||
19 | - perl -MCPAN -e 'install Sys::Syslog;' && \ | ||
20 | - perl -MCPAN -e 'install IPC::Open3;' && \ | ||
21 | - perl -MCPAN -e 'install IO::Select;' | ||
22 | - | ||
23 | RUN chmod +x /usr/local/bin/ocr && \ | 82 | RUN chmod +x /usr/local/bin/ocr && \ |
24 | chmod +x /etc/init.d/ocr && \ | 83 | chmod +x /etc/init.d/ocr && \ |
25 | update-rc.d ocr defaults | 84 | update-rc.d ocr defaults |
@@ -28,18 +87,16 @@ RUN mkdir /var/ocr-server/ && \ | @@ -28,18 +87,16 @@ RUN mkdir /var/ocr-server/ && \ | ||
28 | mkdir -p /var/ocr-server/Entrada && \ | 87 | mkdir -p /var/ocr-server/Entrada && \ |
29 | mkdir -p /var/ocr-server/Saida && \ | 88 | mkdir -p /var/ocr-server/Saida && \ |
30 | mkdir -p /var/ocr-server/Originais_Processados && \ | 89 | mkdir -p /var/ocr-server/Originais_Processados && \ |
31 | - mkdir -p /var/ocr-server/Erro | ||
32 | -RUN chmod +x entrypoint.sh | 90 | + mkdir -p /var/ocr-server/Erro && \ |
91 | + chmod +x entrypoint.sh | ||
33 | 92 | ||
34 | -RUN ln -s /usr/bin/pdftk /usr/local/bin/pdftk && \ | ||
35 | - ln -s /usr/bin/pdfimages /usr/local/bin/pdfimages && \ | ||
36 | - ln -s /usr/bin/tesseract /usr/local/bin/tesseract && \ | ||
37 | - ln -s /usr/bin/pdfinfo /usr/local/bin/pdfinfo && \ | ||
38 | - ln -s /usr/bin/pdffonts /usr/local/bin/pdffonts && \ | ||
39 | - ln -s /usr/bin/pdftoppm /usr/local/bin/pdftoppm && \ | ||
40 | - ln -s /usr/bin/cpdf /usr/local/bin/cpdf | 93 | +RUN mkdir -p /tmp/ocr_dev/ && \ |
94 | + mkdir -p /tmp/ocr_dev/Entrada && \ | ||
95 | + mkdir -p /tmp/ocr_dev/Saida && \ | ||
96 | + mkdir -p /tmp/ocr_dev/Originais_Processados && \ | ||
97 | + mkdir -p /tmp/ocr_dev/Erro && \ | ||
98 | + chmod -R 777 /tmp/ocr_dev | ||
41 | 99 | ||
42 | VOLUME /var/ocr-server/ | 100 | VOLUME /var/ocr-server/ |
43 | 101 | ||
44 | -CMD ["bash", "entrypoint.sh"] | ||
45 | - | 102 | +CMD ["bash", "entrypoint.sh"] |
46 | \ No newline at end of file | 103 | \ No newline at end of file |
entrypoint.sh
1 | #!/usr/bin/env bash | 1 | #!/usr/bin/env bash |
2 | 2 | ||
3 | -mkdir /var/ocr-server/ | ||
4 | -mkdir /var/ocr-server/Entrada | ||
5 | -mkdir /var/ocr-server/Saida | ||
6 | -mkdir /var/ocr-server/Originais_Processados | ||
7 | -mkdir /var/ocr-server/Erro | 3 | +mkdir -p /var/ocr-server/ |
4 | +mkdir -p /var/ocr-server/Entrada | ||
5 | +mkdir -p /var/ocr-server/Saida | ||
6 | +mkdir -p /var/ocr-server/Originais_Processados | ||
7 | +mkdir -p /var/ocr-server/Erro | ||
8 | chmod -R 777 /var/ocr-server | 8 | chmod -R 777 /var/ocr-server |
9 | 9 | ||
10 | service ocr start | 10 | service ocr start |
usr/local/bin/ocr
@@ -77,7 +77,11 @@ use Sys::Hostname; | @@ -77,7 +77,11 @@ use Sys::Hostname; | ||
77 | use IPC::Open3; | 77 | use IPC::Open3; |
78 | use IO::Select; | 78 | use IO::Select; |
79 | 79 | ||
80 | -my $DEBUG = 2; | 80 | +use Getopt::Std; |
81 | +use vars qw( $opt_d ); | ||
82 | + | ||
83 | +#my $DEBUG = 2 | ||
84 | +my $DEBUG = ($opt_d ? 2 : 0); | ||
81 | my $MAX_PGS = ($DEBUG==2 ? 1 : 0 + `cat /proc/cpuinfo | grep -e '^processor' | wc -l`); | 85 | my $MAX_PGS = ($DEBUG==2 ? 1 : 0 + `cat /proc/cpuinfo | grep -e '^processor' | wc -l`); |
82 | my $MAX_FILES = ( !$DEBUG ? 2 : 1) ; | 86 | my $MAX_FILES = ( !$DEBUG ? 2 : 1) ; |
83 | 87 |