Commit bcbd6d9adcf8af6f5cbccb49e06c1e775fc54c49
Exists in master and in 1 other branch
Merge branch 'master' into Pre_versao_2.0
Showing 3 changed files with 61 additions and 2 deletions
Show diff stats
@@ -0,0 +1,45 @@ | @@ -0,0 +1,45 @@ | ||
1 | +FROM ubuntu | ||
2 | + | ||
3 | +COPY usr/local/bin/ocr /usr/local/bin/ocr | ||
4 | +COPY etc/init.d/ocr-ubuntu /etc/init.d/ocr | ||
5 | +COPY entrypoint.sh /entrypoint.sh | ||
6 | + | ||
7 | +RUN useradd -m ocr | ||
8 | + | ||
9 | +RUN apt-get -y update && \ | ||
10 | + apt-get -y install libfile-find-rule-perl libfile-find-rule-perl-perl imagemagick tesseract-ocr \ | ||
11 | + gettext tesseract-ocr-por tesseract-ocr-eng pdftk poppler-utils unpaper git build-essential | ||
12 | + | ||
13 | +RUN git clone https://github.com/coherentgraphics/cpdf-binaries.git && \ | ||
14 | + cp cpdf-binaries/Linux-Intel-64bit/cpdf /usr/bin | ||
15 | + | ||
16 | +RUN perl -MCPAN -e 'install File::Touch' && \ | ||
17 | + perl -MCPAN -e 'install File::Find::Rule;' && \ | ||
18 | + perl -MCPAN -e 'install File::Touch;' && \ | ||
19 | + perl -MCPAN -e 'install Sys::Syslog;' && \ | ||
20 | + perl -MCPAN -e 'install IPC::Open3;' && \ | ||
21 | + perl -MCPAN -e 'install IO::Select;' | ||
22 | + | ||
23 | +RUN chmod +x /usr/local/bin/ocr && \ | ||
24 | + chmod +x /etc/init.d/ocr && \ | ||
25 | + update-rc.d ocr defaults | ||
26 | + | ||
27 | +RUN mkdir /var/ocr-server/ && \ | ||
28 | + mkdir -p /var/ocr-server/Entrada && \ | ||
29 | + mkdir -p /var/ocr-server/Saida && \ | ||
30 | + mkdir -p /var/ocr-server/Originais_Processados && \ | ||
31 | + mkdir -p /var/ocr-server/Erro | ||
32 | +RUN chmod +x entrypoint.sh | ||
33 | + | ||
34 | +RUN ln -s /usr/bin/pdftk /usr/local/bin/pdftk && \ | ||
35 | + ln -s /usr/bin/pdfimages /usr/local/bin/pdfimages && \ | ||
36 | + ln -s /usr/bin/tesseract /usr/local/bin/tesseract && \ | ||
37 | + ln -s /usr/bin/pdfinfo /usr/local/bin/pdfinfo && \ | ||
38 | + ln -s /usr/bin/pdffonts /usr/local/bin/pdffonts && \ | ||
39 | + ln -s /usr/bin/pdftoppm /usr/local/bin/pdftoppm && \ | ||
40 | + ln -s /usr/bin/cpdf /usr/local/bin/cpdf | ||
41 | + | ||
42 | +VOLUME /var/ocr-server/ | ||
43 | + | ||
44 | +CMD ["bash", "entrypoint.sh"] | ||
45 | + |
@@ -0,0 +1,12 @@ | @@ -0,0 +1,12 @@ | ||
1 | +#!/usr/bin/env bash | ||
2 | + | ||
3 | +mkdir /var/ocr-server/ | ||
4 | +mkdir /var/ocr-server/Entrada | ||
5 | +mkdir /var/ocr-server/Saida | ||
6 | +mkdir /var/ocr-server/Originais_Processados | ||
7 | +mkdir /var/ocr-server/Erro | ||
8 | +chmod -R 777 /var/ocr-server | ||
9 | + | ||
10 | +service ocr start | ||
11 | + | ||
12 | +tail -f /var/log/dmesg |
usr/local/bin/ocr
@@ -112,8 +112,10 @@ my $CONVERT = 'convert'; | @@ -112,8 +112,10 @@ my $CONVERT = 'convert'; | ||
112 | # If it is needed further filtering | 112 | # If it is needed further filtering |
113 | #my $FILTER = '/usr/local/bin/textcleaner -g -e stretch -f 25 -o 10 -u -s 1 -T -p 10 '; | 113 | #my $FILTER = '/usr/local/bin/textcleaner -g -e stretch -f 25 -o 10 -u -s 1 -T -p 10 '; |
114 | 114 | ||
115 | -my @BASE_DIRS = ( '/mnt/protocolo_sede/DIGITALIZAÇÃO/ARQUIVOS PROTOCOLO/OCR/', | ||
116 | - '/mnt/protocolo_sede/DIGITALIZAÇÃO/ARQUIVOS_PROCESSOS/OCR/' ); | 115 | +#my @BASE_DIRS = ( '/mnt/protocolo_sede/DIGITALIZAÇÃO/ARQUIVOS PROTOCOLO/OCR/', |
116 | +# '/mnt/protocolo_sede/DIGITALIZAÇÃO/ARQUIVOS_PROCESSOS/OCR/' ); | ||
117 | + | ||
118 | +my @BASE_DIRS = ('/var/ocr-server/'); | ||
117 | 119 | ||
118 | my %SUB_DIRS = ( 'IN'=>'Entrada', 'OUT'=>'Saida', 'PROC'=>'Originais_Processados', 'TEMP'=>'/tmp/ocr_tmp', 'ERROR' => 'Erro' ); | 120 | my %SUB_DIRS = ( 'IN'=>'Entrada', 'OUT'=>'Saida', 'PROC'=>'Originais_Processados', 'TEMP'=>'/tmp/ocr_tmp', 'ERROR' => 'Erro' ); |
119 | 121 |