Commit c7dd64c8599aeee9ddbbc69a97ab1073cd3baa2e
1 parent
5583bbf9
Exists in
master
and in
1 other branch
Implementação inicial do dockerfile para projeto OCR-SERVER
Showing
3 changed files
with
62 additions
and
2 deletions
Show diff stats
| @@ -0,0 +1,46 @@ | @@ -0,0 +1,46 @@ | ||
| 1 | +FROM ubuntu | |
| 2 | +# Refresh the package index in the same layer as each install so a cached, stale index is never reused. | |
| 3 | +RUN apt-get -y update && \ | |
| 4 | +    apt-get -y install libfile-find-rule-perl libfile-find-rule-perl-perl imagemagick gettext tesseract-ocr-por tesseract-ocr-eng pdftk poppler-utils unpaper | |
| 5 | + | |
| 6 | +RUN apt-get -y update && apt-get -y install git | |
| 7 | +RUN git clone https://github.com/coherentgraphics/cpdf-binaries.git | |
| 8 | +RUN cp cpdf-binaries/Linux-Intel-64bit/cpdf /usr/bin | |
| 9 | + | |
| 10 | +RUN useradd -m ocr | |
| 11 | +# Install the OCR script and its init script (the ocr-ubuntu variant is installed as /etc/init.d/ocr). | |
| 12 | +COPY usr/local/bin/ocr /usr/local/bin/ocr | |
| 13 | +RUN chmod +x /usr/local/bin/ocr | |
| 14 | +COPY etc/init.d/ocr-ubuntu /etc/init.d/ocr | |
| 15 | +# Chain with && so a failed chmod aborts the build instead of being masked by update-rc.d's exit status. | |
| 16 | +RUN chmod +x /etc/init.d/ocr && update-rc.d ocr defaults | |
| 17 | + | |
| 18 | +RUN apt-get -y update && apt-get -y install build-essential | |
| 19 | +# File::Touch is installed once, together with the other CPAN modules further down. | |
| 20 | + | |
| 21 | +# Working directories under /var/ocr-server/; -p also creates the parent directory. | |
| 22 | +RUN mkdir -p /var/ocr-server/Entrada | |
| 23 | +RUN mkdir -p /var/ocr-server/Saida | |
| 24 | +RUN mkdir -p /var/ocr-server/Originais_Processados | |
| 25 | +RUN mkdir -p /var/ocr-server/Erro | |
| 26 | +COPY entrypoint.sh /entrypoint.sh | |
| 27 | +RUN chmod +x /entrypoint.sh | |
| 28 | +# Perl modules installed through the CPAN shell. | |
| 29 | +RUN perl -MCPAN -e 'install File::Find::Rule;' | |
| 30 | +RUN perl -MCPAN -e 'install File::Touch;' | |
| 31 | +RUN perl -MCPAN -e 'install Sys::Syslog;' | |
| 32 | +RUN perl -MCPAN -e 'install IPC::Open3;' | |
| 33 | +RUN perl -MCPAN -e 'install IO::Select;' | |
| 34 | + | |
| 35 | +RUN apt-get -y update && apt-get -y install tesseract-ocr | |
| 36 | +RUN ln -s /usr/bin/pdftk /usr/local/bin/pdftk | |
| 37 | +RUN ln -s /usr/bin/pdfimages /usr/local/bin/pdfimages | |
| 38 | +RUN ln -s /usr/bin/tesseract /usr/local/bin/tesseract | |
| 39 | +RUN ln -s /usr/bin/pdfinfo /usr/local/bin/pdfinfo | |
| 40 | +RUN ln -s /usr/bin/pdffonts /usr/local/bin/pdffonts | |
| 41 | +RUN ln -s /usr/bin/pdftoppm /usr/local/bin/pdftoppm | |
| 42 | +RUN ln -s /usr/bin/cpdf /usr/local/bin/cpdf | |
| 43 | +# The entrypoint is referenced by absolute path so startup does not depend on the working directory. | |
| 44 | +VOLUME /var/ocr-server/ | |
| 45 | +CMD ["bash", "/entrypoint.sh"] | |
| 46 | + |
| @@ -0,0 +1,12 @@ | @@ -0,0 +1,12 @@ | ||
| 1 | +#!/usr/bin/env bash | |
| 2 | +# Container entrypoint: make sure the OCR directories exist, start the ocr service, then stay in the foreground. | |
| 3 | +mkdir -p /var/ocr-server/ | |
| 4 | +mkdir -p /var/ocr-server/Entrada | |
| 5 | +mkdir -p /var/ocr-server/Saida | |
| 6 | +mkdir -p /var/ocr-server/Originais_Processados | |
| 7 | +mkdir -p /var/ocr-server/Erro | |
| 8 | +chmod -R 777 /var/ocr-server | |
| 9 | +# -p above keeps startup quiet when the directories already exist and still creates them when they are missing. | |
| 10 | +service ocr start | |
| 11 | +# Block on tail so the container keeps running. NOTE(review): /var/log/dmesg may not exist in every base image - confirm. | |
| 12 | +tail -f /var/log/dmesg |
usr/local/bin/ocr
| @@ -84,8 +84,10 @@ my $CONVERT = '/usr/bin/convert'; | @@ -84,8 +84,10 @@ my $CONVERT = '/usr/bin/convert'; | ||
| 84 | # If it is needed further filtering | 84 | # If it is needed further filtering |
| 85 | #my $FILTER = '/usr/local/bin/textcleaner -g -e stretch -f 25 -o 10 -u -s 1 -T -p 10 '; | 85 | #my $FILTER = '/usr/local/bin/textcleaner -g -e stretch -f 25 -o 10 -u -s 1 -T -p 10 '; |
| 86 | 86 | ||
| 87 | -my @BASE_DIRS = ( '/mnt/protocolo_sede/DIGITALIZAÇÃO/ARQUIVOS PROTOCOLO/OCR/', | ||
| 88 | - '/mnt/protocolo_sede/DIGITALIZAÇÃO/ARQUIVOS_PROCESSOS/OCR/' ); | 87 | +#my @BASE_DIRS = ( '/mnt/protocolo_sede/DIGITALIZAÇÃO/ARQUIVOS PROTOCOLO/OCR/', |
| 88 | +# '/mnt/protocolo_sede/DIGITALIZAÇÃO/ARQUIVOS_PROCESSOS/OCR/' ); | ||
| 89 | + | ||
| 90 | +my @BASE_DIRS = ('/var/ocr-server/'); | ||
| 89 | 91 | ||
| 90 | my %SUB_DIRS = ( 'IN'=>'Entrada', 'OUT'=>'Saida', 'PROC'=>'Originais_Processados', 'TEMP'=>'/tmp/ocr_tmp', 'ERROR' => 'Erro' ); | 92 | my %SUB_DIRS = ( 'IN'=>'Entrada', 'OUT'=>'Saida', 'PROC'=>'Originais_Processados', 'TEMP'=>'/tmp/ocr_tmp', 'ERROR' => 'Erro' ); |
| 91 | 93 |