From c7dd64c8599aeee9ddbbc69a97ab1073cd3baa2e Mon Sep 17 00:00:00 2001
From: Guilherme Andrade Del Cantoni
Date: Wed, 7 Jun 2017 18:45:27 -0300
Subject: [PATCH] Implementação inicial do dockerfile para projeto OCR-SERVER

---
Review notes (free-form text placed after the "---" marker; it is not part
of the commit message).

What this patch does
  * Dockerfile: installs the OCR toolchain from apt and CPAN, fetches the
    cpdf binary from the cpdf-binaries repository, copies usr/local/bin/ocr
    and the etc/init.d/ocr-ubuntu init script into the image, creates
    /var/ocr-server with its Entrada, Saida, Originais_Processados and Erro
    subdirectories, symlinks the PDF/OCR tools into /usr/local/bin, and
    declares /var/ocr-server/ as a volume.
  * entrypoint.sh: (re)creates the working directories, makes them
    world-writable, starts the "ocr" service and then tails
    /var/log/dmesg (presumably to keep the container's main process
    alive -- confirm that file exists in the chosen base image).
  * usr/local/bin/ocr: points @BASE_DIRS at /var/ocr-server/ and keeps the
    previous network-share paths as comments.

Changes made during review (line counts per file are unchanged)
  * Every "apt-get install" now runs "apt-get update" in the same RUN
    instruction, so a rebuilt layer never installs against a cached,
    outdated package index.
  * "chmod ...; update-rc.d ..." now uses "&&" so a chmod failure stops
    the build instead of being ignored.
  * The entrypoint is referenced by its absolute path /entrypoint.sh (the
    COPY destination) in both the chmod and the CMD, instead of relying on
    the working directory being "/".
  * entrypoint.sh uses "mkdir -p": the image already contains these
    directories, so plain mkdir reported "File exists" on every start.

Caveats
  * The "index" lines still carry the blob ids of the original submission.
  * Blank context lines and continuation-line indentation in the
    usr/local/bin/ocr hunk were reconstructed to match the "-84,8 +84,10"
    header; verify them against blob 2af3d31 before applying.

 Dockerfile        | 46 ++++++++++++++++++++++++++++++++++++++++++++++
 entrypoint.sh     | 12 ++++++++++++
 usr/local/bin/ocr |  6 ++++--
 3 files changed, 62 insertions(+), 2 deletions(-)
 create mode 100644 Dockerfile
 create mode 100644 entrypoint.sh

diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..e1113f7
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,46 @@
+FROM ubuntu
+
+RUN apt-get -y update \
+ && apt-get -y install libfile-find-rule-perl libfile-find-rule-perl-perl imagemagick gettext tesseract-ocr-por tesseract-ocr-eng pdftk poppler-utils unpaper
+
+RUN apt-get -y update && apt-get -y install git
+RUN git clone https://github.com/coherentgraphics/cpdf-binaries.git
+RUN cp cpdf-binaries/Linux-Intel-64bit/cpdf /usr/bin
+
+RUN useradd -m ocr
+
+COPY usr/local/bin/ocr /usr/local/bin/ocr
+RUN chmod +x /usr/local/bin/ocr
+COPY etc/init.d/ocr-ubuntu /etc/init.d/ocr
+
+RUN chmod +x /etc/init.d/ocr && update-rc.d ocr defaults
+
+RUN apt-get -y update && apt-get -y install build-essential
+RUN perl -MCPAN -e 'install File::Touch'
+
+RUN mkdir /var/ocr-server/
+RUN mkdir -p /var/ocr-server/Entrada
+RUN mkdir -p /var/ocr-server/Saida
+RUN mkdir -p /var/ocr-server/Originais_Processados
+RUN mkdir -p /var/ocr-server/Erro
+COPY entrypoint.sh /entrypoint.sh
+RUN chmod +x /entrypoint.sh
+
+RUN perl -MCPAN -e 'install File::Find::Rule;'
+RUN perl -MCPAN -e 'install File::Touch;'
+RUN perl -MCPAN -e 'install Sys::Syslog;'
+RUN perl -MCPAN -e 'install IPC::Open3;'
+RUN perl -MCPAN -e 'install IO::Select;'
+
+RUN apt-get -y update && apt-get -y install tesseract-ocr
+RUN ln -s /usr/bin/pdftk /usr/local/bin/pdftk
+RUN ln -s /usr/bin/pdfimages /usr/local/bin/pdfimages
+RUN ln -s /usr/bin/tesseract /usr/local/bin/tesseract
+RUN ln -s /usr/bin/pdfinfo /usr/local/bin/pdfinfo
+RUN ln -s /usr/bin/pdffonts /usr/local/bin/pdffonts
+RUN ln -s /usr/bin/pdftoppm /usr/local/bin/pdftoppm
+RUN ln -s /usr/bin/cpdf /usr/local/bin/cpdf
+
+VOLUME /var/ocr-server/
+CMD ["bash", "/entrypoint.sh"]
+
diff --git a/entrypoint.sh b/entrypoint.sh
new file mode 100644
index 0000000..85519bc
--- /dev/null
+++ b/entrypoint.sh
@@ -0,0 +1,12 @@
+#!/usr/bin/env bash
+
+mkdir -p /var/ocr-server/
+mkdir -p /var/ocr-server/Entrada
+mkdir -p /var/ocr-server/Saida
+mkdir -p /var/ocr-server/Originais_Processados
+mkdir -p /var/ocr-server/Erro
+chmod -R 777 /var/ocr-server
+
+service ocr start
+
+tail -f /var/log/dmesg
diff --git a/usr/local/bin/ocr b/usr/local/bin/ocr
index 2af3d31..056adac 100644
--- a/usr/local/bin/ocr
+++ b/usr/local/bin/ocr
@@ -84,8 +84,10 @@ my $CONVERT = '/usr/bin/convert';
 # If it is needed further filtering
 #my $FILTER = '/usr/local/bin/textcleaner -g -e stretch -f 25 -o 10 -u -s 1 -T -p 10 ';
 
-my @BASE_DIRS = ( '/mnt/protocolo_sede/DIGITALIZAÇÃO/ARQUIVOS PROTOCOLO/OCR/',
-                  '/mnt/protocolo_sede/DIGITALIZAÇÃO/ARQUIVOS_PROCESSOS/OCR/' );
+#my @BASE_DIRS = ( '/mnt/protocolo_sede/DIGITALIZAÇÃO/ARQUIVOS PROTOCOLO/OCR/',
+#                  '/mnt/protocolo_sede/DIGITALIZAÇÃO/ARQUIVOS_PROCESSOS/OCR/' );
+
+my @BASE_DIRS = ('/var/ocr-server/');
 
 my %SUB_DIRS = ( 'IN'=>'Entrada', 'OUT'=>'Saida', 'PROC'=>'Originais_Processados',
                  'TEMP'=>'/tmp/ocr_tmp', 'ERROR' => 'Erro' );
-- 
libgit2 0.21.2