extratorTXT.cpp 2.81 KB
/***************************************************************************
 *   Universidade Federal da Paraíba                                       *
 *   Copyright (C) 2014 by Laboratório de Aplicações de Vídeo Digital      *
 *                                                                         *
 *   Centro de Informática - UFPB - Campus I                               *
 *   João Pessoa - PB - Brasil                                             *
 *                                                                         *
 *   Author: Erickson Silva (erickson.silva@lavid.ufpb.br)                 *
 *                                                                         *
 **************************************************************************/

#include "extratorTXT.h"

/**
 * Builds an extractor with an empty listener list and the finished flag
 * cleared, then emits a debug-level message through PRINTL.
 */
ExtratorTXT::ExtratorTXT() {
    // Extraction has not completed yet.
    this->finish = false;

    // The list is heap-allocated here and freed in the destructor.
    this->listeners = new list<ListenerTXT*>();

    PRINTL(util::_DEBUG, "ExtratorTXT Done!\n");
}

/**
 * Empties and frees the listener list, then emits a debug-level message.
 *
 * Only the list object itself is deleted; the ListenerTXT objects whose
 * pointers were stored in it are not deleted here.
 */
ExtratorTXT::~ExtratorTXT() {
    list<ListenerTXT*>* registered = this->listeners;
    registered->clear();
    delete registered;

    PRINTL(util::_DEBUG, "ExtratorTXT finalized!\n");
}

/**
 * Appends a listener to the end of the listener list.
 *
 * @param listener Pointer stored as-is; no null check is performed.
 */
void ExtratorTXT::addListener(ListenerTXT* listener) {
    listeners->insert(listeners->end(), listener);
}

void ExtratorTXT::notifyListeners(unsigned char* line) {
    for(list<ListenerTXT*>::iterator it = listeners->begin(); it != listeners->end(); it++){
        (*it)->notifyLine(line);
    }
}

void ExtratorTXT::notifyEndExtraction(int size) {
    PRINTL(util::_DEBUG, "ExtratorTXT concluiu a extração: %d linhas.\n", size);
    for(list<ListenerTXT*>::iterator it = listeners->begin(); it != listeners->end(); it++){
        (*it)->notifyEnd(size);
    }
}

/**
 * Wraps a string in single quotes so that a POSIX shell treats it as one
 * literal word. Each embedded single quote is rewritten as '\'' (close the
 * quoted run, emit an escaped quote, reopen the quoted run).
 *
 * @param raw Text to quote.
 * @return The quoted text, safe to splice into a shell command line.
 */
static std::string shellQuote(const std::string& raw) {
    std::string quoted = "'";
    for (std::string::size_type i = 0; i < raw.size(); ++i) {
        if (raw[i] == '\'') {
            quoted.append("'\\''");
        } else {
            quoted.push_back(raw[i]);
        }
    }
    quoted.push_back('\'');
    return quoted;
}

/**
 * Rewrites the file at filePath in place by running two shell commands:
 *   1. `recode`, converting from the encoding reported by
 *      `file --mime-encoding -b` to UTF-8;
 *   2. `sed -i 's/\r//'`, removing the first carriage return on each line.
 *
 * The path is single-quoted before being spliced into the commands, so
 * paths containing spaces or shell metacharacters are passed through as a
 * single literal argument instead of being split or interpreted by the
 * shell.
 *
 * The exit status of both commands is ignored, as before: a failure of
 * either tool leaves the file as it was and is not reported.
 */
void ExtratorTXT::encodingfiletoUTF8() {
    const std::string quotedPath = shellQuote(this->filePath);

    std::string recmd = "recode ";
    recmd.append("$(file --mime-encoding -b ")
         .append(quotedPath).append(")..utf-8 ")
         .append(quotedPath);
    system(recmd.c_str());

    std::string sedcmd = "sed -i 's/\\r//' ";
    sedcmd.append(quotedPath);
    system(sedcmd.c_str());
}

void ExtratorTXT::setFilePath(char* path){
    this->filePath = path;
    encodingfiletoUTF8();
}

/**
 * Opens the file at filePath for reading and, on success, calls Start().
 *
 * If the stream is not open or not in a good state, the extractor is
 * marked as finished, an error entry is written through Logging, and an
 * ExtratorException is thrown.
 *
 * @throws ExtratorException when the file could not be opened.
 */
void ExtratorTXT::initialize() {
    ifs_.open(this->filePath, std::ifstream::in);

    const bool usable = ifs_.is_open() && ifs_.good();
    if (usable) {
        // Presumably launches the worker that executes Run(); Start() is
        // defined outside this file.
        this->Start();
        return;
    }

    finish = true;
    Logging::instance()->writeLog("extratorTXT.cpp <Error>: Arquivo de texto não encontrado.");
    throw ExtratorException("Falha ao abrir o arquivo de texto! Verifique se o mesmo existe.");
}

bool ExtratorTXT::isFinished(){
    return finish;
}

void ExtratorTXT::Run(){
    PRINTL(util::_INFO, "Extraindo Texto...\n");

    int line_index = 0;
    string current_line;

    while (ifs_.good()) {
        getline(ifs_, current_line, '\n');

        if (current_line.length() != 0) {
            notifyListeners((unsigned char*) current_line.c_str());
            line_index++;
        }
    }

    finish = true;
    notifyEndExtraction(line_index);
}