#!/usr/bin/python
"""Map every e-mail address found in the extracted backups to a placeholder.

The script scans the Noosfero ``*.sql`` dumps, ``colab.dump``, the GitLab
``database.sql`` and every Mailman ``config.pck`` below the current working
directory, gives each distinct address an ``emailN@example.com`` alias and
prints the resulting mapping.  Rewriting the files with those aliases
(``replace_hashes_in_file`` / ``serializable_new_config``) is implemented only
partially and is not called from the entry point yet.

Provenance: added as ``utils/remove_backup_email.py`` by commit
444e7283c19f1f33980800fe77ce1e846107d55d ("Changed import script.",
rodrigosiqueira, Mon, 31 Aug 2015 11:42:04 -0300; patch written by
libgit2 0.21.2).

The code runs on Python 2.7 and Python 3.
"""

import glob
import os
import pickle
import re
import shutil
import subprocess
import tempfile

# Address -> placeholder mapping shared by every function in this module.
allEmailDict = dict()
# Sequence number used for the next placeholder that gets created.
globalCount = 1

# Compiled once and shared by all scanners.  ``re.ASCII`` (Python 3 only)
# keeps ``\b`` byte-oriented, which is what a Python 2 ``str`` pattern does.
# NOTE: a top-level domain longer than four letters is matched only up to its
# fourth letter, because the pattern ends with ``{2,4}`` and no boundary.
_EMAIL_PATTERN = re.compile(
    r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,4}',
    getattr(re, "ASCII", 0))

try:
    _STRING_TYPES = (basestring,)  # Python 2: covers str and unicode
except NameError:
    _STRING_TYPES = (str,)


def _latest_gitlab_backup():
    """Return the most recently modified GitLab archive, or None if absent."""
    candidates = sorted(glob.glob('*_gitlab_backup.tar'),
                        key=os.path.getmtime)
    return candidates[-1] if candidates else None


# None when no archive is present, so that importing or running the mapping
# code does not fail with an IndexError just because GitLab was not backed up.
last_gitlab = _latest_gitlab_backup()


def _require_gitlab_backup():
    """Return ``last_gitlab`` or raise RuntimeError when there is none."""
    if last_gitlab is None:
        raise RuntimeError(
            "no *_gitlab_backup.tar archive found in the current directory")
    return last_gitlab


def _as_text(data):
    """Return *data* as ``str`` without losing any byte."""
    # On Python 2 bytes read from disk already are ``str``.  On Python 3
    # latin-1 maps every byte to exactly one code point, so decoding and
    # re-encoding round-trips arbitrary dump contents unchanged.
    return data if isinstance(data, str) else data.decode("latin-1")


def _as_bytes(text):
    """Inverse of ``_as_text``: return *text* as bytes ready to be written."""
    return text if isinstance(text, bytes) else text.encode("latin-1")


def _register_email(email):
    """Give *email* the next placeholder unless it already has one."""
    global globalCount
    if email not in allEmailDict:
        allEmailDict[email] = "email{}@example.com".format(globalCount)
        globalCount += 1


def _alias_for_match(match):
    """``re.sub`` callback: the alias of the matched address, if any."""
    found = match.group(0)
    return allEmailDict.get(found, found)


def _alias_for(item):
    """Return the alias of *item* when it is a mapped address, else *item*."""
    # Only strings are looked up: Mailman values may be lists or dicts, which
    # are unhashable and would make a plain ``in allEmailDict`` test raise.
    if isinstance(item, _STRING_TYPES):
        return allEmailDict.get(item, item)
    return item


def _extract(archive, directory):
    """Create *directory* if needed and unpack *archive* into it."""
    if not os.path.isdir(directory):
        os.makedirs(directory)
    # Argument list instead of a shell string: archive names containing
    # spaces or shell metacharacters are passed through untouched.
    subprocess.check_call(["tar", "-xaf", archive, "-C", directory])


def _repack(directory, archive, create_flags):
    """Archive the contents of *directory* as *archive*, then delete it.

    The archive is first written next to its final location and moved into
    place only after ``tar`` succeeded, so a failed run neither clobbers the
    existing backup nor removes the extracted tree.
    """
    # Hidden entries are skipped, matching what a shell ``*`` glob selects.
    members = sorted(name for name in os.listdir(directory)
                     if not name.startswith("."))
    staging = archive + ".tmp"
    try:
        subprocess.check_call(
            ["tar", create_flags, staging, "-C", directory, "--"] + members)
    except subprocess.CalledProcessError:
        if os.path.exists(staging):
            os.remove(staging)
        raise
    shutil.move(staging, archive)
    shutil.rmtree(directory)


def decompress():
    """Unpack the GitLab, Noosfero and Mailman archives into sub-directories.

    Raises:
        RuntimeError: no ``*_gitlab_backup.tar`` archive is available.
        subprocess.CalledProcessError: ``tar`` exited with an error.
    """
    gitlab_archive = _require_gitlab_backup()
    print("=" * 30)
    print("This gonna take some time...")
    print("[1] Extracting gitlab...")
    _extract(gitlab_archive, "gitlab")
    print("[2] Extracting noosfero...")
    _extract("noosfero_backup.tar.gz", "noosfero")
    print("[3] Extracting mailman...")
    _extract("mailman_backup.tar.gz", "mailman")


def compress():
    """Re-create the three archives from the extracted trees and remove them.

    GitLab is packed as a plain tar, Noosfero and Mailman as gzip tars.

    Raises:
        RuntimeError: no ``*_gitlab_backup.tar`` archive is available.
        OSError: an extracted directory is missing.
        subprocess.CalledProcessError: ``tar`` exited with an error.
    """
    gitlab_archive = _require_gitlab_backup()
    print("=" * 30)
    print("Compressing things again...")

    print("[1] Compressing gitlab...")
    _repack("gitlab", gitlab_archive, "-cpf")

    print("[2] Compressing noosfero...")
    _repack("noosfero", "noosfero_backup.tar.gz", "-czpf")

    print("[3] Compressing mailman...")
    _repack("mailman", "mailman_backup.tar.gz", "-czpf")


def create_hashes_from_file(pFile):
    """Register a placeholder for every new address found in file *pFile*.

    The file is read line by line so large dumps are not loaded at once.
    Addresses that already have a placeholder keep it.
    """
    with open(pFile, "rb") as current:
        for raw_line in current:
            for email in _EMAIL_PATTERN.findall(_as_text(raw_line)):
                _register_email(email)


def create_hashes_for_mailman(pDictionary):
    """Register placeholders for addresses in an unpickled Mailman config.

    Only top-level keys and values that are strings are inspected, and only
    the first address inside each string is registered; nested containers
    are ignored.  Addresses that already have a placeholder keep it.
    """
    for key, value in pDictionary.items():
        # Value first, then key, so numbering is stable for a given item.
        for candidate in (value, key):
            if not isinstance(candidate, _STRING_TYPES):
                continue
            found = _EMAIL_PATTERN.search(candidate)
            if found:
                _register_email(found.group(0))


def replace_mailman(pDictionary):
    """Swap mapped addresses in *pDictionary* for their placeholders.

    A key or value is replaced only when the whole string equals a mapped
    address.  NOTE: an address embedded in a longer string is left as is.

    The dictionary is modified in place and also returned.
    """
    # Iterate over a snapshot: keys are removed and added inside the loop.
    for key, value in list(pDictionary.items()):
        new_key = _alias_for(key)
        if new_key is not key:
            del pDictionary[key]
        pDictionary[new_key] = _alias_for(value)
    return pDictionary


def replace_hashes_in_file(pFile):
    """Rewrite *pFile* with every mapped address replaced by its placeholder.

    All substitutions happen in one regex pass, so an address that contains
    another mapped address (``aa@b.com`` vs ``a@b.com``) is replaced as a
    whole and already inserted placeholders are never rewritten again.
    Addresses without a placeholder are left untouched.
    """
    with open(pFile, "rb") as current:
        contents = _as_text(current.read())
    contents = _EMAIL_PATTERN.sub(_alias_for_match, contents)

    # Temporary file in the target's directory: unique per call and on the
    # same filesystem, so the final move replaces the file in one step.
    target_dir = os.path.dirname(os.path.abspath(pFile))
    handle, tmp_path = tempfile.mkstemp(dir=target_dir)
    try:
        with os.fdopen(handle, "wb") as target:
            target.write(_as_bytes(contents))
        shutil.copymode(pFile, tmp_path)
        shutil.move(tmp_path, pFile)
    except Exception:
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
        raise


def build_backup_list():
    """Collect the files to scan.

    Returns:
        A pair ``(dumps, mailman)``: ``dumps`` lists the Noosfero ``.sql``
        files followed by ``colab.dump`` and ``gitlab/db/database.sql``;
        ``mailman`` lists every ``config.pck`` found below
        ``mailman/lists/``.

    Raises:
        OSError: ``noosfero/tmp/backup/`` does not exist.
    """
    noosfero_dir = "noosfero/tmp/backup/"
    # Sorted so that placeholder numbering does not depend on listing order.
    dumps = [os.path.join(noosfero_dir, name)
             for name in sorted(os.listdir(noosfero_dir))
             if name.endswith(".sql")]
    # Colab and gitlab
    dumps += ["colab.dump", "gitlab/db/database.sql"]

    mailman = []
    for root, dirs, files in os.walk("mailman/lists/"):
        dirs.sort()  # in-place sort makes the walk order deterministic
        for name in sorted(files):
            if name.endswith("config.pck"):
                mailman.append(os.path.join(root, name))
    return dumps, mailman


def unserializable_and_replace(pMailconfig):
    """Load and return the pickled object stored in *pMailconfig*."""
    # SECURITY: unpickling runs code embedded in the file.  Use this only on
    # backups that come from a trusted source.
    with open(pMailconfig, "rb") as handle:
        return pickle.load(handle)


def serializable_new_config(swap, mailconfig):
    """Placeholder: currently does nothing with *swap* or *mailconfig*.

    TODO: presumably meant to write the rewritten config back to
    *mailconfig* -- confirm the intended format before implementing.
    """
    pass


def main():
    """Build the address mapping for all applications and print it."""
    # The archive round trip is disabled; the backups must already be
    # extracted below the current directory.
    # decompress()
    # compress()
    others, mailMan = build_backup_list()

    # Others: colab, gitlab, and Noosfero
    print("=" * 30)
    print("Creating mapping for all emails: pass through all applications")
    for application in others:
        create_hashes_from_file(application)

    print("now, mailman...")
    for configMailman in mailMan:
        print(configMailman)
        swap = unserializable_and_replace(configMailman)
        create_hashes_for_mailman(swap)
    print(allEmailDict)

    # TODO: replacement pass, not enabled yet:
    # for application in others:
    #     print("Working on: " + application)
    #     create_hashes_from_file(application)
    #     replace_hashes_in_file(application)
    #
    # print("Working on: Mailman")
    # for mailconfig in mailMan:
    #     print("File: " + mailconfig)
    #     serializable_new_config(swap, mailconfig)


if __name__ == "__main__":
    main()