#!/usr/bin/python
"""Map every e-mail address found in a set of backups to a placeholder.

The script works on the following inputs, all relative to the current
working directory:

* ``*_gitlab_backup.tar``      (newest one by mtime), extracted to ``gitlab/``
* ``noosfero_backup.tar.gz``   extracted to ``noosfero/``
* ``mailman_backup.tar.gz``    extracted to ``mailman/``
* ``colab.dump``

Each distinct address is mapped to ``email<N>@example.com`` in the
module-level ``allEmailDict``.  The syntax is kept valid on both
Python 2.7 (the original target) and Python 3.
"""

import glob
import os
import pickle
import re
import shutil
import subprocess

# Mapping "real address" -> "email<N>@example.com", shared by all functions.
allEmailDict = dict()
# Next number to hand out when a new address is registered.
globalCount = 1

# Compiled once and shared by every scanner/replacer.
# NOTE(review): the {2,4} bound truncates the match on longer TLDs
# (e.g. ".museum" matches only ".muse"); left unchanged on purpose.
_EMAIL_PATTERN = re.compile(
    r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,4}')

try:
    _STRING_TYPES = basestring  # Python 2: covers str and unicode
except NameError:
    _STRING_TYPES = str  # Python 3


def _find_last_gitlab():
    """Return the most recently modified gitlab backup, or None if absent."""
    candidates = sorted(glob.glob('*_gitlab_backup.tar'),
                        key=os.path.getmtime)
    # Indexing [-1] on an empty list used to abort the whole script at
    # import time; report "no archive" instead and fail where it is needed.
    return candidates[-1] if candidates else None


last_gitlab = _find_last_gitlab()


def _require_gitlab_archive():
    """Return ``last_gitlab`` or raise RuntimeError when none was found."""
    if last_gitlab is None:
        raise RuntimeError(
            "no '*_gitlab_backup.tar' file found in " + os.getcwd())
    return last_gitlab


def _register_email(email):
    """Give *email* a placeholder address unless it already has one."""
    global globalCount
    if email not in allEmailDict:
        allEmailDict[email] = "email{}@example.com".format(globalCount)
        globalCount += 1


def decompress():
    """Extract the three backup archives into gitlab/, noosfero/, mailman/.

    Raises:
        RuntimeError: if no ``*_gitlab_backup.tar`` file was found.
    """
    archive = _require_gitlab_archive()

    print("=" * 30)
    print("This gonna take some time...")
    for directory in ("gitlab", "noosfero", "mailman"):
        subprocess.call(["mkdir", "-p", directory])

    # Argument lists (no shell) so unusual characters in the archive name
    # cannot be interpreted by a shell.
    print("[1] Extracting gitlab...")
    subprocess.call(["tar", "-xaf", archive, "-C", "gitlab"])
    print("[2] Extracting noosfero...")
    subprocess.call(["tar", "-xaf", "noosfero_backup.tar.gz",
                     "-C", "noosfero"])
    print("[3] Extracting mailman...")
    subprocess.call(["tar", "-xaf", "mailman_backup.tar.gz",
                     "-C", "mailman"])


def _repack(directory, archive, tar_flags):
    """Archive the visible entries of *directory* and then remove it.

    The archive is created inside *directory*, moved to the current working
    directory (replacing any file of the same name) and only then is
    *directory* deleted, so a failing ``tar`` never destroys data.

    Returns:
        True when the archive was created and the directory removed,
        False otherwise.
    """
    if not os.path.isdir(directory):
        print("Skipping " + directory + ": directory not found")
        return False

    # Same selection a shell '*' makes: sorted, hidden entries excluded.
    entries = sorted(name for name in os.listdir(directory)
                     if not name.startswith("."))
    if not entries:
        print("Skipping " + directory + ": nothing to archive")
        return False

    # '--' stops option parsing in case an entry name starts with '-'.
    status = subprocess.call(["tar", tar_flags, archive, "--"] + entries,
                             cwd=directory)
    if status != 0:
        print("tar failed for " + directory + "; directory left in place")
        return False

    shutil.move(os.path.join(directory, archive), archive)
    shutil.rmtree(directory)
    return True


def compress():
    """Re-create the three backup archives from the extracted directories.

    Raises:
        RuntimeError: if no ``*_gitlab_backup.tar`` file was found.
    """
    archive = _require_gitlab_archive()

    print("=" * 30)
    print("Compressing things again...")
    print("[1] Compressing gitlab...")
    _repack("gitlab", archive, "-cpf")
    print("[2] Compressing noosfero...")
    _repack("noosfero", "noosfero_backup.tar.gz", "-czpf")
    print("[3] Compressing mailman...")
    _repack("mailman", "mailman_backup.tar.gz", "-czpf")


def create_hashes_from_file(pFile):
    """Register every e-mail address found in the text file *pFile*.

    Addresses already present in ``allEmailDict`` keep their placeholder.
    """
    with open(pFile) as current:
        for line in current:
            for email in _EMAIL_PATTERN.findall(line):
                _register_email(email)


def create_hashes_for_mailman(pDictionary):
    """Register addresses found in string keys and values of *pDictionary*.

    Non-string keys and values are ignored.  Every address in a string is
    registered, and addresses already present in ``allEmailDict`` keep
    their placeholder, matching ``create_hashes_from_file``.
    """
    for key, value in pDictionary.items():
        # Value first, then key: preserves the original numbering order.
        for candidate in (value, key):
            if isinstance(candidate, _STRING_TYPES):
                for email in _EMAIL_PATTERN.findall(candidate):
                    _register_email(email)


def replace_mailman(pDictionary):
    """Swap registered addresses in *pDictionary* for their placeholders.

    A key or a string value is replaced only when it is, as a whole, a key
    of ``allEmailDict``.  The dictionary is modified in place.

    Returns:
        The same *pDictionary* object.
    """
    # Iterate over a snapshot: keys are deleted/inserted inside the loop.
    for key, value in list(pDictionary.items()):
        new_key = allEmailDict.get(key, key)
        new_value = value
        # Only strings are looked up; other values may be unhashable.
        if isinstance(value, _STRING_TYPES):
            new_value = allEmailDict.get(value, value)
        if new_key != key:
            del pDictionary[key]
        pDictionary[new_key] = new_value
    return pDictionary


def _placeholder_for(match):
    """Return the placeholder for a regex match, or the text unchanged."""
    found = match.group(0)
    return allEmailDict.get(found, found)


def replace_hashes_in_file(pFile):
    """Rewrite *pFile* with registered addresses replaced by placeholders.

    The file is scanned once with the same pattern used to collect the
    addresses, so an address that contains another one as a substring is
    replaced as a whole and replaced text is never substituted again.
    Addresses absent from ``allEmailDict`` are left untouched.
    """
    with open(pFile) as current:
        contents = current.read()

    contents = _EMAIL_PATTERN.sub(_placeholder_for, contents)

    # Temporary file lives next to the target so it cannot clobber an
    # unrelated file in the working directory.
    tmp_file = pFile + ".tmp"
    with open(tmp_file, 'w') as target:
        target.write(contents)
    shutil.move(tmp_file, pFile)


def build_backup_list():
    """Collect the files to scan from the extracted backups.

    Returns:
        A pair ``(text_files, mailman_configs)``: the ``.sql`` files of
        ``noosfero/tmp/backup/`` followed by ``colab.dump`` and
        ``gitlab/db/database.sql``, and every file under ``mailman/lists/``
        whose name ends with ``config.pck``.

    Raises:
        OSError: if ``noosfero/tmp/backup/`` does not exist.
    """
    noosfero_dir = "noosfero/tmp/backup/"

    # Noosfero
    listOfbkpFiles = [os.path.join(noosfero_dir, name)
                      for name in os.listdir(noosfero_dir)
                      if name.endswith(".sql")]
    # Colab and gitlab
    listOfbkpFiles += ["colab.dump", "gitlab/db/database.sql"]

    # Mailman
    mailman = []
    for root, _dirs, files in os.walk("mailman/lists/"):
        for name in files:
            if name.endswith("config.pck"):
                mailman.append(os.path.join(root, name))

    return listOfbkpFiles, mailman


def unserializable_and_replace(pMailconfig):
    """Load and return the pickled object stored in *pMailconfig*.

    SECURITY: unpickling executes arbitrary code embedded in the file; only
    use this on backups that come from a trusted source.
    """
    with open(pMailconfig, "rb") as stream:
        return pickle.load(stream)


def serializable_new_config(swap, mailconfig):
    """Placeholder: not implemented yet, does nothing."""
    pass


def main():
    """Build the address mapping from every backup file and print it."""
    # decompress() / compress() are intentionally not called yet.
    others, mailMan = build_backup_list()

    # Others: colab, gitlab, and Noosfero
    print("=" * 30)
    print("Creating mapping for all emails: pass through all applications")
    for application in others:
        create_hashes_from_file(application)

    print("now, mailman...")
    for configMailman in mailMan:
        print(configMailman)
        swap = unserializable_and_replace(configMailman)
        create_hashes_for_mailman(swap)

    print(allEmailDict)

    # TODO: remaining steps of the workflow, not enabled yet:
    #   - for each file in `others`: replace_hashes_in_file(file)
    #   - for each mailman config: serializable_new_config(swap, config)


if __name__ == "__main__":
    main()