remove_backup_email.py 4.85 KB
#!/usr/bin/python

import pickle, glob, os, subprocess, re, shutil, pickle

# Maps every real e-mail address found so far to its anonymised replacement.
allEmailDict = dict()
# Sequence number used to build the next "email<N>@example.com" replacement.
globalCount = 1

# Most recently modified GitLab backup tarball in the working directory, or
# None when there is none.  Only decompress() and compress() use this name, so
# a missing tarball must not abort the whole script with an IndexError here.
_gitlab_backups = sorted(glob.glob('*_gitlab_backup.tar'),
                         key=os.path.getmtime)
last_gitlab = _gitlab_backups[-1] if _gitlab_backups else None

def decompress():
    """Unpack the three backup archives into per-application directories.

    Creates ``gitlab``, ``noosfero`` and ``mailman`` in the current working
    directory and extracts ``last_gitlab``, ``noosfero_backup.tar.gz`` and
    ``mailman_backup.tar.gz`` into them, in that order.

    Every command is run from an argument list without a shell, so an archive
    name containing spaces or shell metacharacters reaches ``tar`` verbatim.
    A non-zero exit status is reported but does not stop the remaining steps.
    """
    print("=" * 30)
    print("This gonna take some time...")

    archives = [
        ("gitlab", last_gitlab),
        ("noosfero", "noosfero_backup.tar.gz"),
        ("mailman", "mailman_backup.tar.gz"),
    ]

    # All target directories are created before the first extraction.
    for directory, _archive in archives:
        subprocess.call(["mkdir", "-p", directory])

    for step, (directory, archive) in enumerate(archives, 1):
        print("[{0}] Extracting {1}...".format(step, directory))
        status = subprocess.call(["tar", "-xaf", archive, "-C", directory])
        if status != 0:
            print("WARNING: extracting {0} failed (exit status {1})".format(
                archive, status))

def compress():
    """Re-create the three backup archives and delete the working directories.

    For each of ``gitlab``, ``noosfero`` and ``mailman``: archive the
    directory's (non-hidden) entries from inside it, move the new archive to
    the parent directory and, only if every previous step succeeded, remove
    the directory.  GitLab is written as a plain tar named ``last_gitlab``;
    the other two are gzip-compressed.

    A non-zero exit status is reported but does not stop the remaining steps.
    """
    print("=" * 30)
    print("Compressing things again...")

    # One pipeline for all applications.  Directory, tar flags and archive
    # name are handed over as positional parameters ($1, $2, $3) instead of
    # being pasted into the command text, so unusual characters in an archive
    # name cannot be interpreted by the shell.
    script = ('cd "$1" && tar "$2" "$3" * && mv "$3" ../ '
              '&& cd .. && rm -rf "$1"')
    steps = [
        ("gitlab", "-cpf", last_gitlab),
        ("noosfero", "-czpf", "noosfero_backup.tar.gz"),
        ("mailman", "-czpf", "mailman_backup.tar.gz"),
    ]

    for step, (directory, flags, archive) in enumerate(steps, 1):
        print("[{0}] Compressing {1}...".format(step, directory))
        # The argument after the script text becomes $0; the rest are $1..$3.
        status = subprocess.call(
            ["/bin/sh", "-c", script, "sh", directory, flags, archive])
        if status != 0:
            print("WARNING: compressing {0} failed (exit status {1})".format(
                directory, status))

def create_hashes_from_file(pFile):
    """Register an anonymised replacement for every e-mail address in a file.

    Reads *pFile* line by line.  Each address that is not yet a key of the
    module-level ``allEmailDict`` is stored there with the value
    ``"email<N>@example.com"``, where N is the current ``globalCount``, which
    is then incremented.  The file itself is not modified.
    """
    global globalCount
    global allEmailDict
    # The top-level-domain part is open-ended ({2,}).  A {2,4} cap would cut
    # an address such as user@host.museum down to user@host.muse, and the
    # leftover "um" would survive the later replacement.
    pattern = re.compile(r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}')
    with open(pFile) as current:
        for line in current:
            for email in pattern.findall(line):
                if email not in allEmailDict:
                    allEmailDict[email] = "email{}@example.com".format(
                        globalCount)
                    globalCount += 1

def create_hashes_for_mailman(pDictionary):
    """Register replacements for e-mail addresses in a dict's keys and values.

    Looks only at the top level of *pDictionary*.  For every value and every
    key that is a string, the first e-mail address found in it is added to the
    module-level ``allEmailDict`` as ``"email<N>@example.com"`` (N being the
    current ``globalCount``, which is then incremented).  Addresses that are
    already mapped keep their existing replacement.  Non-string keys and
    values are skipped.  *pDictionary* is not modified.
    """
    global globalCount
    global allEmailDict
    try:
        text_types = basestring  # Python 2: covers str and unicode
    except NameError:
        text_types = str
    # Open-ended top-level domain: a {2,4} cap would truncate addresses whose
    # TLD is longer than four letters.
    pattern = re.compile(r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}')

    for key, value in pDictionary.items():
        # Value first, then key, each matched and registered independently.
        for candidate in (value, key):
            if not isinstance(candidate, text_types):
                continue
            match = pattern.search(candidate)
            if match is None:
                continue
            email = match.group(0)
            if email not in allEmailDict:
                allEmailDict[email] = "email{}@example.com".format(globalCount)
                globalCount += 1

def replace_mailman(pDictionary):
    """Swap mapped e-mail addresses in a dict's top-level keys and values.

    Every key or value that is itself a key of the module-level
    ``allEmailDict`` is replaced by its mapped address; everything else is
    kept as it is.  *pDictionary* is updated in place and also returned.
    """
    def anonymised(item):
        # Unhashable items (lists, dicts, ...) cannot be keys of the mapping,
        # so they are returned untouched instead of raising TypeError.
        try:
            return allEmailDict.get(item, item)
        except TypeError:
            return item

    # Compute all new pairs before touching the dict: changing a dict while
    # iterating over it is unsafe, and a renamed key must not be written back
    # under its old name.
    rewritten = [(anonymised(key), anonymised(value))
                 for key, value in pDictionary.items()]
    pDictionary.clear()
    pDictionary.update(rewritten)
    return pDictionary

def replace_hashes_in_file(pFile):
    """Rewrite *pFile* with every mapped e-mail address replaced.

    Each key of the module-level ``allEmailDict`` that occurs in the file is
    replaced by its value.  The result is written to ``<pFile>.tmp`` and then
    moved over *pFile*.
    """
    with open(pFile) as current:
        contents = current.read()

    # Longest addresses first: if one mapped address is contained in another
    # (a@x.org inside aa@x.org), replacing the short one first would mangle
    # the long one.  The address itself is the tie-breaker, so the order is
    # deterministic.
    for email in sorted(allEmailDict, key=lambda item: (-len(item), item)):
        contents = contents.replace(email, allEmailDict[email])

    # The temporary file sits next to its target, so the final move stays on
    # one filesystem and cannot clobber an unrelated file in the working
    # directory.
    tmp_file = pFile + '.tmp'
    with open(tmp_file, 'w') as target:
        target.write(contents)
    shutil.move(tmp_file, pFile)

def build_backup_list():
    """Collect the paths of the backup files to process.

    Returns a 2-tuple ``(dumps, mailman_configs)``:

    * ``dumps`` -- every ``*.sql`` entry of ``noosfero/tmp/backup/``, followed
      by the fixed paths ``colab.dump`` and ``gitlab/db/database.sql``;
    * ``mailman_configs`` -- every file below ``mailman/lists/`` whose name
      ends with ``config.pck``.
    """
    noosfero_dir = "noosfero/tmp/backup/"

    # Noosfero dumps first, then the Colab and GitLab dumps.
    dump_paths = [os.path.join(noosfero_dir, entry)
                  for entry in os.listdir(noosfero_dir)
                  if entry.endswith(".sql")]
    dump_paths.extend(["colab.dump", "gitlab/db/database.sql"])

    # Mailman configuration files, searched recursively.
    pickled_configs = [os.path.join(folder, entry)
                       for folder, _subdirs, entries in os.walk("mailman/lists/")
                       for entry in entries
                       if entry.endswith("config.pck")]

    return dump_paths, pickled_configs

def unserializable_and_replace(pMailconfig):
    """Load and return the object pickled in the file *pMailconfig*.

    Despite the name, nothing is replaced here: the unpickled object is
    returned unchanged.
    """
    # NOTE: unpickling can execute arbitrary code; only use this on backup
    # files that come from a trusted source.
    # The with-block closes the file, which the bare open() call never did.
    with open(pMailconfig, "rb") as handle:
        return pickle.load(handle)

def serializable_new_config(swap, mailconfig):
    """Placeholder: currently does nothing and returns ``None``.

    NOTE(review): presumably meant to write ``swap`` back to the file
    ``mailconfig`` -- not implemented; confirm the intended behaviour.
    """
    return None

if __name__ == "__main__":
    # Entry point.  Only the mapping pass is active: every e-mail address is
    # collected into allEmailDict and printed; no file is rewritten.
    # Archive extraction and re-compression are switched off.
    #decompress()
    #compress()
    others, mailMan = build_backup_list()

    #Others: colab, gitlab, and Noosfero
    print("=" * 30)
    print("Creating mapping for all emails: pass through all applications")
    for applications in others:
        create_hashes_from_file(applications)
    print("now, mailman...")
    for configMailman in mailMan:
        # Single-argument call form prints the same text on Python 2 and 3.
        print(configMailman)
        swap = unserializable_and_replace(configMailman)
        create_hashes_for_mailman(swap)
    print(allEmailDict)

    # Disabled replacement pass, kept as in the original.
    #for application in others:
    #    print ("Working on: " + application)
    #    create_hashes_from_file(application)
    #    replace_hashes_in_file(application)

    #print ("Working on: Mailman")
    #for mailconfig in mailMan:
    #    print ("File: " + mailconfig)
    #    serializable_new_config(swap, mailconfig)