Commit d2ef843955101547a6d09ebaad331cac7b92f978

Authored by Luan
1 parent 9e61c6b8

Removing some trailing whitespace

src/super_archives/management/commands/import_emails.py
... ... @@ -21,35 +21,35 @@ class Command(BaseCommand, object):
21 21 """Get emails from mailman archives and import them in the django db. """
22 22  
23 23 help = __doc__
24   -
  24 +
25 25 default_archives_path = '/var/lib/mailman/archives/private'
26 26 RE_SUBJECT_CLEAN = re.compile('((re|res|fw|fwd|en|enc):)|\[.*?\]',
27 27 re.IGNORECASE)
28 28 THREAD_CACHE = {}
29 29 EMAIL_ADDR_CACHE = {}
30   -
  30 +
31 31 # A new command line option to get the dump file to parse.
32 32 option_list = BaseCommand.option_list + (
33 33 make_option('--archives_path',
34 34 dest='archives_path',
35   - help='Path of email archives to be imported. (default: %s)' %
  35 + help='Path of email archives to be imported. (default: %s)' %
36 36 default_archives_path,
37 37 default=default_archives_path),
38   -
  38 +
39 39 make_option('--exclude-list',
40 40 dest='exclude_lists',
41   - help=("Mailing list that won't be imported. It can be used many"
  41 + help=("Mailing list that won't be imported. It can be used many"
42 42 "times for more than one list."),
43 43 action='append',
44 44 default=None),
45   -
  45 +
46 46 make_option('--all',
47 47 dest='all',
48 48 help='Import all messages (default: False)',
49 49 action="store_true",
50 50 default=False),
51 51 )
52   -
  52 +
53 53 def __init__(self, *args, **kwargs):
54 54 super(Command, self).__init__(*args, **kwargs)
55 55  
... ... @@ -68,18 +68,18 @@ class Command(BaseCommand, object):
68 68  
69 69 Yield: An instance of `mailbox.mboxMessage` for each email in the
70 70 file.
71   -
  71 +
72 72 """
73 73 self.log("Parsing email dump: %s." % email_filename)
74 74 mbox = mailbox.mbox(email_filename, factory=CustomMessage)
75   -
  75 +
76 76 # Get each email from mbox file
77 77 #
78 78 # The following implementation was used because the object
79   - # mbox does not support slicing. Converting the object to a
80   - # tuple (as represented in the code down here) was a valid
  79 + # mbox does not support slicing. Converting the object to a
  80 + # tuple (as represented in the code down here) was a valid
81 81 # option but its performance was too poor.
82   - #
  82 + #
83 83 #for message in tuple(mbox)[index:]:
84 84 # yield message
85 85 #
... ... @@ -90,8 +90,8 @@ class Command(BaseCommand, object):
90 90  
91 91 def get_emails(self, mailinglist_dir, all, exclude_lists):
92 92 """Generator function that gets the emails from each mailing
93   - list dump directory. If `all` is set to True all the emails in the
94   - mbox will be imported; if not, it will just resume from the last
  93 + list dump directory. If `all` is set to True all the emails in the
  94 + mbox will be imported; if not, it will just resume from the last
95 95 message previously imported. The lists set in `exclude_lists`
96 96 won't be imported.
97 97  
... ... @@ -99,20 +99,20 @@ class Command(BaseCommand, object):
99 99  
100 100 """
101 101 self.log("Getting emails dumps from: %s" % mailinglist_dir)
102   -
  102 +
103 103 # Get the list of directories ending with .mbox
104   - mailing_lists_mboxes = (mbox for mbox in os.listdir(mailinglist_dir)
  104 + mailing_lists_mboxes = (mbox for mbox in os.listdir(mailinglist_dir)
105 105 if mbox.endswith('.mbox'))
106   -
  106 +
107 107 # Get messages from each mbox
108 108 for mbox in mailing_lists_mboxes:
109 109 mbox_path = os.path.join(mailinglist_dir, mbox, mbox)
110 110 mailinglist_name = mbox.split('.')[0]
111   -
  111 +
112 112 # Check if the mailinglist is set not to be imported
113 113 if exclude_lists and mailinglist_name in exclude_lists:
114 114 continue
115   -
  115 +
116 116 # Find the index of the last imported message
117 117 if all:
118 118 n_msgs = 0
... ... @@ -123,13 +123,13 @@ class Command(BaseCommand, object):
123 123 n_msgs = mailinglist.last_imported_index
124 124 except MailingList.DoesNotExist:
125 125 n_msgs = 0
126   -
  126 +
127 127 for index, msg in self.parse_emails(mbox_path, n_msgs):
128 128 yield mailinglist_name, msg, index
129 129  
130 130 def get_thread(self, email, mailinglist):
131 131 """Group messages by thread looking for similar subjects"""
132   -
  132 +
133 133 subject_slug = slugify(email.subject_clean)
134 134 thread = self.THREAD_CACHE.get(subject_slug, {}).get(mailinglist.id)
135 135 if thread is None:
... ... @@ -137,27 +137,27 @@ class Command(BaseCommand, object):
137 137 mailinglist=mailinglist,
138 138 subject_token=subject_slug
139 139 )[0]
140   -
  140 +
141 141 if self.THREAD_CACHE.get(subject_slug) is None:
142 142 self.THREAD_CACHE[subject_slug] = dict()
143 143 self.THREAD_CACHE[subject_slug][mailinglist.id] = thread
144 144  
145 145 thread.latest_message = email
146   - thread.save()
  146 + thread.save()
147 147 return thread
148   -
  148 +
149 149 def save_email(self, list_name, email_msg, index):
150 150 """Save email message into the database."""
151   -
  151 +
152 152 # Update last imported message into the DB
153 153 mailinglist, created = MailingList.objects.get_or_create(name=list_name)
154 154 mailinglist.last_imported_index = index
155   -
156   - if created:
  155 +
  156 + if created:
157 157 # if the mailinglist is newly created it's sure that the message
158 158 # is not in the DB yet.
159 159 self.create_email(mailinglist, email_msg)
160   -
  160 +
161 161 else:
162 162 # If the message is already at the database don't do anything
163 163 try:
... ... @@ -165,11 +165,11 @@ class Command(BaseCommand, object):
165 165 message_id=email_msg.get('Message-ID'),
166 166 thread__mailinglist=mailinglist
167 167 )
168   -
  168 +
169 169 except Message.DoesNotExist:
170 170 self.create_email(mailinglist, email_msg)
171   -
172   - mailinglist.save()
  171 +
  172 + mailinglist.save()
173 173  
174 174 def create_email(self, mailinglist, email_msg):
175 175  
... ... @@ -198,59 +198,59 @@ class Command(BaseCommand, object):
198 198 email.thread = self.get_thread(email, mailinglist)
199 199 email.save()
200 200  
201   - @transaction.commit_manually
  201 + @transaction.commit_manually
202 202 def import_emails(self, archives_path, all, exclude_lists=None):
203   - """Get emails from the filesystem from the `archives_path`
204   - and store them into the database. If `all` is set to True all
205   - the filesystem storage will be imported otherwise the
206   - importation will resume from the last message previously
  203 + """Get emails from the filesystem from the `archives_path`
  204 + and store them into the database. If `all` is set to True all
  205 + the filesystem storage will be imported otherwise the
  206 + importation will resume from the last message previously
207 207 imported. The lists set in `exclude_lists` won't be imported.
208   -
  208 +
209 209 """
210   -
  210 +
211 211 count = 0
212 212 email_generator = self.get_emails(archives_path, all, exclude_lists)
213 213 for mailinglist_name, msg, index in email_generator:
214 214 try:
215 215 self.save_email(mailinglist_name, msg, index)
216 216 except:
217   - # This anti-pattern is needed to avoid the transactions to
  217 + # This anti-pattern is needed to avoid the transactions to
218 218 # get stuck in case of errors.
219 219 transaction.rollback()
220 220 raise
221   -
  221 +
222 222 count += 1
223 223 if count % 1000 == 0:
224 224 transaction.commit()
225   -
  225 +
226 226 transaction.commit()
227   -
  227 +
228 228 def handle(self, *args, **options):
229 229 """Main command method."""
230   -
  230 +
231 231 lock_file = '/var/lock/colab/import_emails.lock'
232   -
  232 +
233 233 # Already running, so quit
234 234 if os.path.exists(lock_file):
235 235 self.log(("This script is already running. (If your are sure it's "
236 236 "not please delete the lock file in %s')") % lock_file)
237 237 sys.exit(0)
238   -
  238 +
239 239 if not os.path.exists(os.path.dirname(lock_file)):
240 240 os.mkdir(os.path.dirname(lock_file), 0755)
241   -
  241 +
242 242 run_lock = file(lock_file, 'w')
243 243 run_lock.close()
244   -
  244 +
245 245 archives_path = options.get('archives_path')
246 246 self.log('Using archives_path `%s`' % self.default_archives_path)
247   -
  247 +
248 248 if not os.path.exists(archives_path):
249 249 raise CommandError('archives_path (%s) does not exist' %
250 250 archives_path)
251   -
252   - self.import_emails(archives_path,
  251 +
  252 + self.import_emails(archives_path,
253 253 options.get('all'), options.get('exclude_lists'))
254   -
  254 +
255 255 os.remove(lock_file)
256   -
  256 +
... ...