Commit 092b3a271c8131bcac0d2cd8f481b8cee06c1103

Authored by Luan
1 parent b7e5d5b3

Starting indexing attachments

requirements.txt
... ... @@ -10,6 +10,7 @@ django-cliauth==0.9
10 10 django-mobile==0.3.0
11 11 django-haystack==2.1
12 12 pysolr==2.1
  13 +poster==0.8.1
13 14 etiquetando==0.1
14 15 html2text
15 16 django-taggit
... ...
src/colab/custom_settings.py
... ... @@ -20,6 +20,9 @@ DJANGO_DATE_FORMAT_TO_JS = {
20 20  
21 21 LANGUAGE_CODE = 'pt-br'
22 22  
  23 +# The absolute path to the folder containing the attachments
  24 +ATTACHMENTS_FOLDER_PATH = ''
  25 +
23 26 # ORDERING_DATA receives the options to order for as its keys and a dict as
24 27 # value, if you want to order for the last name, you can use something like:
25 28 # 'last_name': {'name': 'Last Name', 'fields': 'last_name'} inside the dict,
... ... @@ -39,6 +42,23 @@ ORDERING_DATA = {
39 42 },
40 43 }
41 44  
  45 +# FILE_TYPE_GROUPINGS is a tuple of tuples containing the value to filter by,
  46 +# how it should be displayed, and a tuple of the file extensions it includes
  47 +FILE_TYPE_GROUPINGS = (
  48 + ('document', gettext(u'Document'),
  49 + ('doc', 'docx', 'odt', 'otx', 'dotx', 'pdf', 'ott')),
  50 + ('presentation', gettext(u'Presentation'), ('ppt', 'pptx', 'odp')),
  51 + ('text', gettext(u'Text'), ('txt', 'po', 'conf', 'log')),
  52 + ('code', gettext(u'Code'),
  53 + ('py', 'php', 'js', 'sql', 'sh', 'patch', 'diff', 'html', '')),
  54 + ('compressed', gettext(u'Compressed'), ('rar', 'zip', 'gz', 'tgz', 'bz2')),
  55 + ('image', gettext(u'Image'),
  56 + ('jpg', 'jpeg', 'png', 'tiff', 'gif', 'svg', 'psd', 'planner', 'cdr')),
  57 + ('spreadsheet', gettext(u'Spreadsheet'),
  58 + ('ods', 'xls', 'xlsx', 'xslt', 'csv')),
  59 +)
  60 +
  61 +
42 62 # the following variable define how many characters should be shown before
43 63 # a highlighted word, to make sure that the highlighted word will appear
44 64 HIGHLIGHT_NUM_CHARS_BEFORE_MATCH = 30
... ...
src/proxy/migrations/0003_create_attachment_view.py
... ... @@ -17,7 +17,8 @@ class Migration(DataMigration):
17 17 CONCAT(attachment.type, '/' , attachment.id, '/', attachment.filename) AS url,
18 18 attachment.type AS used_by,
19 19 attachment.filename AS filename,
20   - (SELECT LOWER(SUBSTRING(attachment.filename FROM '\w{2,3}$'))) AS mimetype,
  20 + attachment.id as attach_id,
  21 + (SELECT LOWER(SUBSTRING(attachment.filename FROM '\.(\w+)$'))) AS mimetype,
21 22 attachment.author AS author,
22 23 attachment.description AS description,
23 24 attachment.size AS size,
... ...
src/proxy/models.py
1 1 # -*- coding: utf-8 -*-
2 2  
  3 +import os
  4 +import urllib2
  5 +
  6 +from django.conf import settings
3 7 from django.db import models
4 8  
5 9 from accounts.models import User
... ... @@ -8,17 +12,28 @@ from hitcount.models import HitCountModelMixin
8 12  
9 13 class Attachment(models.Model, HitCountModelMixin):
10 14 url = models.TextField(primary_key=True)
  15 + attach_id = models.TextField()
11 16 used_by = models.TextField()
12 17 filename = models.TextField()
13 18 author = models.TextField(blank=True)
14 19 description = models.TextField(blank=True)
15 20 created = models.DateTimeField(blank=True)
16 21 mimetype = models.TextField(blank=True)
  22 + size = models.IntegerField(blank=True)
17 23  
18 24 class Meta:
19 25 managed = False
20 26 db_table = 'attachment_view'
21 27  
  28 + @property
  29 + def filepath(self):
  30 + return os.path.join(
  31 + settings.ATTACHMENTS_FOLDER_PATH,
  32 + self.used_by,
  33 + self.attach_id,
  34 + urllib2.quote(self.filename.encode('utf8'))
  35 + )
  36 +
22 37 def get_absolute_url(self):
23 38 return u'/raw-attachment/{}'.format(self.url)
24 39  
... ...
src/proxy/search_indexes.py
1 1 # -*- coding: utf-8 -*-
2 2  
3 3 import math
  4 +import string
4 5  
5   -from datetime import datetime
6   -
7   -from django.db.models import Q
  6 +from django.template import loader, Context
  7 +from django.utils.text import slugify
8 8 from haystack import indexes
  9 +from haystack.utils import log as logging
9 10  
10 11 from search.base_indexes import BaseIndex
11   -from .models import Ticket, Wiki, Revision
  12 +from .models import Attachment, Ticket, Wiki, Revision
  13 +
  14 +
  15 +logger = logging.getLogger('haystack')
  16 +
  17 +# string.maketrans always returns a byte string encoded with latin1, so decode it
  18 +# http://stackoverflow.com/questions/1324067/how-do-i-get-str-translate-to-work-with-unicode-strings
  19 +table = string.maketrans(
  20 + string.punctuation,
  21 + '.' * len(string.punctuation)
  22 +).decode('latin1')
  23 +
  24 +
class AttachmentIndex(BaseIndex, indexes.Indexable):
    """Haystack search index for ``Attachment`` rows.

    Besides the fields declared below, ``prepare`` renders the ``text``
    document from the attachment's metadata plus whatever content the
    search backend manages to extract from the attachment file on disk.
    """

    title = indexes.CharField(model_attr='filename')
    description = indexes.CharField(model_attr='description', null=True)
    modified = indexes.DateTimeField(model_attr='created', null=True)
    used_by = indexes.CharField(model_attr='used_by', null=True, stored=False)
    mimetype = indexes.CharField(
        model_attr='mimetype',
        null=True,
        stored=False
    )
    size = indexes.IntegerField(model_attr='size', null=True, stored=False)
    # Not bound to a model attribute; its value comes from prepare_filename().
    filename = indexes.CharField(stored=False)

    def get_model(self):
        """Return the model class covered by this index."""
        return Attachment

    def get_updated_field(self):
        """Return the name of the model date field used to find updated rows."""
        return 'created'

    def prepare(self, obj):
        """Build the data to index for ``obj``, including extracted file text.

        If the attachment file cannot be opened, a warning is logged and the
        data prepared by the parent class is returned without a rendered
        ``text`` override.
        """
        data = super(AttachmentIndex, self).prepare(obj)

        try:
            # Binary mode: attachments are arbitrary files (pdf, images, ...),
            # so they must not go through text-mode newline translation.
            file_obj = open(obj.filepath, 'rb')
        except IOError as e:
            logger.warning(u'IOError: %s - %s', e.strerror, e.filename)
            return data

        # Always release the handle once extraction is done (or has failed);
        # a reindex walks every attachment and would otherwise leak one open
        # file descriptor per file.
        with file_obj:
            backend = self._get_backend(None)
            extracted_data = backend.extract_file_contents(file_obj)

        t = loader.select_template(
            ('search/indexes/proxy/attachment_text.txt', )
        )
        data['text'] = t.render(Context({
            'object': obj,
            'extracted': extracted_data,
        }))
        return data

    def prepare_filename(self, obj):
        """Return the filename with every punctuation character as a space."""
        # ``table`` maps each punctuation character to '.', which is then
        # turned into a space so the pieces of the name are indexed as words.
        return obj.filename.translate(table).replace('.', ' ')

    def prepare_icon_name(self, obj):
        """Return the icon name used for attachment results."""
        return u'file'

    def prepare_type(self, obj):
        """Return the result type identifier for attachments."""
        return u'attachment'
12 72  
13 73  
14 74 class WikiIndex(BaseIndex, indexes.Indexable):
... ... @@ -26,7 +86,7 @@ class WikiIndex(BaseIndex, indexes.Indexable):
26 86 return u'{}\n{}'.format(obj.wiki_text, obj.collaborators)
27 87  
28 88 def prepare_icon_name(self, obj):
29   - return u'file'
  89 + return u'book'
30 90  
31 91 def prepare_type(self, obj):
32 92 return u'wiki'
... ...
src/proxy/templates/search/indexes/proxy/attachment_text.txt 0 → 100644
... ... @@ -0,0 +1,15 @@
  1 +{{ object.filename }}
  2 +{{ object.filename|slugify }}
  3 +{{ object.description }}
  4 +{{ object.description|slugify }}
  5 +{{ object.used_by }}
  6 +{{ object.mimetype }}
  7 +{{ object.get_author.get_full_name }}
  8 +
  9 +{% for k, v in extracted.metadata.items %}
  10 + {% for val in v %}
  11 + {{ k }}: {{ val|safe }}
  12 + {% endfor %}
  13 +{% endfor %}
  14 +
  15 +{{ extracted.contents|striptags|safe }}
... ...
src/search/forms.py
... ... @@ -23,8 +23,8 @@ class ColabSearchForm(SearchForm):
23 23 list = forms.MultipleChoiceField(
24 24 required=False,
25 25 label=_(u'Mailinglist'),
26   - choices=[(v, v) for v in MailingList.objects.values('name')
27   - for (v, v) in v.items()]
  26 + choices=[(v, v) for v in MailingList.objects.values_list(
  27 + 'name', flat=True)]
28 28 )
29 29 milestone = forms.CharField(required=False, label=_(u'Milestone'))
30 30 priority = forms.CharField(required=False, label=_(u'Priority'))
... ... @@ -40,30 +40,71 @@ class ColabSearchForm(SearchForm):
40 40 role = forms.CharField(required=False, label=_(u'Role'))
41 41 since = forms.DateField(required=False, label=_(u'Since'))
42 42 until = forms.DateField(required=False, label=_(u'Until'))
  43 + filename = forms.CharField(required=False, label=_(u'Filename'))
  44 + used_by = forms.CharField(required=False, label=_(u'Used by'))
  45 + mimetype = forms.CharField(required=False, label=_(u'File type'))
  46 + size = forms.CharField(required=False, label=_(u'Size'))
43 47  
44 48 def search(self):
45 49 if not self.is_valid():
46 50 return self.no_query_found()
47 51  
  52 + # filter_or goes here
  53 + sqs = self.searchqueryset.all()
  54 + mimetype = self.cleaned_data['mimetype']
  55 + if mimetype:
  56 + filter_mimetypes = {'mimetype__in': []}
  57 + for type_, display, mimelist in settings.FILE_TYPE_GROUPINGS:
  58 + if type_ in mimetype:
  59 + filter_mimetypes['mimetype__in'] += mimelist
  60 + if not self.cleaned_data['size']:
  61 + sqs = sqs.filter_or(mimetype__in=mimelist)
  62 +
  63 + if self.cleaned_data['size']:
  64 + # (1024 * 1024) / 2
  65 + # (1024 * 1024) * 10
  66 + filter_sizes = {}
  67 + filter_sizes_exp = {}
  68 + if '<500KB' in self.cleaned_data['size']:
  69 + filter_sizes['size__lt'] = 524288
  70 + if '500KB__10MB' in self.cleaned_data['size']:
  71 + filter_sizes_exp['size__gte'] = 524288
  72 + filter_sizes_exp['size__lte'] = 10485760
  73 + if '>10MB' in self.cleaned_data['size']:
  74 + filter_sizes['size__gt'] = 10485760
  75 +
  76 + if self.cleaned_data['mimetype']:
  77 + # Add the mimetypes filters to this dict and filter it
  78 + if filter_sizes_exp:
  79 + filter_sizes_exp.update(filter_mimetypes)
  80 + sqs = sqs.filter_or(**filter_sizes_exp)
  81 + for filter_or in filter_sizes.items():
  82 + filter_or = dict((filter_or, ))
  83 + filter_or.update(filter_mimetypes)
  84 + sqs = sqs.filter_or(**filter_or)
  85 + else:
  86 + for filter_or in filter_sizes.items():
  87 + filter_or = dict((filter_or, ))
  88 + sqs = sqs.filter_or(**filter_or)
  89 + sqs = sqs.filter_or(**filter_sizes_exp)
  90 +
  91 + if self.cleaned_data['used_by']:
  92 + sqs = sqs.filter_or(used_by__in=self.cleaned_data['used_by'].split())
  93 +
48 94 if self.cleaned_data.get('q'):
49 95 q = unicodedata.normalize(
50 96 'NFKD', unicode(self.cleaned_data.get('q'))
51 97 ).encode('ascii', 'ignore')
52   - sqs = self.searchqueryset.auto_query(q)
  98 + sqs = sqs.auto_query(q)
53 99 sqs = sqs.filter(content=AltParser(
54 100 'dismax',
55 101 q,
56 102 pf='title^2.1 author^1.9 description^1.7',
57 103 mm='2<70%'
58 104 ))
59   - else:
60   - sqs = self.searchqueryset.all()
61   -
62 105  
63 106 if self.cleaned_data['type']:
64   - "It will consider other types with a whitespace"
65   - types = self.cleaned_data['type']
66   - sqs = sqs.filter(type__in=types.split())
  107 + sqs = sqs.filter(type=self.cleaned_data['type'])
67 108  
68 109 if self.cleaned_data['order']:
69 110 for option, dict_order in settings.ORDERING_DATA.items():
... ... @@ -111,6 +152,9 @@ class ColabSearchForm(SearchForm):
111 152 if self.cleaned_data['until']:
112 153 sqs = sqs.filter(modified__lte=self.cleaned_data['until'])
113 154  
  155 + if self.cleaned_data['filename']:
  156 + sqs = sqs.filter(filename=self.cleaned_data['filename'])
  157 +
114 158 if self.load_all:
115 159 sqs = sqs.load_all()
116 160  
... ...
src/search/views.py
... ... @@ -5,6 +5,8 @@ from django.utils.translation import ugettext as _
5 5  
6 6 from haystack.views import SearchView
7 7  
  8 +from proxy.models import Attachment
  9 +
8 10  
9 11 class ColabSearchView(SearchView):
10 12 def extra_context(self, *args, **kwargs):
... ... @@ -106,6 +108,26 @@ class ColabSearchView(SearchView):
106 108 ('role', _(u'Role'), self.request.GET.get('role'))
107 109 ),
108 110 },
  111 + 'attachment': {
  112 + 'name': _(u'Attachment'),
  113 + 'fields': (
  114 + (
  115 + 'filename',
  116 + _(u'Filename'),
  117 + self.request.GET.get('filename')
  118 + ),
  119 + ('author', _(u'Author'), self.request.GET.get('author')),
  120 + (
  121 + 'used_by',
  122 + _(u'Used by'), self.request.GET.get('used_by')),
  123 + (
  124 + 'mimetype',
  125 + _(u'File type'),
  126 + self.request.GET.get('mimetype')
  127 + ),
  128 + ('size', _(u'Size'), self.request.GET.get('size')),
  129 + )
  130 + }
109 131 }
110 132  
111 133 try:
... ... @@ -113,10 +135,36 @@ class ColabSearchView(SearchView):
113 135 except AttributeError:
114 136 type_chosen = ''
115 137  
  138 + mimetype_choices = ()
  139 + size_choices = ()
  140 + used_by_choices = ()
  141 +
  142 + if type_chosen == 'attachment':
  143 + mimetype_choices = [(type_, display) for type_, display, mimelist_ in settings.FILE_TYPE_GROUPINGS]
  144 + size_choices = [
  145 + ('<500KB', u'< 500 KB'),
  146 + ('500KB__10MB', u'>= 500 KB <= 10 MB'),
  147 + ('>10MB', u'> 10 MB'),
  148 + ]
  149 + used_by_choices = set([
  150 + (v, v) for v in Attachment.objects.values_list(
  151 + 'used_by', flat=True)
  152 + ])
  153 +
  154 + mimetype_chosen = self.request.GET.get('mimetype')
  155 + size_chosen = self.request.GET.get('size')
  156 + used_by_chosen = self.request.GET.get('used_by')
  157 +
116 158 return dict(
117 159 filters=types.get(type_chosen),
118 160 type_chosen=type_chosen,
119 161 order_data=settings.ORDERING_DATA,
120 162 date_format=date_format,
121 163 use_language=use_language,
  164 + mimetype_chosen=mimetype_chosen if mimetype_chosen else '',
  165 + mimetype_choices=mimetype_choices,
  166 + size_chosen=size_chosen if size_chosen else '',
  167 + size_choices=size_choices,
  168 + used_by_chosen=used_by_chosen if used_by_chosen else '',
  169 + used_by_choices=used_by_choices,
122 170 )
... ...
src/templates/search.html
... ... @@ -21,7 +21,7 @@
21 21  
22 22 <ul class="none indent">
23 23 <li {% ifequal type "wiki" %} title="{% trans "Remove filter" %}" {% endifequal %}>
24   - <span class="glyphicon glyphicon-file"></span>
  24 + <span class="glyphicon glyphicon-book"></span>
25 25 <a href="{% ifnotequal type "wiki" %} {% append_to_get type='wiki' %} {% else %} {% append_to_get type="" %} {% endifnotequal %}">{% trans "Wiki" %}</a>
26 26 </li>
27 27 <li {% ifequal type "thread" %} title="{% trans "Remove filter" %}" {% endifequal %}>
... ...
src/templates/search/search-wiki-preview.html
1 1 {% load i18n %}
2 2  
3   -<span class="glyphicon glyphicon-file" title="{{ result.type }}"></span>
  3 +<span class="glyphicon glyphicon-book" title="{{ result.type }}"></span>
4 4  
5 5 <span class="subject">
6 6 <a href="{{ result.url }}">{{ result.name }}</a>
... ...
src/templates/search/search.html
... ... @@ -62,15 +62,69 @@
62 62 {% for field_lookup, field_display, field_value in filters.fields %}
63 63 <div class="form-group">
64 64 <label for="{{ field_lookup }}">{{ field_display }}</label>
65   - {% ifequal field_lookup "list" %}
  65 + {% if field_lookup == "list" %}
66 66 <select name="{{ field_lookup }}" class="form-control" multiple>
67 67 {% for value, option in form.fields.list.choices %}
68 68 <option value="{{ value }}" {% if value in field_value %}selected{% endif %}>{{ option }}</option>
69 69 {% endfor %}
70 70 </select>
  71 + {% elif field_lookup == "size" %}
  72 + <ul class="unstyled-list">
  73 + {% for value, option in size_choices %}
  74 + {% with value|add:" "|add:size_chosen as sizelistadd %}
  75 + {% if value in field_value %}
  76 + <li class="selected" title="{% trans "Remove filter" %}">
  77 + <span class="glyphicon glyphicon-remove"></span>
  78 + <a href="{% pop_from_get size=value %}">{{ option }}</a>
  79 + </li>
  80 + {% else %}
  81 + <li>
  82 + <span class="glyphicon glyphicon-chevron-right"></span>
  83 + <a href="{% append_to_get size=sizelistadd %}">{{ option }}</a>
  84 + </li>
  85 + {% endif %}
  86 + {% endwith %}
  87 + {% endfor %}
  88 + </ul>
  89 + {% elif field_lookup == "mimetype" %}
<ul class="unstyled-list">
  {# One entry per file-type group; already chosen groups are rendered as removable filters. #}
  {% for value, option in mimetype_choices %}
    {% with value|add:" "|add:mimetype_chosen as mimelistadd %}
      {# The view exposes the current selection as "mimetype_chosen". #}
      {% if value in mimetype_chosen %}
        <li class="selected" title="{% trans "Remove filter" %}">
          <span class="glyphicon glyphicon-remove"></span>
          <a href="{% pop_from_get mimetype=value %}">{{ option }}</a>
        </li>
      {% else %}
        <li>
          <span class="glyphicon glyphicon-chevron-right"></span>
          <a href="{% append_to_get mimetype=mimelistadd %}">{{ option }}</a>
        </li>
      {% endif %}
    {% endwith %}
  {% endfor %}
</ul>
  107 + {% elif field_lookup == "used_by" %}
  108 + <ul class="unstyled-list">
  109 + {% for value, option in used_by_choices %}
  110 + {% with value|add:" "|add:used_by_chosen as used_byadd %}
  111 + {% if value in used_by_chosen %}
  112 + <li class="selected" title="{% trans "Remove filter" %}">
  113 + <span class="glyphicon glyphicon-remove"></span>
  114 + <a href="{% pop_from_get used_by=value %}">{{ option }}</a>
  115 + </li>
  116 + {% else %}
  117 + <li>
  118 + <span class="glyphicon glyphicon-chevron-right"></span>
  119 + <a href="{% append_to_get used_by=used_byadd %}">{{ option }}</a>
  120 + </li>
  121 + {% endif %}
  122 + {% endwith %}
  123 + {% endfor %}
  124 + </ul>
71 125 {% else %}
72 126 <input type="text" class="form-control" placeholder="{{ field_display }}" name="{{ field_lookup }}" {% if field_value %}value="{{ field_value }}"{% endif %}>
73   - {% endifequal %}
  127 + {% endif %}
74 128 </div>
75 129 {% endfor %}
76 130 <button type="submit" class="btn btn-default pull-right">
... ... @@ -101,7 +155,7 @@
101 155  
102 156 <ul class="unstyled-list">
103 157 <li>
104   - <span class="glyphicon glyphicon-file"></span>
  158 + <span class="glyphicon glyphicon-book"></span>
105 159 <a href="{% append_to_get type='wiki' %}">{% trans "Wiki" %}</a>
106 160 </li>
107 161 <li>
... ... @@ -120,6 +174,10 @@
120 174 <span class="glyphicon glyphicon-user"></span>
121 175 <a href="{% append_to_get type='user' %}">{% trans "User" %}</a>
122 176 </li>
  177 + <li>
  178 + <span class="glyphicon glyphicon-file"></span>
  179 + <a href="{% append_to_get type='attachment' %}">{% trans "Attachment" %}</a>
  180 + </li>
123 181 </ul>
124 182 {% endif %}
125 183 <hr />
... ...