Commit 092b3a271c8131bcac0d2cd8f481b8cee06c1103
1 parent
b7e5d5b3
Exists in
master
and in
39 other branches
Starting indexing attachments
Showing
11 changed files
with
282 additions
and
20 deletions
Show diff stats
requirements.txt
src/colab/custom_settings.py
... | ... | @@ -20,6 +20,9 @@ DJANGO_DATE_FORMAT_TO_JS = { |
20 | 20 | |
21 | 21 | LANGUAGE_CODE = 'pt-br' |
22 | 22 | |
23 | +# The absolute path to the folder containing the attachments | |
24 | +ATTACHMENTS_FOLDER_PATH = '' | |
25 | + | |
23 | 26 | # ORDERING_DATA receives the options to order by as its keys and a dict as |
24 | 27 | # value. If you want to order by the last name, you can use something like: |
25 | 28 | # 'last_name': {'name': 'Last Name', 'fields': 'last_name'} inside the dict, |
... | ... | @@ -39,6 +42,23 @@ ORDERING_DATA = { |
39 | 42 | }, |
40 | 43 | } |
41 | 44 | |
45 | +# FILE_TYPE_GROUPINGS is a tuple of tuples, each containing the value to filter by, | |
46 | +# how it should be displayed, and a tuple of the mimetypes (file extensions) it includes | |
47 | +FILE_TYPE_GROUPINGS = ( | |
48 | + ('document', gettext(u'Document'), | |
49 | + ('doc', 'docx', 'odt', 'otx', 'dotx', 'pdf', 'ott')), | |
50 | + ('presentation', gettext(u'Presentation'), ('ppt', 'pptx', 'odp')), | |
51 | + ('text', gettext(u'Text'), ('txt', 'po', 'conf', 'log')), | |
52 | + ('code', gettext(u'Code'), | |
53 | + ('py', 'php', 'js', 'sql', 'sh', 'patch', 'diff', 'html', '')), | |
54 | + ('compressed', gettext(u'Compressed'), ('rar', 'zip', 'gz', 'tgz', 'bz2')), | |
55 | + ('image', gettext(u'Image'), | |
56 | + ('jpg', 'jpeg', 'png', 'tiff', 'gif', 'svg', 'psd', 'planner', 'cdr')), | |
57 | + ('spreadsheet', gettext(u'Spreadsheet'), | |
58 | + ('ods', 'xls', 'xlsx', 'xslt', 'csv')), | |
59 | +) | |
60 | + | |
61 | + | |
42 | 62 | # the following variable defines how many characters should be shown before |
43 | 63 | # a highlighted word, to make sure that the highlighted word will appear |
44 | 64 | HIGHLIGHT_NUM_CHARS_BEFORE_MATCH = 30 | ... | ... |
src/proxy/migrations/0003_create_attachment_view.py
... | ... | @@ -17,7 +17,8 @@ class Migration(DataMigration): |
17 | 17 | CONCAT(attachment.type, '/' , attachment.id, '/', attachment.filename) AS url, |
18 | 18 | attachment.type AS used_by, |
19 | 19 | attachment.filename AS filename, |
20 | - (SELECT LOWER(SUBSTRING(attachment.filename FROM '\w{2,3}$'))) AS mimetype, | |
20 | + attachment.id as attach_id, | |
21 | + (SELECT LOWER(SUBSTRING(attachment.filename FROM '\.(\w+)$'))) AS mimetype, | |
21 | 22 | attachment.author AS author, |
22 | 23 | attachment.description AS description, |
23 | 24 | attachment.size AS size, | ... | ... |
src/proxy/models.py
1 | 1 | # -*- coding: utf-8 -*- |
2 | 2 | |
3 | +import os | |
4 | +import urllib2 | |
5 | + | |
6 | +from django.conf import settings | |
3 | 7 | from django.db import models |
4 | 8 | |
5 | 9 | from accounts.models import User |
... | ... | @@ -8,17 +12,28 @@ from hitcount.models import HitCountModelMixin |
8 | 12 | |
9 | 13 | class Attachment(models.Model, HitCountModelMixin): |
10 | 14 | url = models.TextField(primary_key=True) |
15 | + attach_id = models.TextField() | |
11 | 16 | used_by = models.TextField() |
12 | 17 | filename = models.TextField() |
13 | 18 | author = models.TextField(blank=True) |
14 | 19 | description = models.TextField(blank=True) |
15 | 20 | created = models.DateTimeField(blank=True) |
16 | 21 | mimetype = models.TextField(blank=True) |
22 | + size = models.IntegerField(blank=True) | |
17 | 23 | |
18 | 24 | class Meta: |
19 | 25 | managed = False |
20 | 26 | db_table = 'attachment_view' |
21 | 27 | |
28 | + @property | |
29 | + def filepath(self): | |
30 | + return os.path.join( | |
31 | + settings.ATTACHMENTS_FOLDER_PATH, | |
32 | + self.used_by, | |
33 | + self.attach_id, | |
34 | + urllib2.quote(self.filename.encode('utf8')) | |
35 | + ) | |
36 | + | |
22 | 37 | def get_absolute_url(self): |
23 | 38 | return u'/raw-attachment/{}'.format(self.url) |
24 | 39 | ... | ... |
src/proxy/search_indexes.py
1 | 1 | # -*- coding: utf-8 -*- |
2 | 2 | |
3 | 3 | import math |
4 | +import string | |
4 | 5 | |
5 | -from datetime import datetime | |
6 | - | |
7 | -from django.db.models import Q | |
6 | +from django.template import loader, Context | |
7 | +from django.utils.text import slugify | |
8 | 8 | from haystack import indexes |
9 | +from haystack.utils import log as logging | |
9 | 10 | |
10 | 11 | from search.base_indexes import BaseIndex |
11 | -from .models import Ticket, Wiki, Revision | |
12 | +from .models import Attachment, Ticket, Wiki, Revision | |
13 | + | |
14 | + | |
15 | +logger = logging.getLogger('haystack') | |
16 | + | |
17 | +# string.maketrans always returns a string encoded with latin1 | |
18 | +# http://stackoverflow.com/questions/1324067/how-do-i-get-str-translate-to-work-with-unicode-strings | |
19 | +table = string.maketrans( | |
20 | + string.punctuation, | |
21 | + '.' * len(string.punctuation) | |
22 | +).decode('latin1') | |
23 | + | |
24 | + | |
25 | +class AttachmentIndex(BaseIndex, indexes.Indexable): | |
26 | + title = indexes.CharField(model_attr='filename') | |
27 | + description = indexes.CharField(model_attr='description', null=True) | |
28 | + modified = indexes.DateTimeField(model_attr='created', null=True) | |
29 | + used_by = indexes.CharField(model_attr='used_by', null=True, stored=False) | |
30 | + mimetype = indexes.CharField( | |
31 | + model_attr='mimetype', | |
32 | + null=True, | |
33 | + stored=False | |
34 | + ) | |
35 | + size = indexes.IntegerField(model_attr='size', null=True, stored=False) | |
36 | + filename = indexes.CharField(stored=False) | |
37 | + | |
38 | + def get_model(self): | |
39 | + return Attachment | |
40 | + | |
41 | + def get_updated_field(self): | |
42 | + return 'created' | |
43 | + | |
44 | + def prepare(self, obj): | |
45 | + data = super(AttachmentIndex, self).prepare(obj) | |
46 | + | |
47 | + try: | |
48 | + file_obj = open(obj.filepath) | |
49 | + except IOError as e: | |
50 | + logger.warning(u'IOError: %s - %s', e.strerror, e.filename) | |
51 | + return data | |
52 | + backend = self._get_backend(None) | |
53 | + extracted_data = backend.extract_file_contents(file_obj) | |
54 | + | |
55 | + t = loader.select_template( | |
56 | + ('search/indexes/proxy/attachment_text.txt', ) | |
57 | + ) | |
58 | + data['text'] = t.render(Context({ | |
59 | + 'object': obj, | |
60 | + 'extracted': extracted_data, | |
61 | + })) | |
62 | + return data | |
63 | + | |
64 | + def prepare_filename(self, obj): | |
65 | + return obj.filename.translate(table).replace('.', ' ') | |
66 | + | |
67 | + def prepare_icon_name(self, obj): | |
68 | + return u'file' | |
69 | + | |
70 | + def prepare_type(self, obj): | |
71 | + return u'attachment' | |
12 | 72 | |
13 | 73 | |
14 | 74 | class WikiIndex(BaseIndex, indexes.Indexable): |
... | ... | @@ -26,7 +86,7 @@ class WikiIndex(BaseIndex, indexes.Indexable): |
26 | 86 | return u'{}\n{}'.format(obj.wiki_text, obj.collaborators) |
27 | 87 | |
28 | 88 | def prepare_icon_name(self, obj): |
29 | - return u'file' | |
89 | + return u'book' | |
30 | 90 | |
31 | 91 | def prepare_type(self, obj): |
32 | 92 | return u'wiki' | ... | ... |
src/proxy/templates/search/indexes/proxy/attachment_text.txt
0 → 100644
... | ... | @@ -0,0 +1,15 @@ |
1 | +{{ object.filename }} | |
2 | +{{ object.filename|slugify }} | |
3 | +{{ object.description }} | |
4 | +{{ object.description|slugify }} | |
5 | +{{ object.used_by }} | |
6 | +{{ object.mimetype }} | |
7 | +{{ object.get_author.get_full_name }} | |
8 | + | |
9 | +{% for k, v in extracted.metadata.items %} | |
10 | + {% for val in v %} | |
11 | + {{ k }}: {{ val|safe }} | |
12 | + {% endfor %} | |
13 | +{% endfor %} | |
14 | + | |
15 | +{{ extracted.contents|striptags|safe }} | ... | ... |
src/search/forms.py
... | ... | @@ -23,8 +23,8 @@ class ColabSearchForm(SearchForm): |
23 | 23 | list = forms.MultipleChoiceField( |
24 | 24 | required=False, |
25 | 25 | label=_(u'Mailinglist'), |
26 | - choices=[(v, v) for v in MailingList.objects.values('name') | |
27 | - for (v, v) in v.items()] | |
26 | + choices=[(v, v) for v in MailingList.objects.values_list( | |
27 | + 'name', flat=True)] | |
28 | 28 | ) |
29 | 29 | milestone = forms.CharField(required=False, label=_(u'Milestone')) |
30 | 30 | priority = forms.CharField(required=False, label=_(u'Priority')) |
... | ... | @@ -40,30 +40,71 @@ class ColabSearchForm(SearchForm): |
40 | 40 | role = forms.CharField(required=False, label=_(u'Role')) |
41 | 41 | since = forms.DateField(required=False, label=_(u'Since')) |
42 | 42 | until = forms.DateField(required=False, label=_(u'Until')) |
43 | + filename = forms.CharField(required=False, label=_(u'Filename')) | |
44 | + used_by = forms.CharField(required=False, label=_(u'Used by')) | |
45 | + mimetype = forms.CharField(required=False, label=_(u'File type')) | |
46 | + size = forms.CharField(required=False, label=_(u'Size')) | |
43 | 47 | |
44 | 48 | def search(self): |
45 | 49 | if not self.is_valid(): |
46 | 50 | return self.no_query_found() |
47 | 51 | |
52 | + # filter_or goes here | |
53 | + sqs = self.searchqueryset.all() | |
54 | + mimetype = self.cleaned_data['mimetype'] | |
55 | + if mimetype: | |
56 | + filter_mimetypes = {'mimetype__in': []} | |
57 | + for type_, display, mimelist in settings.FILE_TYPE_GROUPINGS: | |
58 | + if type_ in mimetype: | |
59 | + filter_mimetypes['mimetype__in'] += mimelist | |
60 | + if not self.cleaned_data['size']: | |
61 | + sqs = sqs.filter_or(mimetype__in=mimelist) | |
62 | + | |
63 | + if self.cleaned_data['size']: | |
64 | + # (1024 * 1024) / 2 | |
65 | + # (1024 * 1024) * 10 | |
66 | + filter_sizes = {} | |
67 | + filter_sizes_exp = {} | |
68 | + if '<500KB' in self.cleaned_data['size']: | |
69 | + filter_sizes['size__lt'] = 524288 | |
70 | + if '500KB__10MB' in self.cleaned_data['size']: | |
71 | + filter_sizes_exp['size__gte'] = 524288 | |
72 | + filter_sizes_exp['size__lte'] = 10485760 | |
73 | + if '>10MB' in self.cleaned_data['size']: | |
74 | + filter_sizes['size__gt'] = 10485760 | |
75 | + | |
76 | + if self.cleaned_data['mimetype']: | |
77 | + # Add the mimetypes filters to this dict and filter it | |
78 | + if filter_sizes_exp: | |
79 | + filter_sizes_exp.update(filter_mimetypes) | |
80 | + sqs = sqs.filter_or(**filter_sizes_exp) | |
81 | + for filter_or in filter_sizes.items(): | |
82 | + filter_or = dict((filter_or, )) | |
83 | + filter_or.update(filter_mimetypes) | |
84 | + sqs = sqs.filter_or(**filter_or) | |
85 | + else: | |
86 | + for filter_or in filter_sizes.items(): | |
87 | + filter_or = dict((filter_or, )) | |
88 | + sqs = sqs.filter_or(**filter_or) | |
89 | + sqs = sqs.filter_or(**filter_sizes_exp) | |
90 | + | |
91 | + if self.cleaned_data['used_by']: | |
92 | + sqs = sqs.filter_or(used_by__in=self.cleaned_data['used_by'].split()) | |
93 | + | |
48 | 94 | if self.cleaned_data.get('q'): |
49 | 95 | q = unicodedata.normalize( |
50 | 96 | 'NFKD', unicode(self.cleaned_data.get('q')) |
51 | 97 | ).encode('ascii', 'ignore') |
52 | - sqs = self.searchqueryset.auto_query(q) | |
98 | + sqs = sqs.auto_query(q) | |
53 | 99 | sqs = sqs.filter(content=AltParser( |
54 | 100 | 'dismax', |
55 | 101 | q, |
56 | 102 | pf='title^2.1 author^1.9 description^1.7', |
57 | 103 | mm='2<70%' |
58 | 104 | )) |
59 | - else: | |
60 | - sqs = self.searchqueryset.all() | |
61 | - | |
62 | 105 | |
63 | 106 | if self.cleaned_data['type']: |
64 | - "It will consider other types with a whitespace" | |
65 | - types = self.cleaned_data['type'] | |
66 | - sqs = sqs.filter(type__in=types.split()) | |
107 | + sqs = sqs.filter(type=self.cleaned_data['type']) | |
67 | 108 | |
68 | 109 | if self.cleaned_data['order']: |
69 | 110 | for option, dict_order in settings.ORDERING_DATA.items(): |
... | ... | @@ -111,6 +152,9 @@ class ColabSearchForm(SearchForm): |
111 | 152 | if self.cleaned_data['until']: |
112 | 153 | sqs = sqs.filter(modified__lte=self.cleaned_data['until']) |
113 | 154 | |
155 | + if self.cleaned_data['filename']: | |
156 | + sqs = sqs.filter(filename=self.cleaned_data['filename']) | |
157 | + | |
114 | 158 | if self.load_all: |
115 | 159 | sqs = sqs.load_all() |
116 | 160 | ... | ... |
src/search/views.py
... | ... | @@ -5,6 +5,8 @@ from django.utils.translation import ugettext as _ |
5 | 5 | |
6 | 6 | from haystack.views import SearchView |
7 | 7 | |
8 | +from proxy.models import Attachment | |
9 | + | |
8 | 10 | |
9 | 11 | class ColabSearchView(SearchView): |
10 | 12 | def extra_context(self, *args, **kwargs): |
... | ... | @@ -106,6 +108,26 @@ class ColabSearchView(SearchView): |
106 | 108 | ('role', _(u'Role'), self.request.GET.get('role')) |
107 | 109 | ), |
108 | 110 | }, |
111 | + 'attachment': { | |
112 | + 'name': _(u'Attachment'), | |
113 | + 'fields': ( | |
114 | + ( | |
115 | + 'filename', | |
116 | + _(u'Filename'), | |
117 | + self.request.GET.get('filename') | |
118 | + ), | |
119 | + ('author', _(u'Author'), self.request.GET.get('author')), | |
120 | + ( | |
121 | + 'used_by', | |
122 | + _(u'Used by'), self.request.GET.get('used_by')), | |
123 | + ( | |
124 | + 'mimetype', | |
125 | + _(u'File type'), | |
126 | + self.request.GET.get('mimetype') | |
127 | + ), | |
128 | + ('size', _(u'Size'), self.request.GET.get('size')), | |
129 | + ) | |
130 | + } | |
109 | 131 | } |
110 | 132 | |
111 | 133 | try: |
... | ... | @@ -113,10 +135,36 @@ class ColabSearchView(SearchView): |
113 | 135 | except AttributeError: |
114 | 136 | type_chosen = '' |
115 | 137 | |
138 | + mimetype_choices = () | |
139 | + size_choices = () | |
140 | + used_by_choices = () | |
141 | + | |
142 | + if type_chosen == 'attachment': | |
143 | + mimetype_choices = [(type_, display) for type_, display, mimelist_ in settings.FILE_TYPE_GROUPINGS] | |
144 | + size_choices = [ | |
145 | + ('<500KB', u'< 500 KB'), | |
146 | + ('500KB__10MB', u'>= 500 KB <= 10 MB'), | |
147 | + ('>10MB', u'> 10 MB'), | |
148 | + ] | |
149 | + used_by_choices = set([ | |
150 | + (v, v) for v in Attachment.objects.values_list( | |
151 | + 'used_by', flat=True) | |
152 | + ]) | |
153 | + | |
154 | + mimetype_chosen = self.request.GET.get('mimetype') | |
155 | + size_chosen = self.request.GET.get('size') | |
156 | + used_by_chosen = self.request.GET.get('used_by') | |
157 | + | |
116 | 158 | return dict( |
117 | 159 | filters=types.get(type_chosen), |
118 | 160 | type_chosen=type_chosen, |
119 | 161 | order_data=settings.ORDERING_DATA, |
120 | 162 | date_format=date_format, |
121 | 163 | use_language=use_language, |
164 | + mimetype_chosen=mimetype_chosen if mimetype_chosen else '', | |
165 | + mimetype_choices=mimetype_choices, | |
166 | + size_chosen=size_chosen if size_chosen else '', | |
167 | + size_choices=size_choices, | |
168 | + used_by_chosen=used_by_chosen if used_by_chosen else '', | |
169 | + used_by_choices=used_by_choices, | |
122 | 170 | ) | ... | ... |
src/templates/search.html
... | ... | @@ -21,7 +21,7 @@ |
21 | 21 | |
22 | 22 | <ul class="none indent"> |
23 | 23 | <li {% ifequal type "wiki" %} title="{% trans "Remove filter" %}" {% endifequal %}> |
24 | - <span class="glyphicon glyphicon-file"></span> | |
24 | + <span class="glyphicon glyphicon-book"></span> | |
25 | 25 | <a href="{% ifnotequal type "wiki" %} {% append_to_get type='wiki' %} {% else %} {% append_to_get type="" %} {% endifnotequal %}">{% trans "Wiki" %}</a> |
26 | 26 | </li> |
27 | 27 | <li {% ifequal type "thread" %} title="{% trans "Remove filter" %}" {% endifequal %}> | ... | ... |
src/templates/search/search-wiki-preview.html
src/templates/search/search.html
... | ... | @@ -62,15 +62,69 @@ |
62 | 62 | {% for field_lookup, field_display, field_value in filters.fields %} |
63 | 63 | <div class="form-group"> |
64 | 64 | <label for="{{ field_lookup }}">{{ field_display }}</label> |
65 | - {% ifequal field_lookup "list" %} | |
65 | + {% if field_lookup == "list" %} | |
66 | 66 | <select name="{{ field_lookup }}" class="form-control" multiple> |
67 | 67 | {% for value, option in form.fields.list.choices %} |
68 | 68 | <option value="{{ value }}" {% if value in field_value %}selected{% endif %}>{{ option }}</option> |
69 | 69 | {% endfor %} |
70 | 70 | </select> |
71 | + {% elif field_lookup == "size" %} | |
72 | + <ul class="unstyled-list"> | |
73 | + {% for value, option in size_choices %} | |
74 | + {% with value|add:" "|add:size_chosen as sizelistadd %} | |
75 | + {% if value in field_value %} | |
76 | + <li class="selected" title="{% trans "Remove filter" %}"> | |
77 | + <span class="glyphicon glyphicon-remove"></span> | |
78 | + <a href="{% pop_from_get size=value %}">{{ option }}</a> | |
79 | + </li> | |
80 | + {% else %} | |
81 | + <li> | |
82 | + <span class="glyphicon glyphicon-chevron-right"></span> | |
83 | + <a href="{% append_to_get size=sizelistadd %}">{{ option }}</a> | |
84 | + </li> | |
85 | + {% endif %} | |
86 | + {% endwith %} | |
87 | + {% endfor %} | |
88 | + </ul> | |
89 | + {% elif field_lookup == "mimetype" %} | |
90 | + <ul class="unstyled-list"> | |
91 | + {% for value, option in mimetype_choices %} | |
92 | + {% with value|add:" "|add:mimetype_chosen as mimelistadd %} | |
93 | + {% if value in mime_chosen %} | |
94 | + <li class="selected" title="{% trans "Remove filter" %}"> | |
95 | + <span class="glyphicon glyphicon-remove"></span> | |
96 | + <a href="{% pop_from_get mimetype=value %}">{{ option }}</a> | |
97 | + </li> | |
98 | + {% else %} | |
99 | + <li> | |
100 | + <span class="glyphicon glyphicon-chevron-right"></span> | |
101 | + <a href="{% append_to_get mimetype=mimelistadd %}">{{ option }}</a> | |
102 | + </li> | |
103 | + {% endif %} | |
104 | + {% endwith %} | |
105 | + {% endfor %} | |
106 | + </ul> | |
107 | + {% elif field_lookup == "used_by" %} | |
108 | + <ul class="unstyled-list"> | |
109 | + {% for value, option in used_by_choices %} | |
110 | + {% with value|add:" "|add:used_by_chosen as used_byadd %} | |
111 | + {% if value in used_by_chosen %} | |
112 | + <li class="selected" title="{% trans "Remove filter" %}"> | |
113 | + <span class="glyphicon glyphicon-remove"></span> | |
114 | + <a href="{% pop_from_get used_by=value %}">{{ option }}</a> | |
115 | + </li> | |
116 | + {% else %} | |
117 | + <li> | |
118 | + <span class="glyphicon glyphicon-chevron-right"></span> | |
119 | + <a href="{% append_to_get used_by=used_byadd %}">{{ option }}</a> | |
120 | + </li> | |
121 | + {% endif %} | |
122 | + {% endwith %} | |
123 | + {% endfor %} | |
124 | + </ul> | |
71 | 125 | {% else %} |
72 | 126 | <input type="text" class="form-control" placeholder="{{ field_display }}" name="{{ field_lookup }}" {% if field_value %}value="{{ field_value }}"{% endif %}> |
73 | - {% endifequal %} | |
127 | + {% endif %} | |
74 | 128 | </div> |
75 | 129 | {% endfor %} |
76 | 130 | <button type="submit" class="btn btn-default pull-right"> |
... | ... | @@ -101,7 +155,7 @@ |
101 | 155 | |
102 | 156 | <ul class="unstyled-list"> |
103 | 157 | <li> |
104 | - <span class="glyphicon glyphicon-file"></span> | |
158 | + <span class="glyphicon glyphicon-book"></span> | |
105 | 159 | <a href="{% append_to_get type='wiki' %}">{% trans "Wiki" %}</a> |
106 | 160 | </li> |
107 | 161 | <li> |
... | ... | @@ -120,6 +174,10 @@ |
120 | 174 | <span class="glyphicon glyphicon-user"></span> |
121 | 175 | <a href="{% append_to_get type='user' %}">{% trans "User" %}</a> |
122 | 176 | </li> |
177 | + <li> | |
178 | + <span class="glyphicon glyphicon-file"></span> | |
179 | + <a href="{% append_to_get type='attachment' %}">{% trans "Attachment" %}</a> | |
180 | + </li> | |
123 | 181 | </ul> |
124 | 182 | {% endif %} |
125 | 183 | <hr /> | ... | ... |