diff --git a/requirements.txt b/requirements.txt index ff4aad7..280cab0 100644 --- a/requirements.txt +++ b/requirements.txt @@ -10,6 +10,7 @@ django-cliauth==0.9 django-mobile==0.3.0 django-haystack==2.1 pysolr==2.1 +poster==0.8.1 etiquetando==0.1 html2text django-taggit diff --git a/src/colab/custom_settings.py b/src/colab/custom_settings.py index dafca82..cdf1323 100644 --- a/src/colab/custom_settings.py +++ b/src/colab/custom_settings.py @@ -20,6 +20,9 @@ DJANGO_DATE_FORMAT_TO_JS = { LANGUAGE_CODE = 'pt-br' +# The absolute path to the folder containing the attachments +ATTACHMENTS_FOLDER_PATH = '' + # ORDERING_DATA receives the options to order for as it's keys and a dict as # value, if you want to order for the last name, you can use something like: # 'last_name': {'name': 'Last Name', 'fields': 'last_name'} inside the dict, @@ -39,6 +42,23 @@ ORDERING_DATA = { }, } +# File type groupings is a tuple of tuples containing what it should filter, +# how it should be displayed, and a tuple of which mimetypes it includes +FILE_TYPE_GROUPINGS = ( + ('document', gettext(u'Document'), + ('doc', 'docx', 'odt', 'otx', 'dotx', 'pdf', 'ott')), + ('presentation', gettext(u'Presentation'), ('ppt', 'pptx', 'odp')), + ('text', gettext(u'Text'), ('txt', 'po', 'conf', 'log')), + ('code', gettext(u'Code'), + ('py', 'php', 'js', 'sql', 'sh', 'patch', 'diff', 'html', '')), + ('compressed', gettext(u'Compressed'), ('rar', 'zip', 'gz', 'tgz', 'bz2')), + ('image', gettext(u'Image'), + ('jpg', 'jpeg', 'png', 'tiff', 'gif', 'svg', 'psd', 'planner', 'cdr')), + ('spreadsheet', gettext(u'Spreadsheet'), + ('ods', 'xls', 'xlsx', 'xslt', 'csv')), +) + + # the following variable define how many characters should be shown before # a highlighted word, to make sure that the highlighted word will appear HIGHLIGHT_NUM_CHARS_BEFORE_MATCH = 30 diff --git a/src/proxy/migrations/0003_create_attachment_view.py b/src/proxy/migrations/0003_create_attachment_view.py index bdb8ea8..16b6c18 100644 --- 
a/src/proxy/migrations/0003_create_attachment_view.py +++ b/src/proxy/migrations/0003_create_attachment_view.py @@ -17,7 +17,8 @@ class Migration(DataMigration): CONCAT(attachment.type, '/' , attachment.id, '/', attachment.filename) AS url, attachment.type AS used_by, attachment.filename AS filename, - (SELECT LOWER(SUBSTRING(attachment.filename FROM '\w{2,3}$'))) AS mimetype, + attachment.id as attach_id, + (SELECT LOWER(SUBSTRING(attachment.filename FROM '\.(\w+)$'))) AS mimetype, attachment.author AS author, attachment.description AS description, attachment.size AS size, diff --git a/src/proxy/models.py b/src/proxy/models.py index d618193..fd1470f 100644 --- a/src/proxy/models.py +++ b/src/proxy/models.py @@ -1,5 +1,9 @@ # -*- coding: utf-8 -*- +import os +import urllib2 + +from django.conf import settings from django.db import models from accounts.models import User @@ -8,17 +12,28 @@ from hitcount.models import HitCountModelMixin class Attachment(models.Model, HitCountModelMixin): url = models.TextField(primary_key=True) + attach_id = models.TextField() used_by = models.TextField() filename = models.TextField() author = models.TextField(blank=True) description = models.TextField(blank=True) created = models.DateTimeField(blank=True) mimetype = models.TextField(blank=True) + size = models.IntegerField(blank=True) class Meta: managed = False db_table = 'attachment_view' + @property + def filepath(self): + return os.path.join( + settings.ATTACHMENTS_FOLDER_PATH, + self.used_by, + self.attach_id, + urllib2.quote(self.filename.encode('utf8')) + ) + def get_absolute_url(self): return u'/raw-attachment/{}'.format(self.url) diff --git a/src/proxy/search_indexes.py b/src/proxy/search_indexes.py index 8b831ca..5f580e5 100644 --- a/src/proxy/search_indexes.py +++ b/src/proxy/search_indexes.py @@ -1,14 +1,74 @@ # -*- coding: utf-8 -*- import math +import string -from datetime import datetime - -from django.db.models import Q +from django.template import loader, Context 
+from django.utils.text import slugify from haystack import indexes +from haystack.utils import log as logging from search.base_indexes import BaseIndex -from .models import Ticket, Wiki, Revision +from .models import Attachment, Ticket, Wiki, Revision + + +logger = logging.getLogger('haystack') + +# the string maketrans always returns a string encoded with latin1 +# http://stackoverflow.com/questions/1324067/how-do-i-get-str-translate-to-work-with-unicode-strings +table = string.maketrans( + string.punctuation, + '.' * len(string.punctuation) +).decode('latin1') + + +class AttachmentIndex(BaseIndex, indexes.Indexable): + title = indexes.CharField(model_attr='filename') + description = indexes.CharField(model_attr='description', null=True) + modified = indexes.DateTimeField(model_attr='created', null=True) + used_by = indexes.CharField(model_attr='used_by', null=True, stored=False) + mimetype = indexes.CharField( + model_attr='mimetype', + null=True, + stored=False + ) + size = indexes.IntegerField(model_attr='size', null=True, stored=False) + filename = indexes.CharField(stored=False) + + def get_model(self): + return Attachment + + def get_updated_field(self): + return 'created' + + def prepare(self, obj): + data = super(AttachmentIndex, self).prepare(obj) + + try: + file_obj = open(obj.filepath) + except IOError as e: + logger.warning(u'IOError: %s - %s', e.strerror, e.filename) + return data + backend = self._get_backend(None) + extracted_data = backend.extract_file_contents(file_obj) + + t = loader.select_template( + ('search/indexes/proxy/attachment_text.txt', ) + ) + data['text'] = t.render(Context({ + 'object': obj, + 'extracted': extracted_data, + })) + return data + + def prepare_filename(self, obj): + return obj.filename.translate(table).replace('.', ' ') + + def prepare_icon_name(self, obj): + return u'file' + + def prepare_type(self, obj): + return u'attachment' class WikiIndex(BaseIndex, indexes.Indexable): @@ -26,7 +86,7 @@ class WikiIndex(BaseIndex, 
indexes.Indexable): return u'{}\n{}'.format(obj.wiki_text, obj.collaborators) def prepare_icon_name(self, obj): - return u'file' + return u'book' def prepare_type(self, obj): return u'wiki' diff --git a/src/proxy/templates/search/indexes/proxy/attachment_text.txt b/src/proxy/templates/search/indexes/proxy/attachment_text.txt new file mode 100644 index 0000000..9b22bae --- /dev/null +++ b/src/proxy/templates/search/indexes/proxy/attachment_text.txt @@ -0,0 +1,15 @@ +{{ object.filename }} +{{ object.filename|slugify }} +{{ object.description }} +{{ object.description|slugify }} +{{ object.used_by }} +{{ object.mimetype }} +{{ object.get_author.get_full_name }} + +{% for k, v in extracted.metadata.items %} + {% for val in v %} + {{ k }}: {{ val|safe }} + {% endfor %} +{% endfor %} + +{{ extracted.contents|striptags|safe }} diff --git a/src/search/forms.py b/src/search/forms.py index 2262c66..7970b3a 100644 --- a/src/search/forms.py +++ b/src/search/forms.py @@ -23,8 +23,8 @@ class ColabSearchForm(SearchForm): list = forms.MultipleChoiceField( required=False, label=_(u'Mailinglist'), - choices=[(v, v) for v in MailingList.objects.values('name') - for (v, v) in v.items()] + choices=[(v, v) for v in MailingList.objects.values_list( + 'name', flat=True)] ) milestone = forms.CharField(required=False, label=_(u'Milestone')) priority = forms.CharField(required=False, label=_(u'Priority')) @@ -40,30 +40,71 @@ class ColabSearchForm(SearchForm): role = forms.CharField(required=False, label=_(u'Role')) since = forms.DateField(required=False, label=_(u'Since')) until = forms.DateField(required=False, label=_(u'Until')) + filename = forms.CharField(required=False, label=_(u'Filename')) + used_by = forms.CharField(required=False, label=_(u'Used by')) + mimetype = forms.CharField(required=False, label=_(u'File type')) + size = forms.CharField(required=False, label=_(u'Size')) def search(self): if not self.is_valid(): return self.no_query_found() + # filter_or goes here + sqs = 
self.searchqueryset.all() + mimetype = self.cleaned_data['mimetype'] + if mimetype: + filter_mimetypes = {'mimetype__in': []} + for type_, display, mimelist in settings.FILE_TYPE_GROUPINGS: + if type_ in mimetype: + filter_mimetypes['mimetype__in'] += mimelist + if not self.cleaned_data['size']: + sqs = sqs.filter_or(mimetype__in=mimelist) + + if self.cleaned_data['size']: + # (1024 * 1024) / 2 + # (1024 * 1024) * 10 + filter_sizes = {} + filter_sizes_exp = {} + if '<500KB' in self.cleaned_data['size']: + filter_sizes['size__lt'] = 524288 + if '500KB__10MB' in self.cleaned_data['size']: + filter_sizes_exp['size__gte'] = 524288 + filter_sizes_exp['size__lte'] = 10485760 + if '>10MB' in self.cleaned_data['size']: + filter_sizes['size__gt'] = 10485760 + + if self.cleaned_data['mimetype']: + # Add the mimetypes filters to this dict and filter it + if filter_sizes_exp: + filter_sizes_exp.update(filter_mimetypes) + sqs = sqs.filter_or(**filter_sizes_exp) + for filter_or in filter_sizes.items(): + filter_or = dict((filter_or, )) + filter_or.update(filter_mimetypes) + sqs = sqs.filter_or(**filter_or) + else: + for filter_or in filter_sizes.items(): + filter_or = dict((filter_or, )) + sqs = sqs.filter_or(**filter_or) + sqs = sqs.filter_or(**filter_sizes_exp) + + if self.cleaned_data['used_by']: + sqs = sqs.filter_or(used_by__in=self.cleaned_data['used_by'].split()) + if self.cleaned_data.get('q'): q = unicodedata.normalize( 'NFKD', unicode(self.cleaned_data.get('q')) ).encode('ascii', 'ignore') - sqs = self.searchqueryset.auto_query(q) + sqs = sqs.auto_query(q) sqs = sqs.filter(content=AltParser( 'dismax', q, pf='title^2.1 author^1.9 description^1.7', mm='2<70%' )) - else: - sqs = self.searchqueryset.all() - if self.cleaned_data['type']: - "It will consider other types with a whitespace" - types = self.cleaned_data['type'] - sqs = sqs.filter(type__in=types.split()) + sqs = sqs.filter(type=self.cleaned_data['type']) if self.cleaned_data['order']: for option, dict_order in 
settings.ORDERING_DATA.items(): @@ -111,6 +152,9 @@ class ColabSearchForm(SearchForm): if self.cleaned_data['until']: sqs = sqs.filter(modified__lte=self.cleaned_data['until']) + if self.cleaned_data['filename']: + sqs = sqs.filter(filename=self.cleaned_data['filename']) + if self.load_all: sqs = sqs.load_all() diff --git a/src/search/views.py b/src/search/views.py index a223d1a..41adf9f 100644 --- a/src/search/views.py +++ b/src/search/views.py @@ -5,6 +5,8 @@ from django.utils.translation import ugettext as _ from haystack.views import SearchView +from proxy.models import Attachment + class ColabSearchView(SearchView): def extra_context(self, *args, **kwargs): @@ -106,6 +108,26 @@ class ColabSearchView(SearchView): ('role', _(u'Role'), self.request.GET.get('role')) ), }, + 'attachment': { + 'name': _(u'Attachment'), + 'fields': ( + ( + 'filename', + _(u'Filename'), + self.request.GET.get('filename') + ), + ('author', _(u'Author'), self.request.GET.get('author')), + ( + 'used_by', + _(u'Used by'), self.request.GET.get('used_by')), + ( + 'mimetype', + _(u'File type'), + self.request.GET.get('mimetype') + ), + ('size', _(u'Size'), self.request.GET.get('size')), + ) + } } try: @@ -113,10 +135,36 @@ class ColabSearchView(SearchView): except AttributeError: type_chosen = '' + mimetype_choices = () + size_choices = () + used_by_choices = () + + if type_chosen == 'attachment': + mimetype_choices = [(type_, display) for type_, display, mimelist_ in settings.FILE_TYPE_GROUPINGS] + size_choices = [ + ('<500KB', u'< 500 KB'), + ('500KB__10MB', u'>= 500 KB <= 10 MB'), + ('>10MB', u'> 10 MB'), + ] + used_by_choices = set([ + (v, v) for v in Attachment.objects.values_list( + 'used_by', flat=True) + ]) + + mimetype_chosen = self.request.GET.get('mimetype') + size_chosen = self.request.GET.get('size') + used_by_chosen = self.request.GET.get('used_by') + return dict( filters=types.get(type_chosen), type_chosen=type_chosen, order_data=settings.ORDERING_DATA, 
date_format=date_format, use_language=use_language, + mimetype_chosen=mimetype_chosen if mimetype_chosen else '', + mimetype_choices=mimetype_choices, + size_chosen=size_chosen if size_chosen else '', + size_choices=size_choices, + used_by_chosen=used_by_chosen if used_by_chosen else '', + used_by_choices=used_by_choices, ) diff --git a/src/templates/search.html b/src/templates/search.html index 14f4104..d6104f3 100644 --- a/src/templates/search.html +++ b/src/templates/search.html @@ -21,7 +21,7 @@