Commit d92e036407f29112cebcf426e1c2fe4ce04c6376

Authored by Sergio Oliveira
2 parents 17e3bdf6 9e9fe225

Merge branch 'master' of github.com:TracyWebTech/colab

src/colab/deprecated/solrutils.py
@@ -1,254 +0,0 @@ @@ -1,254 +0,0 @@
1 -#!/usr/bin/env python  
2 -# encoding: utf-8  
3 -  
4 -import math  
5 -import json  
6 -import urllib  
7 -import socket  
8 -import logging  
9 -import httplib  
10 -  
11 -from dateutil.parser import parse as parse_timestamp  
12 -  
13 -from django.conf import settings  
14 -  
15 -from super_archives.models import EmailAddress  
16 -  
17 -  
18 -def build_query(user_query, filters=None):  
19 - """Build the query that will be sent to Solr"""  
20 -  
21 - if not user_query:  
22 - user_query = '*'  
23 -  
24 - query = settings.SOLR_BASE_QUERY.strip() + ' AND ' + user_query  
25 - if filters:  
26 - query = "(%s)" % query  
27 -  
28 - for (key, value) in filters.items():  
29 - if value:  
30 - query += " AND %s:%s" % (key, value)  
31 -  
32 - logging.info(query)  
33 - return query.encode('utf-8')  
34 -  
35 -  
36 -def parse_document_timestamps(doc, date_attrs=('modified', 'created')):  
37 - """Converts the `modified' and `created' dates from  
38 - ISO 8601 format to a date time object for the given  
39 - document.  
40 -  
41 - """  
42 -  
43 - for date in date_attrs:  
44 - date_str = doc.get(date)  
45 - try:  
46 - date_obj = parse_timestamp(date_str)  
47 - except ValueError:  
48 - logging.error('Error trying to parse "%s"', date_str)  
49 - date_obj = None  
50 - doc.update({date: date_obj})  
51 -  
52 - return doc  
53 -  
54 -  
55 -def get_document_url(doc):  
56 - """Set the url attribute for a document using the path_string.  
57 - In case the resource comes from an external domain it will  
58 - be prepended to this URL.  
59 -  
60 - """  
61 - doc_type = doc.get('Type')  
62 -  
63 - url = ''  
64 - if settings.SOLR_COLAB_URI:  
65 - url += settings.SOLR_COLAB_URI  
66 -  
67 - url += doc.get('path_string', '')  
68 - doc.update({'url': url})  
69 -  
70 - return doc  
71 -  
72 -  
73 -def get_document_from_addr(doc):  
74 - """Get a EmailAddress instance for the given document if  
75 - its available.  
76 -  
77 - """  
78 -  
79 - username = doc.get('last_author')  
80 - if not username:  
81 - username = doc.get('Creator')  
82 - from_addresses = EmailAddress.objects.filter(user__username=username)  
83 - if username and from_addresses:  
84 - doc.update({'from_address': from_addresses[0]})  
85 -  
86 -  
87 -def add_attrs_to_doc(doc):  
88 - """Wraps the call of functions that adds or modifies keys  
89 - of the giving doc (which should be a dict).  
90 -  
91 - """  
92 - get_document_url(doc)  
93 - parse_document_timestamps(doc)  
94 - get_document_from_addr(doc)  
95 -  
96 -  
97 -class SolrPaginator(list):  
98 -  
99 - def __init__(self, response_dict, current_page):  
100 - super(SolrPaginator, self).__init__()  
101 -  
102 - responseHeader = response_dict.get('responseHeader', {})  
103 - response = response_dict.get('response', {})  
104 - request_params = responseHeader.get('params', {})  
105 -  
106 - docs = response.get('docs', [])  
107 - self.extend(docs)  
108 -  
109 - self.QTime = int(responseHeader.get('QTime', 1)) / 1000.0  
110 -  
111 - self.per_page = int(request_params.get('rows', 10))  
112 - self.numFound = int(response.get('numFound', 0))  
113 - self.page_num = current_page  
114 -  
115 - self.num_of_pages = int(math.ceil(self.numFound / float(self.per_page)))  
116 -  
117 - self.has_previous = self.page_num > 1  
118 - if self.has_previous:  
119 - self.previous_page_number = self.page_num - 1  
120 - else:  
121 - self.previous_page_number = None  
122 -  
123 - self.has_next = self.page_num < self.num_of_pages  
124 - if self.has_next:  
125 - self.next_page_number = self.page_num + 1  
126 - else:  
127 - self.next_page_number = None  
128 -  
129 - @property  
130 - def last_page(self):  
131 - return self.num_of_pages  
132 -  
133 -  
134 -def select(query, results_per_page=None, page_number=None, sort=None, fields=None, link_attrs=True):  
135 - """Perform a select in a Solr instance using the configuration  
136 - set in settings.py.  
137 -  
138 - """  
139 -  
140 - if not settings.SOLR_HOSTNAME:  
141 - return {}  
142 -  
143 - data = {  
144 - 'q': query,  
145 - 'wt': 'json',  
146 - }  
147 -  
148 - # Number of results per page  
149 - if results_per_page:  
150 - data.update({'rows': results_per_page})  
151 -  
152 - # Page number  
153 - if page_number:  
154 - data.update({  
155 - 'start': (page_number - 1) * results_per_page,  
156 - })  
157 -  
158 - # Sort order  
159 - if sort:  
160 - data.update({  
161 - 'sort': sort,  
162 - })  
163 -  
164 - # Only select those fields  
165 - if fields:  
166 - data.update({  
167 - 'fl': ','.join(fields),  
168 - })  
169 - # First version of this was implemented using urllib2 and was  
170 - # a milion times easier but unfortunatelly urllib2.urlopen  
171 - # does not support http headers. Without setting http headers  
172 - # for charset the solr server tries to decode utf-8 params  
173 - # as ASCII causing it to crash. HTTPConnection deals with  
174 - # encodings automagically.  
175 - solr_conn = httplib.HTTPConnection(settings.SOLR_HOSTNAME,  
176 - settings.SOLR_PORT)  
177 - query_params = urllib.urlencode(data)  
178 - solr_select_uri = settings.SOLR_SELECT_PATH + '?' + query_params  
179 -  
180 - try:  
181 - solr_conn.request('GET', solr_select_uri)  
182 - solr_response = solr_conn.getresponse()  
183 - except socket.error as err:  
184 - solr_response = None  
185 - logging.exception(err)  
186 -  
187 - if solr_response and solr_response.status == 200:  
188 - #TODO: Log error connecting to solr  
189 - solr_json_resp = solr_response.read()  
190 - solr_dict_resp = json.loads(solr_json_resp)  
191 - else:  
192 - solr_dict_resp = {}  
193 -  
194 - docs = solr_dict_resp.get('response', {}).get("docs", [])  
195 -  
196 - if link_attrs:  
197 - # Loop over all documents adding or linking its information  
198 - # with the data from this app or database  
199 - map(add_attrs_to_doc, docs)  
200 -  
201 - return solr_dict_resp  
202 -  
203 -  
204 -def get_latest_collaborations(number=10, username=None):  
205 - """Get the n documents recently modified that this username  
206 - has helped in somehow.  
207 -  
208 - """  
209 -  
210 - if username:  
211 - filters = {'collaborator': username}  
212 - else:  
213 - filters = None  
214 -  
215 - query = build_query('*', filters)  
216 - solr_response = select(  
217 - query=query,  
218 - results_per_page=number,  
219 - sort='modified desc'  
220 - )  
221 -  
222 - return solr_response.get('response', {}).get('docs', [])  
223 -  
224 -  
225 -def count_types(sample=100, filters=None):  
226 - """Count the type of the last modifications returning the  
227 - results in dict.  
228 -  
229 - Example: {  
230 - 'wiki' 30,  
231 - 'thread': 40,  
232 - 'ticket', 10,  
233 - 'changeset' 20,  
234 - }  
235 -  
236 - """  
237 -  
238 - query = build_query('*', filters)  
239 - solr_response = select(  
240 - query=query,  
241 - results_per_page=sample,  
242 - sort='modified desc',  
243 - link_attrs=False,  
244 - )  
245 -  
246 - docs = solr_response.get('response', {}).get('docs', [])  
247 -  
248 - type_count = {}  
249 - for doc in docs:  
250 - doc_type = doc.get('Type')  
251 - count = type_count.get(doc_type, 0) + 1  
252 - type_count.update({doc_type: count})  
253 -  
254 - return type_count  
src/rss/feeds.py
@@ -4,9 +4,10 @@ @@ -4,9 +4,10 @@
4 from django.contrib.syndication.views import Feed 4 from django.contrib.syndication.views import Feed
5 from django.utils.translation import ugettext as _ 5 from django.utils.translation import ugettext as _
6 6
  7 +from haystack.query import SearchQuerySet
  8 +
7 from super_archives.models import Thread 9 from super_archives.models import Thread
8 from super_archives import queries 10 from super_archives import queries
9 -from colab.deprecated import solrutils  
10 11
11 12
12 class LatestThreadsFeeds(Feed): 13 class LatestThreadsFeeds(Feed):
@@ -21,7 +22,7 @@ class LatestThreadsFeeds(Feed): @@ -21,7 +22,7 @@ class LatestThreadsFeeds(Feed):
21 22
22 def item_title(self, item): 23 def item_title(self, item):
23 title = '[' + item.mailinglist.name + '] ' 24 title = '[' + item.mailinglist.name + '] '
24 - title += item.latest_message.subject_clean 25 + title += item.latest_message.subject_clean
25 return title 26 return title
26 27
27 def item_description(self, item): 28 def item_description(self, item):
@@ -40,8 +41,8 @@ class HottestThreadsFeeds(Feed): @@ -40,8 +41,8 @@ class HottestThreadsFeeds(Feed):
40 41
41 def item_title(self, item): 42 def item_title(self, item):
42 title = '[' + item.mailinglist.name + '] ' 43 title = '[' + item.mailinglist.name + '] '
43 - title += item.latest_message.subject_clean  
44 - return title 44 + title += item.latest_message.subject_clean
  45 + return title
45 46
46 def item_description(self, item): 47 def item_description(self, item):
47 return item.latest_message.body 48 return item.latest_message.body
@@ -52,28 +53,22 @@ class LatestColabFeeds(Feed): @@ -52,28 +53,22 @@ class LatestColabFeeds(Feed):
52 link = '/rss/colab/latest/' 53 link = '/rss/colab/latest/'
53 54
54 def items(self): 55 def items(self):
55 - items = solrutils.get_latest_collaborations(20) 56 + items = SearchQuerySet().order_by('-modified', '-created')[:20]
56 return items 57 return items
57 58
58 def item_title(self, item): 59 def item_title(self, item):
59 - type_ = item.get('Type') + ': '  
60 - mailinglist = item.get('mailinglist') 60 + type_ = item.type + ': '
  61 + mailinglist = item.tag
61 62
62 if mailinglist: 63 if mailinglist:
63 prefix = type_ + mailinglist + ' - ' 64 prefix = type_ + mailinglist + ' - '
64 else: 65 else:
65 prefix = type_ 66 prefix = type_
66 67
67 - return prefix + item.get('Title') 68 + return prefix + item.title
68 69
69 def item_description(self, item): 70 def item_description(self, item):
70 - return item.get('Description') 71 + return item.latest_description
71 72
72 def item_link(self, item): 73 def item_link(self, item):
73 - if item.get('Type') != 'thread':  
74 - url = item.get('url')  
75 - else:  
76 - url = 'http://colab.interlegis.leg.br'  
77 - url += item.get('url')  
78 - return url  
79 - 74 + return item.url
src/super_archives/search_indexes.py
@@ -11,6 +11,7 @@ class ThreadIndex(indexes.SearchIndex, indexes.Indexable): @@ -11,6 +11,7 @@ class ThreadIndex(indexes.SearchIndex, indexes.Indexable):
11 url = indexes.CharField(model_attr='get_absolute_url', null=True) 11 url = indexes.CharField(model_attr='get_absolute_url', null=True)
12 title = indexes.CharField(model_attr='latest_message__subject_clean') 12 title = indexes.CharField(model_attr='latest_message__subject_clean')
13 description = indexes.CharField(use_template=True) 13 description = indexes.CharField(use_template=True)
  14 + latest_description = indexes.CharField(model_attr='latest_message__body')
14 created = indexes.DateTimeField() 15 created = indexes.DateTimeField()
15 modified = indexes.DateTimeField( 16 modified = indexes.DateTimeField(
16 model_attr='latest_message__received_time' 17 model_attr='latest_message__received_time'
src/super_archives/templates/search/indexes/super_archives/thread_description.txt
1 {% for message in object.message_set.iterator %} 1 {% for message in object.message_set.iterator %}
2 {% if not spam %} 2 {% if not spam %}
3 - {{ message.subject_clean }}  
4 - {{ message.subject_clean|slugify }}  
5 {{ message.body }} 3 {{ message.body }}
6 - {{ message.body|slugify }}  
7 - {{ message.from_address.get_full_name }}  
8 - {{ message.from_address.get_full_name|slugify }}  
9 {% endif %} 4 {% endif %}
10 {% endfor %} 5 {% endfor %}
src/super_archives/templates/search/indexes/super_archives/thread_text.txt
@@ -11,8 +11,6 @@ @@ -11,8 +11,6 @@
11 11
12 {% for message in object.message_set.iterator %} 12 {% for message in object.message_set.iterator %}
13 {% if not spam %} 13 {% if not spam %}
14 - {{ message.subject_clean }}  
15 - {{ message.subject_clean|slugify }}  
16 {{ message.body }} 14 {{ message.body }}
17 {{ message.body|slugify }} 15 {{ message.body|slugify }}
18 {{ message.from_address.get_full_name }} 16 {{ message.from_address.get_full_name }}