Commit 92b6380bb699cdb843cb3994e24ac49ec1789d70

Authored by Luan
1 parent f6d5f22e

removing solrutils file

Showing 1 changed file with 0 additions and 254 deletions   Show diff stats
src/colab/deprecated/solrutils.py
@@ -1,254 +0,0 @@ @@ -1,254 +0,0 @@
1 -#!/usr/bin/env python  
2 -# encoding: utf-8  
3 -  
4 -import math  
5 -import json  
6 -import urllib  
7 -import socket  
8 -import logging  
9 -import httplib  
10 -  
11 -from dateutil.parser import parse as parse_timestamp  
12 -  
13 -from django.conf import settings  
14 -  
15 -from super_archives.models import EmailAddress  
16 -  
17 -  
def build_query(user_query, filters=None):
    """Compose the query string that is sent to Solr.

    The user supplied query (``*`` when empty) is AND-ed with
    ``settings.SOLR_BASE_QUERY``.  When ``filters`` is a non-empty
    mapping, the combined query is wrapped in parentheses and one
    ``field:value`` clause is AND-ed for every filter whose value is
    truthy.

    The resulting query is logged and returned encoded as UTF-8.

    """
    terms = user_query if user_query else '*'
    solr_query = settings.SOLR_BASE_QUERY.strip() + ' AND ' + terms

    if filters:
        clauses = ["(%s)" % solr_query]
        clauses.extend(
            "%s:%s" % (field, wanted)
            for (field, wanted) in filters.items()
            if wanted
        )
        solr_query = " AND ".join(clauses)

    logging.info(solr_query)
    return solr_query.encode('utf-8')
34 -  
35 -  
def parse_document_timestamps(doc, date_attrs=('modified', 'created')):
    """Replace the date strings of a document with parsed date objects.

    For every attribute name in ``date_attrs`` the value stored in
    ``doc`` is parsed with ``dateutil.parser.parse`` and written back
    under the same key.  The key is set to ``None`` when the attribute
    is missing or when its value cannot be parsed (the latter is also
    logged as an error).

    The document is modified in place and returned.

    """

    for attr in date_attrs:
        raw_value = doc.get(attr)
        parsed = None

        # A missing attribute must not reach the parser: it rejects None
        # with an error that is not a ValueError (TypeError in current
        # dateutil releases), which used to abort the whole call.
        if raw_value is not None:
            try:
                parsed = parse_timestamp(raw_value)
            except (ValueError, OverflowError):
                # OverflowError is raised by dateutil when the parsed
                # date does not fit the platform's integer range.
                logging.error('Error trying to parse "%s"', raw_value)

        doc[attr] = parsed

    return doc
53 -  
54 -  
def get_document_url(doc):
    """Set the ``url`` key of a document from its ``path_string``.

    When ``settings.SOLR_COLAB_URI`` is set it is prepended to the
    path, so resources coming from an external domain get a complete
    URL.  A document without ``path_string`` gets just the prefix (or
    an empty string).

    The document is modified in place and returned.

    """
    prefix = settings.SOLR_COLAB_URI or ''
    doc['url'] = prefix + doc.get('path_string', '')

    return doc
71 -  
72 -  
def get_document_from_addr(doc):
    """Attach the author's EmailAddress to the document, if there is one.

    The author is looked up by the ``last_author`` key, falling back
    to ``Creator``.  When an author name is present and at least one
    EmailAddress belongs to a user with that username, the first match
    is stored in the document under ``from_address``.  Otherwise the
    document is left untouched.

    """

    author = doc.get('last_author') or doc.get('Creator')
    matches = EmailAddress.objects.filter(user__username=author)

    if not author or not matches:
        return

    doc['from_address'] = matches[0]
85 -  
86 -  
def add_attrs_to_doc(doc):
    """Run every function that adds or rewrites keys of ``doc``.

    ``doc`` should be a dict; it is modified in place.  The steps set
    the document url, parse its timestamps and attach the author's
    email address, in that order.

    """
    enrichers = (
        get_document_url,
        parse_document_timestamps,
        get_document_from_addr,
    )
    for enrich in enrichers:
        enrich(doc)
95 -  
96 -  
class SolrPaginator(list):
    """List of the documents of a Solr response plus pagination data.

    The list itself holds ``response_dict['response']['docs']``.  The
    following attributes are derived from the response:

    * ``QTime`` - the ``QTime`` of the response header divided by 1000
      (Solr reports it in milliseconds).
    * ``per_page`` - the ``rows`` request parameter echoed in the
      response header (10 when absent).
    * ``numFound`` - total number of matching documents.
    * ``page_num`` - the ``current_page`` given by the caller.
    * ``num_of_pages`` - number of pages needed for ``numFound``
      documents; 0 when ``per_page`` is not positive.
    * ``has_previous`` / ``previous_page_number`` and
      ``has_next`` / ``next_page_number`` - neighbours of the current
      page; the page numbers are ``None`` when there is no neighbour.

    """

    def __init__(self, response_dict, current_page):
        super(SolrPaginator, self).__init__()

        header = response_dict.get('responseHeader', {})
        response = response_dict.get('response', {})
        request_params = header.get('params', {})

        self.extend(response.get('docs', []))

        self.QTime = int(header.get('QTime', 1)) / 1000.0

        self.per_page = int(request_params.get('rows', 10))
        self.numFound = int(response.get('numFound', 0))
        self.page_num = current_page

        # A request made with rows=0 (count only) echoes per_page == 0;
        # there is nothing to paginate then, and dividing would raise
        # ZeroDivisionError.
        if self.per_page > 0:
            pages = math.ceil(self.numFound / float(self.per_page))
            self.num_of_pages = int(pages)
        else:
            self.num_of_pages = 0

        self.has_previous = self.page_num > 1
        if self.has_previous:
            self.previous_page_number = self.page_num - 1
        else:
            self.previous_page_number = None

        self.has_next = self.page_num < self.num_of_pages
        if self.has_next:
            self.next_page_number = self.page_num + 1
        else:
            self.next_page_number = None

    @property
    def last_page(self):
        """Number of the last page (same as ``num_of_pages``)."""
        return self.num_of_pages
132 -  
133 -  
def select(query, results_per_page=None, page_number=None, sort=None, fields=None, link_attrs=True):
    """Perform a select in a Solr instance using the configuration
    set in settings.py.

    Parameters:

    * ``query`` - the Solr query, sent as the ``q`` parameter.
    * ``results_per_page`` - sent as ``rows`` when truthy.
    * ``page_number`` - 1-based page; converted to the ``start`` offset.
    * ``sort`` - sent as ``sort`` when truthy.
    * ``fields`` - iterable of field names, sent comma-joined as ``fl``.
    * ``link_attrs`` - when true every returned document is passed to
      ``add_attrs_to_doc``.

    Returns the decoded JSON response as a dict.  An empty dict is
    returned when ``settings.SOLR_HOSTNAME`` is not set, when the
    request fails, when Solr does not answer with HTTP 200 or when the
    body is not valid JSON.

    """

    if not settings.SOLR_HOSTNAME:
        return {}

    data = {
        'q': query,
        'wt': 'json',
    }

    # Number of results per page
    if results_per_page:
        data['rows'] = results_per_page

    # Page number
    if page_number:
        # Without an explicit page size the offset is computed with 10
        # rows per page, the same fallback SolrPaginator uses.
        # NOTE(review): presumably this matches the default `rows` of
        # the Solr request handler - confirm if the handler overrides it.
        rows = results_per_page or 10
        data['start'] = (page_number - 1) * rows

    # Sort order
    if sort:
        data['sort'] = sort

    # Only select those fields
    if fields:
        data['fl'] = ','.join(fields)

    # First version of this was implemented using urllib2 and was
    # a million times easier but unfortunately urllib2.urlopen
    # does not support http headers. Without setting http headers
    # for charset the solr server tries to decode utf-8 params
    # as ASCII causing it to crash. HTTPConnection deals with
    # encodings automagically.
    solr_conn = httplib.HTTPConnection(settings.SOLR_HOSTNAME,
                                       settings.SOLR_PORT)
    query_params = urllib.urlencode(data)
    solr_select_uri = settings.SOLR_SELECT_PATH + '?' + query_params

    solr_json_resp = None
    try:
        solr_conn.request('GET', solr_select_uri)
        solr_response = solr_conn.getresponse()
        if solr_response.status == 200:
            solr_json_resp = solr_response.read()
        else:
            logging.error('Solr answered with HTTP status %s',
                          solr_response.status)
    except (socket.error, httplib.HTTPException) as err:
        # Searching is best effort: log the failure, return no results.
        logging.exception(err)
    finally:
        # The body has been read (or is not needed) at this point, so
        # the socket can always be released.
        solr_conn.close()

    solr_dict_resp = {}
    if solr_json_resp is not None:
        try:
            solr_dict_resp = json.loads(solr_json_resp)
        except ValueError as err:
            logging.exception(err)

    docs = solr_dict_resp.get('response', {}).get("docs", [])

    if link_attrs:
        # Loop over all documents adding or linking its information
        # with the data from this app or database.  An explicit loop is
        # used because map() is lazy on Python 3 and would do nothing.
        for doc in docs:
            add_attrs_to_doc(doc)

    return solr_dict_resp
202 -  
203 -  
def get_latest_collaborations(number=10, username=None):
    """Return the most recently modified documents.

    At most ``number`` documents are requested, sorted by ``modified``
    in descending order.  When ``username`` is given the search is
    restricted to documents whose ``collaborator`` field matches it.

    An empty list is returned when the response carries no documents.

    """

    filters = {'collaborator': username} if username else None

    solr_response = select(
        query=build_query('*', filters),
        results_per_page=number,
        sort='modified desc'
    )

    response = solr_response.get('response', {})
    return response.get('docs', [])
223 -  
224 -  
def count_types(sample=100, filters=None):
    """Count the ``Type`` of the most recently modified documents.

    The ``sample`` latest documents (optionally narrowed by
    ``filters``) are fetched without linking extra attributes and the
    number of documents of each type is returned in a dict.

    Example: {
        'wiki': 30,
        'thread': 40,
        'ticket': 10,
        'changeset': 20,
    }

    """

    solr_response = select(
        query=build_query('*', filters),
        results_per_page=sample,
        sort='modified desc',
        link_attrs=False,
    )

    type_count = {}
    for doc in solr_response.get('response', {}).get('docs', []):
        kind = doc.get('Type')
        type_count[kind] = type_count.get(kind, 0) + 1

    return type_count