Commit 92b6380bb699cdb843cb3994e24ac49ec1789d70

Authored by Luan
1 parent f6d5f22e

removing solrutils file

Showing 1 changed file with 0 additions and 254 deletions   Show diff stats
src/colab/deprecated/solrutils.py
... ... @@ -1,254 +0,0 @@
1   -#!/usr/bin/env python
2   -# encoding: utf-8
3   -
4   -import math
5   -import json
6   -import urllib
7   -import socket
8   -import logging
9   -import httplib
10   -
11   -from dateutil.parser import parse as parse_timestamp
12   -
13   -from django.conf import settings
14   -
15   -from super_archives.models import EmailAddress
16   -
17   -
def build_query(user_query, filters=None):
    """Compose the query string that will be sent to Solr.

    An empty ``user_query`` is replaced by the match-all ``*``. The
    stripped ``settings.SOLR_BASE_QUERY`` is AND-ed with the user query;
    when ``filters`` is non-empty the expression is wrapped in
    parentheses and an ``AND key:value`` clause is appended for every
    filter whose value is truthy.

    The final query is logged at INFO level and returned encoded as
    UTF-8.

    """
    base_query = settings.SOLR_BASE_QUERY.strip()
    query = base_query + ' AND ' + (user_query or '*')

    if filters:
        # Filters with empty values are skipped on purpose.
        clauses = [
            " AND %s:%s" % (name, term)
            for name, term in filters.items()
            if term
        ]
        query = "(%s)" % query + ''.join(clauses)

    logging.info(query)
    return query.encode('utf-8')
34   -
35   -
def parse_document_timestamps(doc, date_attrs=('modified', 'created')):
    """Convert the date attributes of ``doc`` from ISO 8601 strings
    to date time objects, in place.

    For every name in ``date_attrs`` the matching value of ``doc`` is
    parsed and stored back under the same key. An attribute that is
    missing from the document, or whose value cannot be parsed, is set
    to ``None`` (parse failures are also logged as errors).

    Returns the same ``doc`` object that was passed in.

    """
    for attr in date_attrs:
        date_str = doc.get(attr)
        date_obj = None

        # A missing attribute gives None here; handing None to the
        # parser raises an exception that is not a ValueError, so it
        # must be skipped rather than parsed.
        if date_str is not None:
            try:
                date_obj = parse_timestamp(date_str)
            except (ValueError, OverflowError):
                # OverflowError: the parsed date does not fit the
                # platform's integer range.
                logging.error('Error trying to parse "%s"', date_str)

        doc[attr] = date_obj

    return doc
53   -
54   -
def get_document_url(doc):
    """Set the ``url`` key of a document from its ``path_string``.

    When ``settings.SOLR_COLAB_URI`` is set it is prepended to the
    path, so resources served from an external domain get a full URL.
    A document without ``path_string`` gets just the prefix (or an
    empty string).

    The document is modified in place and also returned.

    """
    prefix = settings.SOLR_COLAB_URI or ''
    doc['url'] = prefix + doc.get('path_string', '')
    return doc
71   -
72   -
def get_document_from_addr(doc):
    """Attach the author's EmailAddress to the document, if available.

    The username is taken from ``last_author``, falling back to
    ``Creator``. When a username exists and at least one EmailAddress
    belongs to that user, the first one is stored in the document
    under ``from_address``. Otherwise the document is left untouched.

    """
    username = doc.get('last_author') or doc.get('Creator')
    from_addresses = EmailAddress.objects.filter(user__username=username)

    # The username test comes first so that the queryset is never
    # evaluated for a document without an author.
    if not username or not from_addresses:
        return

    doc['from_address'] = from_addresses[0]
85   -
86   -
def add_attrs_to_doc(doc):
    """Run every function that adds or rewrites keys of ``doc``
    (which should be a dict): its url, its parsed timestamps and its
    from address, in that order.

    """
    enrichers = (
        get_document_url,
        parse_document_timestamps,
        get_document_from_addr,
    )
    for enrich in enrichers:
        enrich(doc)
95   -
96   -
class SolrPaginator(list):
    """List of the documents of one Solr response page plus paging data.

    The list itself holds ``response.docs``. The paging attributes are
    computed from a decoded Solr JSON response:

    * ``QTime``: ``responseHeader.QTime`` divided by 1000
      (presumably milliseconds converted to seconds).
    * ``per_page``: the ``rows`` request parameter (10 when absent).
    * ``numFound``: total number of matching documents.
    * ``page_num``: the current (1-based) page number.
    * ``num_of_pages``: number of pages needed for ``numFound``
      documents; 0 when ``per_page`` is not positive.
    * ``has_previous`` / ``previous_page_number`` and
      ``has_next`` / ``next_page_number``: neighbours of the current
      page, the page numbers being ``None`` when there is none.

    """

    def __init__(self, response_dict, current_page):
        super(SolrPaginator, self).__init__()

        header = response_dict.get('responseHeader', {})
        response = response_dict.get('response', {})
        request_params = header.get('params', {})

        self.extend(response.get('docs', []))

        self.QTime = int(header.get('QTime', 1)) / 1000.0

        self.per_page = int(request_params.get('rows', 10))
        self.numFound = int(response.get('numFound', 0))
        self.page_num = current_page

        # rows=0 is a valid Solr request (count only, no documents);
        # avoid dividing by zero and report that there are no pages.
        if self.per_page > 0:
            pages = math.ceil(self.numFound / float(self.per_page))
            self.num_of_pages = int(pages)
        else:
            self.num_of_pages = 0

        self.has_previous = self.page_num > 1
        if self.has_previous:
            self.previous_page_number = self.page_num - 1
        else:
            self.previous_page_number = None

        self.has_next = self.page_num < self.num_of_pages
        if self.has_next:
            self.next_page_number = self.page_num + 1
        else:
            self.next_page_number = None

    @property
    def last_page(self):
        """Number of the last page (same as ``num_of_pages``)."""
        return self.num_of_pages
132   -
133   -
def select(query, results_per_page=None, page_number=None, sort=None,
           fields=None, link_attrs=True):
    """Perform a select in a Solr instance using the configuration
    set in settings.py.

    Arguments:
      query: the query string, sent as the ``q`` parameter.
      results_per_page: number of documents to return (``rows``).
      page_number: 1-based page to fetch; translated into ``start``.
      sort: sort expression, e.g. ``'modified desc'``.
      fields: iterable of field names to return (``fl``).
      link_attrs: when true every returned document is passed through
        ``add_attrs_to_doc`` before the response is returned.

    Returns the decoded JSON response as a dict. An empty dict is
    returned when ``settings.SOLR_HOSTNAME`` is not set, when Solr
    cannot be reached, when it answers with a status other than 200 or
    when the body is not valid JSON.

    """

    if not settings.SOLR_HOSTNAME:
        return {}

    data = {
        'q': query,
        'wt': 'json',
    }

    # Number of results per page
    if results_per_page:
        data['rows'] = results_per_page

    # Page number
    if page_number:
        # The offset needs a page size even when the caller gave none.
        # 10 is Solr's documented default for `rows` (assuming the
        # request handler does not override it) and is also what
        # SolrPaginator assumes when `rows` is absent.
        page_size = results_per_page or 10
        data['start'] = (page_number - 1) * page_size

    # Sort order
    if sort:
        data['sort'] = sort

    # Only select those fields
    if fields:
        data['fl'] = ','.join(fields)

    # The first version of this was implemented using urllib2 and was
    # a million times easier, but unfortunately urllib2.urlopen
    # does not support http headers. Without setting http headers
    # for charset the solr server tries to decode utf-8 params
    # as ASCII causing it to crash. HTTPConnection deals with
    # encodings automagically.
    solr_conn = httplib.HTTPConnection(settings.SOLR_HOSTNAME,
                                       settings.SOLR_PORT)
    query_params = urllib.urlencode(data)
    solr_select_uri = settings.SOLR_SELECT_PATH + '?' + query_params

    solr_dict_resp = {}
    try:
        solr_conn.request('GET', solr_select_uri)
        solr_response = solr_conn.getresponse()
        if solr_response.status == 200:
            solr_dict_resp = json.loads(solr_response.read())
        else:
            logging.error('Solr select returned HTTP status %s',
                          solr_response.status)
    except (socket.error, httplib.HTTPException, ValueError) as err:
        # socket.error: Solr unreachable; HTTPException: malformed
        # HTTP reply; ValueError: body is not valid JSON. In all
        # cases fall back to the empty response.
        logging.exception(err)
    finally:
        # Always release the socket, whatever happened above.
        solr_conn.close()

    docs = solr_dict_resp.get('response', {}).get("docs", [])

    if link_attrs:
        # Loop over all documents adding or linking its information
        # with the data from this app or database
        for doc in docs:
            add_attrs_to_doc(doc)

    return solr_dict_resp
202   -
203   -
def get_latest_collaborations(number=10, username=None):
    """Return the ``number`` most recently modified documents.

    When ``username`` is given, only documents having that user as
    ``collaborator`` are considered. The result is the list of
    documents of the Solr response (empty when there is none).

    """
    filters = {'collaborator': username} if username else None

    solr_response = select(
        query=build_query('*', filters),
        results_per_page=number,
        sort='modified desc'
    )

    response = solr_response.get('response', {})
    return response.get('docs', [])
223   -
224   -
def count_types(sample=100, filters=None):
    """Count how many of the ``sample`` most recently modified
    documents there are of each ``Type``, returning the result as a
    dict.

    Example: {
        'wiki': 30,
        'thread': 40,
        'ticket': 10,
        'changeset': 20,
    }

    """
    solr_response = select(
        query=build_query('*', filters),
        results_per_page=sample,
        sort='modified desc',
        link_attrs=False,
    )

    docs = solr_response.get('response', {}).get('docs', [])

    type_count = {}
    for doc_type in (doc.get('Type') for doc in docs):
        type_count[doc_type] = type_count.get(doc_type, 0) + 1

    return type_count