removing solrutils file

Luan
1 parent f6d5f22e
Showing 1 changed file with 0 additions and 254 deletions Show diff stats
src/colab/deprecated/solrutils.py
@@ -1,254 +0,0 @@
-#!/usr/bin/env python
-# encoding: utf-8
-
-import math
-import json
-import urllib
-import socket
-import logging
-import httplib
-
-from dateutil.parser import parse as parse_timestamp
-
-from django.conf import settings
-
-from super_archives.models import EmailAddress
-
-
def build_query(user_query, filters=None):
    """Compose the query string that will be sent to Solr.

    The user's query (or ``*`` when it is empty) is ANDed onto
    ``settings.SOLR_BASE_QUERY``. When ``filters`` is a non-empty dict,
    that expression is wrapped in parentheses and one ``key:value``
    clause is ANDed on for every filter whose value is truthy.

    The final query is logged at INFO level and returned encoded as
    UTF-8.

    """
    base_query = settings.SOLR_BASE_QUERY.strip()
    clauses = [base_query + ' AND ' + (user_query or '*')]

    if filters:
        # Parenthesise the base expression before appending filters.
        clauses[0] = "(%s)" % clauses[0]
        clauses.extend(
            "%s:%s" % (name, wanted)
            for (name, wanted) in filters.items()
            if wanted
        )

    query = " AND ".join(clauses)
    logging.info(query)
    return query.encode('utf-8')
-
-
def parse_document_timestamps(doc, date_attrs=('modified', 'created')):
    """Replace the timestamp strings of a document with parsed dates.

    For every key in ``date_attrs`` the value found in ``doc`` is run
    through ``parse_timestamp`` (``dateutil.parser.parse``) and stored
    back under the same key. A key that is missing (or ``None``), or
    whose value cannot be parsed, ends up set to ``None``; parse
    failures are logged as errors.

    ``doc`` is modified in place and also returned.

    """
    for attr in date_attrs:
        raw_value = doc.get(attr)
        parsed = None
        # ``doc.get`` yields None for an absent attribute; the parser
        # does not accept None, so only call it when there is a value.
        if raw_value is not None:
            try:
                parsed = parse_timestamp(raw_value)
            except (ValueError, TypeError, OverflowError):
                logging.error('Error trying to parse "%s"', raw_value)
        doc[attr] = parsed

    return doc
-
-
def get_document_url(doc):
    """Store the document's URL under the ``url`` key.

    The URL is ``settings.SOLR_COLAB_URI`` (when that setting is truthy)
    followed by the document's ``path_string``. A missing or empty
    ``path_string`` contributes nothing.

    ``doc`` is modified in place and also returned.

    """
    prefix = settings.SOLR_COLAB_URI or ''
    # ``or ''`` also covers a ``path_string`` that is present but None.
    path = doc.get('path_string') or ''
    doc['url'] = prefix + path

    return doc
-    
-
def get_document_from_addr(doc):
    """Attach the author's EmailAddress to the document, if there is one.

    The author is the document's ``last_author``, falling back to
    ``Creator``. When an ``EmailAddress`` whose user has that username
    exists, the first match is stored under ``from_address``; otherwise
    ``doc`` is left untouched. Nothing is returned.

    """
    username = doc.get('last_author') or doc.get('Creator')
    if not username:
        # No author to look up: skip the database query entirely.
        return

    from_addresses = EmailAddress.objects.filter(user__username=username)
    if from_addresses:
        doc.update({'from_address': from_addresses[0]})
-    
-
def add_attrs_to_doc(doc):
    """Enrich a document dict in place.

    Calls, in order, the helpers that set its URL, parse its timestamps
    and attach its author's e-mail address. Nothing is returned.

    """
    enrichers = (
        get_document_url,
        parse_document_timestamps,
        get_document_from_addr,
    )
    for enrich in enrichers:
        enrich(doc)
-
-
class SolrPaginator(list):
    """List of the documents in a Solr response, plus paging metadata.

    The instance itself holds the documents found under
    ``response_dict['response']['docs']``. Attributes set on it:

    * ``QTime`` -- the header's ``QTime`` (1 when absent) divided by
      1000.0
    * ``per_page`` -- the ``rows`` request parameter (10 when absent)
    * ``numFound`` -- the response's ``numFound`` (0 when absent)
    * ``page_num`` -- the ``current_page`` argument, stored as given
    * ``num_of_pages`` / ``last_page`` -- pages needed to hold
      ``numFound`` results at ``per_page`` results per page
    * ``has_previous`` / ``previous_page_number`` and
      ``has_next`` / ``next_page_number`` -- neighbouring pages; the
      number is ``None`` when there is no such page

    """

    def __init__(self, response_dict, current_page):
        super(SolrPaginator, self).__init__()

        header = response_dict.get('responseHeader', {})
        response = response_dict.get('response', {})
        request_params = header.get('params', {})

        self.extend(response.get('docs', []))

        self.QTime = int(header.get('QTime', 1)) / 1000.0

        self.per_page = int(request_params.get('rows', 10))
        self.numFound = int(response.get('numFound', 0))
        self.page_num = current_page

        if self.per_page > 0:
            self.num_of_pages = int(
                math.ceil(self.numFound / float(self.per_page))
            )
        else:
            # With no rows per page there is nothing to paginate, and
            # dividing by ``per_page`` would raise ZeroDivisionError.
            self.num_of_pages = 0

        self.has_previous = self.page_num > 1
        self.previous_page_number = (
            self.page_num - 1 if self.has_previous else None
        )

        self.has_next = self.page_num < self.num_of_pages
        self.next_page_number = (
            self.page_num + 1 if self.has_next else None
        )

    @property
    def last_page(self):
        """Number of the last page (same value as ``num_of_pages``)."""
        return self.num_of_pages
-
-
def select(query, results_per_page=None, page_number=None, sort=None, fields=None, link_attrs=True):
    """Perform a select in a Solr instance using the configuration
    set in settings.py.

    Parameters:
        query: value sent as the ``q`` parameter.
        results_per_page: sent as ``rows`` when truthy.
        page_number: 1-based page, converted to the ``start`` offset.
            Only honoured together with ``results_per_page``.
        sort: sent as ``sort`` when truthy.
        fields: iterable of field names, sent comma-joined as ``fl``.
        link_attrs: when true, every returned document is passed
            through ``add_attrs_to_doc``.

    Returns the decoded JSON response, or an empty dict when
    ``settings.SOLR_HOSTNAME`` is not set, the request fails, the status
    is not 200, or the body is not valid JSON.

    """

    if not settings.SOLR_HOSTNAME:
        return {}

    data = {
        'q': query,
        'wt': 'json',
    }

    # Number of results per page
    if results_per_page:
        data['rows'] = results_per_page

        # Page number
        if page_number:
            data['start'] = (page_number - 1) * results_per_page

    # Sort order
    if sort:
        data['sort'] = sort

    # Only select those fields
    if fields:
        data['fl'] = ','.join(fields)

    # First version of this was implemented using urllib2 and was
    #   a million times easier but unfortunately urllib2.urlopen
    #   does not support http headers. Without setting http headers
    #   for charset the solr server tries to decode utf-8 params
    #   as ASCII causing it to crash. HTTPConnection deals with
    #   encodings automagically.
    solr_conn = httplib.HTTPConnection(settings.SOLR_HOSTNAME,
                                       settings.SOLR_PORT)
    query_params = urllib.urlencode(data)
    solr_select_uri = settings.SOLR_SELECT_PATH + '?' + query_params

    solr_dict_resp = {}
    try:
        solr_conn.request('GET', solr_select_uri)
        solr_response = solr_conn.getresponse()
        if solr_response.status == 200:
            # The body must be read before the connection is closed.
            solr_dict_resp = json.loads(solr_response.read())
        else:
            logging.error('Solr answered with HTTP status %s',
                          solr_response.status)
    except (socket.error, httplib.HTTPException, ValueError) as err:
        # Connection trouble, a malformed HTTP exchange or a body that
        # is not JSON: log it and fall back to the empty result.
        logging.exception(err)
    finally:
        solr_conn.close()

    if link_attrs:
        # Loop over all documents adding or linking its information
        #   with the data from this app or database
        for doc in solr_dict_resp.get('response', {}).get("docs", []):
            add_attrs_to_doc(doc)

    return solr_dict_resp
-    
-
def get_latest_collaborations(number=10, username=None):
    """Return the most recently modified documents.

    Asks Solr for up to ``number`` documents sorted by ``modified``
    descending. When ``username`` is given, the search is filtered on
    ``collaborator:<username>``. Returns the list of documents, or an
    empty list when the response carries none.

    """
    filters = {'collaborator': username} if username else None

    solr_response = select(
        query=build_query('*', filters),
        results_per_page=number,
        sort='modified desc',
    )

    response = solr_response.get('response', {})
    return response.get('docs', [])
-
-
def count_types(sample=100, filters=None):
    """Count the ``Type`` of the most recently modified documents.

    Looks at up to ``sample`` documents matching ``filters``, sorted by
    ``modified`` descending, and returns a dict mapping each ``Type``
    value to the number of documents carrying it. Documents without a
    ``Type`` are counted under the key ``None``.

    Example: {
        'wiki': 30,
        'thread': 40,
        'ticket': 10,
        'changeset': 20,
    }

    """
    solr_response = select(
        query=build_query('*', filters),
        results_per_page=sample,
        sort='modified desc',
        link_attrs=False,
    )

    type_count = {}
    for doc in solr_response.get('response', {}).get('docs', []):
        doc_type = doc.get('Type')
        type_count[doc_type] = type_count.get(doc_type, 0) + 1

    return type_count
	@@ -1,254 +0,0 @@	@@ -1,254 +0,0 @@
1	-#!/usr/bin/env python
2	-# encoding: utf-8
3	-
4	-import math
5	-import json
6	-import urllib
7	-import socket
8	-import logging
9	-import httplib
10	-
11	-from dateutil.parser import parse as parse_timestamp
12	-
13	-from django.conf import settings
14	-
15	-from super_archives.models import EmailAddress
16	-
17	-
def build_query(user_query, filters=None):
    """Compose the query string that will be sent to Solr.

    The user's query (or ``*`` when it is empty) is ANDed onto
    ``settings.SOLR_BASE_QUERY``. When ``filters`` is a non-empty dict,
    that expression is wrapped in parentheses and one ``key:value``
    clause is ANDed on for every filter whose value is truthy.

    The final query is logged at INFO level and returned encoded as
    UTF-8.

    """
    base_query = settings.SOLR_BASE_QUERY.strip()
    clauses = [base_query + ' AND ' + (user_query or '*')]

    if filters:
        # Parenthesise the base expression before appending filters.
        clauses[0] = "(%s)" % clauses[0]
        clauses.extend(
            "%s:%s" % (name, wanted)
            for (name, wanted) in filters.items()
            if wanted
        )

    query = " AND ".join(clauses)
    logging.info(query)
    return query.encode('utf-8')
34	-
35	-
def parse_document_timestamps(doc, date_attrs=('modified', 'created')):
    """Replace the timestamp strings of a document with parsed dates.

    For every key in ``date_attrs`` the value found in ``doc`` is run
    through ``parse_timestamp`` (``dateutil.parser.parse``) and stored
    back under the same key. A key that is missing (or ``None``), or
    whose value cannot be parsed, ends up set to ``None``; parse
    failures are logged as errors.

    ``doc`` is modified in place and also returned.

    """
    for attr in date_attrs:
        raw_value = doc.get(attr)
        parsed = None
        # ``doc.get`` yields None for an absent attribute; the parser
        # does not accept None, so only call it when there is a value.
        if raw_value is not None:
            try:
                parsed = parse_timestamp(raw_value)
            except (ValueError, TypeError, OverflowError):
                logging.error('Error trying to parse "%s"', raw_value)
        doc[attr] = parsed

    return doc
53	-
54	-
def get_document_url(doc):
    """Store the document's URL under the ``url`` key.

    The URL is ``settings.SOLR_COLAB_URI`` (when that setting is truthy)
    followed by the document's ``path_string``. A missing or empty
    ``path_string`` contributes nothing.

    ``doc`` is modified in place and also returned.

    """
    prefix = settings.SOLR_COLAB_URI or ''
    # ``or ''`` also covers a ``path_string`` that is present but None.
    path = doc.get('path_string') or ''
    doc['url'] = prefix + path

    return doc
71	-
72	-
def get_document_from_addr(doc):
    """Attach the author's EmailAddress to the document, if there is one.

    The author is the document's ``last_author``, falling back to
    ``Creator``. When an ``EmailAddress`` whose user has that username
    exists, the first match is stored under ``from_address``; otherwise
    ``doc`` is left untouched. Nothing is returned.

    """
    username = doc.get('last_author') or doc.get('Creator')
    if not username:
        # No author to look up: skip the database query entirely.
        return

    from_addresses = EmailAddress.objects.filter(user__username=username)
    if from_addresses:
        doc.update({'from_address': from_addresses[0]})
85	-
86	-
def add_attrs_to_doc(doc):
    """Enrich a document dict in place.

    Calls, in order, the helpers that set its URL, parse its timestamps
    and attach its author's e-mail address. Nothing is returned.

    """
    enrichers = (
        get_document_url,
        parse_document_timestamps,
        get_document_from_addr,
    )
    for enrich in enrichers:
        enrich(doc)
95	-
96	-
class SolrPaginator(list):
    """List of the documents in a Solr response, plus paging metadata.

    The instance itself holds the documents found under
    ``response_dict['response']['docs']``. Attributes set on it:

    * ``QTime`` -- the header's ``QTime`` (1 when absent) divided by
      1000.0
    * ``per_page`` -- the ``rows`` request parameter (10 when absent)
    * ``numFound`` -- the response's ``numFound`` (0 when absent)
    * ``page_num`` -- the ``current_page`` argument, stored as given
    * ``num_of_pages`` / ``last_page`` -- pages needed to hold
      ``numFound`` results at ``per_page`` results per page
    * ``has_previous`` / ``previous_page_number`` and
      ``has_next`` / ``next_page_number`` -- neighbouring pages; the
      number is ``None`` when there is no such page

    """

    def __init__(self, response_dict, current_page):
        super(SolrPaginator, self).__init__()

        header = response_dict.get('responseHeader', {})
        response = response_dict.get('response', {})
        request_params = header.get('params', {})

        self.extend(response.get('docs', []))

        self.QTime = int(header.get('QTime', 1)) / 1000.0

        self.per_page = int(request_params.get('rows', 10))
        self.numFound = int(response.get('numFound', 0))
        self.page_num = current_page

        if self.per_page > 0:
            self.num_of_pages = int(
                math.ceil(self.numFound / float(self.per_page))
            )
        else:
            # With no rows per page there is nothing to paginate, and
            # dividing by ``per_page`` would raise ZeroDivisionError.
            self.num_of_pages = 0

        self.has_previous = self.page_num > 1
        self.previous_page_number = (
            self.page_num - 1 if self.has_previous else None
        )

        self.has_next = self.page_num < self.num_of_pages
        self.next_page_number = (
            self.page_num + 1 if self.has_next else None
        )

    @property
    def last_page(self):
        """Number of the last page (same value as ``num_of_pages``)."""
        return self.num_of_pages
132	-
133	-
def select(query, results_per_page=None, page_number=None, sort=None, fields=None, link_attrs=True):
    """Perform a select in a Solr instance using the configuration
    set in settings.py.

    Parameters:
        query: value sent as the ``q`` parameter.
        results_per_page: sent as ``rows`` when truthy.
        page_number: 1-based page, converted to the ``start`` offset.
            Only honoured together with ``results_per_page``.
        sort: sent as ``sort`` when truthy.
        fields: iterable of field names, sent comma-joined as ``fl``.
        link_attrs: when true, every returned document is passed
            through ``add_attrs_to_doc``.

    Returns the decoded JSON response, or an empty dict when
    ``settings.SOLR_HOSTNAME`` is not set, the request fails, the status
    is not 200, or the body is not valid JSON.

    """

    if not settings.SOLR_HOSTNAME:
        return {}

    data = {
        'q': query,
        'wt': 'json',
    }

    # Number of results per page
    if results_per_page:
        data['rows'] = results_per_page

        # Page number
        if page_number:
            data['start'] = (page_number - 1) * results_per_page

    # Sort order
    if sort:
        data['sort'] = sort

    # Only select those fields
    if fields:
        data['fl'] = ','.join(fields)

    # First version of this was implemented using urllib2 and was
    #   a million times easier but unfortunately urllib2.urlopen
    #   does not support http headers. Without setting http headers
    #   for charset the solr server tries to decode utf-8 params
    #   as ASCII causing it to crash. HTTPConnection deals with
    #   encodings automagically.
    solr_conn = httplib.HTTPConnection(settings.SOLR_HOSTNAME,
                                       settings.SOLR_PORT)
    query_params = urllib.urlencode(data)
    solr_select_uri = settings.SOLR_SELECT_PATH + '?' + query_params

    solr_dict_resp = {}
    try:
        solr_conn.request('GET', solr_select_uri)
        solr_response = solr_conn.getresponse()
        if solr_response.status == 200:
            # The body must be read before the connection is closed.
            solr_dict_resp = json.loads(solr_response.read())
        else:
            logging.error('Solr answered with HTTP status %s',
                          solr_response.status)
    except (socket.error, httplib.HTTPException, ValueError) as err:
        # Connection trouble, a malformed HTTP exchange or a body that
        # is not JSON: log it and fall back to the empty result.
        logging.exception(err)
    finally:
        solr_conn.close()

    if link_attrs:
        # Loop over all documents adding or linking its information
        #   with the data from this app or database
        for doc in solr_dict_resp.get('response', {}).get("docs", []):
            add_attrs_to_doc(doc)

    return solr_dict_resp
202	-
203	-
def get_latest_collaborations(number=10, username=None):
    """Return the most recently modified documents.

    Asks Solr for up to ``number`` documents sorted by ``modified``
    descending. When ``username`` is given, the search is filtered on
    ``collaborator:<username>``. Returns the list of documents, or an
    empty list when the response carries none.

    """
    filters = {'collaborator': username} if username else None

    solr_response = select(
        query=build_query('*', filters),
        results_per_page=number,
        sort='modified desc',
    )

    response = solr_response.get('response', {})
    return response.get('docs', [])
223	-
224	-
def count_types(sample=100, filters=None):
    """Count the ``Type`` of the most recently modified documents.

    Looks at up to ``sample`` documents matching ``filters``, sorted by
    ``modified`` descending, and returns a dict mapping each ``Type``
    value to the number of documents carrying it. Documents without a
    ``Type`` are counted under the key ``None``.

    Example: {
        'wiki': 30,
        'thread': 40,
        'ticket': 10,
        'changeset': 20,
    }

    """
    solr_response = select(
        query=build_query('*', filters),
        results_per_page=sample,
        sort='modified desc',
        link_attrs=False,
    )

    type_count = {}
    for doc in solr_response.get('response', {}).get('docs', []):
        doc_type = doc.get('Type')
        type_count[doc_type] = type_count.get(doc_type, 0) + 1

    return type_count