removing solrutils file

Luan
1 parent f6d5f22e
Showing 1 changed file with 0 additions and 254 deletions Show diff stats
src/colab/deprecated/solrutils.py
@@ -1,254 +0,0 @@
-#!/usr/bin/env python
-# encoding: utf-8
-
-import math
-import json
-import urllib
-import socket
-import logging
-import httplib
-
-from dateutil.parser import parse as parse_timestamp
-
-from django.conf import settings
-
-from super_archives.models import EmailAddress
-
-
-def build_query(user_query, filters=None):
-    """Build the query that will be sent to Solr"""    
-
-    if not user_query:
-        user_query = '*'
-
-    query = settings.SOLR_BASE_QUERY.strip() + ' AND ' + user_query
-    if filters:
-        query = "(%s)" % query
-
-        for (key, value) in filters.items():
-            if value:
-                query += " AND %s:%s" % (key, value)
-    
-    logging.info(query)
-    return query.encode('utf-8')
-
-
-def parse_document_timestamps(doc, date_attrs=('modified', 'created')):
-    """Converts the `modified' and `created' dates from
-    ISO 8601 format to a datetime object for the given
-    document.
-    
-    """
-    
-    for date in date_attrs:
-        date_str = doc.get(date)
-        try:
-            date_obj = parse_timestamp(date_str)
-        except ValueError:
-            logging.error('Error trying to parse "%s"', date_str)
-            date_obj = None
-        doc.update({date: date_obj})
-    
-    return doc
-
-
-def get_document_url(doc):
-    """Set the url attribute for a document using the path_string.
-    In case the resource comes from an external domain it will
-    be prepended to this URL.
-
-    """
-    doc_type = doc.get('Type')
-    
-    url = ''
-    if settings.SOLR_COLAB_URI:
-        url += settings.SOLR_COLAB_URI
-    
-    url += doc.get('path_string', '') 
-    doc.update({'url': url})
-    
-    return doc
-    
-
-def get_document_from_addr(doc):
-    """Get a EmailAddress instance for the given document if 
-    it's available.
-    
-    """
-    
-    username = doc.get('last_author')
-    if not username:
-        username = doc.get('Creator')
-    from_addresses = EmailAddress.objects.filter(user__username=username)
-    if username and from_addresses:
-        doc.update({'from_address': from_addresses[0]})
-    
-
-def add_attrs_to_doc(doc):
-    """Wraps the call of functions that adds or modifies keys
-    of the given doc (which should be a dict).
-    
-    """
-    get_document_url(doc)
-    parse_document_timestamps(doc)
-    get_document_from_addr(doc)
-
-
-class SolrPaginator(list):
-    
-    def __init__(self, response_dict, current_page):
-        super(SolrPaginator, self).__init__()
-        
-        responseHeader = response_dict.get('responseHeader', {}) 
-        response = response_dict.get('response', {})
-        request_params = responseHeader.get('params', {})
-        
-        docs = response.get('docs', [])
-        self.extend(docs)
-
-        self.QTime = int(responseHeader.get('QTime', 1)) / 1000.0
-       
-        self.per_page = int(request_params.get('rows', 10))
-        self.numFound = int(response.get('numFound', 0))
-        self.page_num = current_page
-    
-        self.num_of_pages = int(math.ceil(self.numFound / float(self.per_page)))
-        
-        self.has_previous = self.page_num > 1
-        if self.has_previous: 
-            self.previous_page_number = self.page_num - 1
-        else:
-            self.previous_page_number = None
-        
-        self.has_next = self.page_num < self.num_of_pages
-        if self.has_next:
-            self.next_page_number = self.page_num + 1        
-        else:
-            self.next_page_number = None
-    
-    @property
-    def last_page(self):
-        return self.num_of_pages
-
-
-def select(query, results_per_page=None, page_number=None, sort=None, fields=None, link_attrs=True):
-    """Perform a select in a Solr instance using the configuration
-    set in settings.py.
-    
-    """
-
-    if not settings.SOLR_HOSTNAME:
-        return {}
-    
-    data = {
-        'q': query, 
-        'wt': 'json',
-    }
-    
-    # Number of results per page
-    if results_per_page:
-        data.update({'rows': results_per_page})
-        
-        # Page number
-        if page_number:
-            data.update({
-                'start': (page_number - 1) * results_per_page,
-            })
-            
-    # Sort order
-    if sort:
-        data.update({
-            'sort': sort,
-        })
-    
-    # Only select those fields
-    if fields:
-        data.update({
-            'fl': ','.join(fields),
-        })
-    # The first version of this was implemented using urllib2 and was
-    #   a million times easier, but unfortunately urllib2.urlopen
-    #   does not support HTTP headers. Without setting HTTP headers
-    #   for the charset, the Solr server tries to decode UTF-8 params
-    #   as ASCII, causing it to crash. HTTPConnection deals with
-    #   encodings automagically.
-    solr_conn = httplib.HTTPConnection(settings.SOLR_HOSTNAME,          
-                                       settings.SOLR_PORT)
-    query_params = urllib.urlencode(data)
-    solr_select_uri = settings.SOLR_SELECT_PATH + '?' + query_params
-
-    try:
-        solr_conn.request('GET', solr_select_uri)
-        solr_response = solr_conn.getresponse()
-    except socket.error as err: 
-        solr_response = None
-        logging.exception(err)
-
-    if solr_response and solr_response.status == 200:
-        #TODO: Log error connecting to solr
-        solr_json_resp = solr_response.read()
-        solr_dict_resp = json.loads(solr_json_resp)
-    else:
-        solr_dict_resp = {}
-    
-    docs = solr_dict_resp.get('response', {}).get("docs", [])
-
-    if link_attrs:
-        # Loop over all documents adding or linking its information
-        #   with the data from this app or database
-        map(add_attrs_to_doc, docs)
-    
-    return solr_dict_resp
-    
-
-def get_latest_collaborations(number=10, username=None):
-    """Get the n documents recently modified that this username
-    has contributed to in some way.
-    
-    """
-    
-    if username:
-        filters = {'collaborator': username}
-    else:
-        filters = None
-    
-    query = build_query('*', filters)
-    solr_response = select(
-        query=query, 
-        results_per_page=number, 
-        sort='modified desc'
-    )
-    
-    return solr_response.get('response', {}).get('docs', [])
-
-
-def count_types(sample=100, filters=None):
-    """Count the type of the last modifications returning the
-    results in a dict.
-    
-    Example: {
-        'wiki': 30,
-        'thread': 40,
-        'ticket': 10,
-        'changeset': 20,
-    }
-    
-    """
-    
-    query = build_query('*', filters)
-    solr_response = select(
-        query=query,
-        results_per_page=sample,
-        sort='modified desc',
-        link_attrs=False,
-    )
-
-    docs = solr_response.get('response', {}).get('docs', [])
-
-    type_count = {}
-    for doc in docs:
-        doc_type = doc.get('Type')
-        count = type_count.get(doc_type, 0) + 1
-        type_count.update({doc_type: count})
-
-    return type_count
...	...	@@ -1,254 +0,0 @@
1		-#!/usr/bin/env python
2		-# encoding: utf-8
3		-
4		-import math
5		-import json
6		-import urllib
7		-import socket
8		-import logging
9		-import httplib
10		-
11		-from dateutil.parser import parse as parse_timestamp
12		-
13		-from django.conf import settings
14		-
15		-from super_archives.models import EmailAddress
16		-
17		-
18		-def build_query(user_query, filters=None):
19		- """Build the query that will be sent to Solr"""
20		-
21		- if not user_query:
22		- user_query = '*'
23		-
24		- query = settings.SOLR_BASE_QUERY.strip() + ' AND ' + user_query
25		- if filters:
26		- query = "(%s)" % query
27		-
28		- for (key, value) in filters.items():
29		- if value:
30		- query += " AND %s:%s" % (key, value)
31		-
32		- logging.info(query)
33		- return query.encode('utf-8')
34		-
35		-
36		-def parse_document_timestamps(doc, date_attrs=('modified', 'created')):
37		- """Converts the `modified' and `created' dates from
38		- ISO 8601 format to a datetime object for the given
39		- document.
40		-
41		- """
42		-
43		- for date in date_attrs:
44		- date_str = doc.get(date)
45		- try:
46		- date_obj = parse_timestamp(date_str)
47		- except ValueError:
48		- logging.error('Error trying to parse "%s"', date_str)
49		- date_obj = None
50		- doc.update({date: date_obj})
51		-
52		- return doc
53		-
54		-
55		-def get_document_url(doc):
56		- """Set the url attribute for a document using the path_string.
57		- In case the resource comes from an external domain it will
58		- be prepended to this URL.
59		-
60		- """
61		- doc_type = doc.get('Type')
62		-
63		- url = ''
64		- if settings.SOLR_COLAB_URI:
65		- url += settings.SOLR_COLAB_URI
66		-
67		- url += doc.get('path_string', '')
68		- doc.update({'url': url})
69		-
70		- return doc
71		-
72		-
73		-def get_document_from_addr(doc):
74		- """Get a EmailAddress instance for the given document if
75		- it's available.
76		-
77		- """
78		-
79		- username = doc.get('last_author')
80		- if not username:
81		- username = doc.get('Creator')
82		- from_addresses = EmailAddress.objects.filter(user__username=username)
83		- if username and from_addresses:
84		- doc.update({'from_address': from_addresses[0]})
85		-
86		-
87		-def add_attrs_to_doc(doc):
88		- """Wraps the call of functions that adds or modifies keys
89		- of the given doc (which should be a dict).
90		-
91		- """
92		- get_document_url(doc)
93		- parse_document_timestamps(doc)
94		- get_document_from_addr(doc)
95		-
96		-
97		-class SolrPaginator(list):
98		-
99		- def __init__(self, response_dict, current_page):
100		- super(SolrPaginator, self).__init__()
101		-
102		- responseHeader = response_dict.get('responseHeader', {})
103		- response = response_dict.get('response', {})
104		- request_params = responseHeader.get('params', {})
105		-
106		- docs = response.get('docs', [])
107		- self.extend(docs)
108		-
109		- self.QTime = int(responseHeader.get('QTime', 1)) / 1000.0
110		-
111		- self.per_page = int(request_params.get('rows', 10))
112		- self.numFound = int(response.get('numFound', 0))
113		- self.page_num = current_page
114		-
115		- self.num_of_pages = int(math.ceil(self.numFound / float(self.per_page)))
116		-
117		- self.has_previous = self.page_num > 1
118		- if self.has_previous:
119		- self.previous_page_number = self.page_num - 1
120		- else:
121		- self.previous_page_number = None
122		-
123		- self.has_next = self.page_num < self.num_of_pages
124		- if self.has_next:
125		- self.next_page_number = self.page_num + 1
126		- else:
127		- self.next_page_number = None
128		-
129		- @property
130		- def last_page(self):
131		- return self.num_of_pages
132		-
133		-
134		-def select(query, results_per_page=None, page_number=None, sort=None, fields=None, link_attrs=True):
135		- """Perform a select in a Solr instance using the configuration
136		- set in settings.py.
137		-
138		- """
139		-
140		- if not settings.SOLR_HOSTNAME:
141		- return {}
142		-
143		- data = {
144		- 'q': query,
145		- 'wt': 'json',
146		- }
147		-
148		- # Number of results per page
149		- if results_per_page:
150		- data.update({'rows': results_per_page})
151		-
152		- # Page number
153		- if page_number:
154		- data.update({
155		- 'start': (page_number - 1) * results_per_page,
156		- })
157		-
158		- # Sort order
159		- if sort:
160		- data.update({
161		- 'sort': sort,
162		- })
163		-
164		- # Only select those fields
165		- if fields:
166		- data.update({
167		- 'fl': ','.join(fields),
168		- })
169		- # The first version of this was implemented using urllib2 and was
170		- # a million times easier, but unfortunately urllib2.urlopen
171		- # does not support HTTP headers. Without setting HTTP headers
172		- # for the charset, the Solr server tries to decode UTF-8 params
173		- # as ASCII, causing it to crash. HTTPConnection deals with
174		- # encodings automagically.
175		- solr_conn = httplib.HTTPConnection(settings.SOLR_HOSTNAME,
176		- settings.SOLR_PORT)
177		- query_params = urllib.urlencode(data)
178		- solr_select_uri = settings.SOLR_SELECT_PATH + '?' + query_params
179		-
180		- try:
181		- solr_conn.request('GET', solr_select_uri)
182		- solr_response = solr_conn.getresponse()
183		- except socket.error as err:
184		- solr_response = None
185		- logging.exception(err)
186		-
187		- if solr_response and solr_response.status == 200:
188		- #TODO: Log error connecting to solr
189		- solr_json_resp = solr_response.read()
190		- solr_dict_resp = json.loads(solr_json_resp)
191		- else:
192		- solr_dict_resp = {}
193		-
194		- docs = solr_dict_resp.get('response', {}).get("docs", [])
195		-
196		- if link_attrs:
197		- # Loop over all documents adding or linking its information
198		- # with the data from this app or database
199		- map(add_attrs_to_doc, docs)
200		-
201		- return solr_dict_resp
202		-
203		-
204		-def get_latest_collaborations(number=10, username=None):
205		- """Get the n documents recently modified that this username
206		- has contributed to in some way.
207		-
208		- """
209		-
210		- if username:
211		- filters = {'collaborator': username}
212		- else:
213		- filters = None
214		-
215		- query = build_query('*', filters)
216		- solr_response = select(
217		- query=query,
218		- results_per_page=number,
219		- sort='modified desc'
220		- )
221		-
222		- return solr_response.get('response', {}).get('docs', [])
223		-
224		-
225		-def count_types(sample=100, filters=None):
226		- """Count the type of the last modifications returning the
227		- results in a dict.
228		-
229		- Example: {
230		- 'wiki': 30,
231		- 'thread': 40,
232		- 'ticket': 10,
233		- 'changeset': 20,
234		- }
235		-
236		- """
237		-
238		- query = build_query('*', filters)
239		- solr_response = select(
240		- query=query,
241		- results_per_page=sample,
242		- sort='modified desc',
243		- link_attrs=False,
244		- )
245		-
246		- docs = solr_response.get('response', {}).get('docs', [])
247		-
248		- type_count = {}
249		- for doc in docs:
250		- doc_type = doc.get('Type')
251		- count = type_count.get(doc_type, 0) + 1
252		- type_count.update({doc_type: count})
253		-
254		- return type_count