Commit 994c12253b2c51b8caff226c9a2821402bf094e7

Authored by Tássia Camões Araújo
1 parent bb8d206b
Exists in master and in 1 other branch add_vagrant

- Apt-xapian-index (axi) is now treated as an item repository.

- Created AxiContentBasedStrategy() to perform recommendation based on axi.
- Created different methods for extracting the user profile from DebtagsDB and axi.
- DebtagsIndex class now inherits from xapian.WritableDatabase, so that it can
  be substituted by axi database when convenient.
(close #1)
src/app_recommender.py
@@ -26,33 +26,50 @@ from recommender import * @@ -26,33 +26,50 @@ from recommender import *
26 from strategy import * 26 from strategy import *
27 from user import * 27 from user import *
28 28
  29 +# Setup configuration
29 DB_PATH = "/var/lib/debtags/package-tags" 30 DB_PATH = "/var/lib/debtags/package-tags"
30 INDEX_PATH = os.path.expanduser("~/.app-recommender/debtags_index") 31 INDEX_PATH = os.path.expanduser("~/.app-recommender/debtags_index")
31 32
  33 +XAPIANDBPATH = os.environ.get("AXI_DB_PATH", "/var/lib/apt-xapian-index")
  34 +XAPIANDB = XAPIANDBPATH + "/index"
  35 +XAPIANDBVALUES = XAPIANDBPATH + "/values"
  36 +
32 if __name__ == '__main__': 37 if __name__ == '__main__':
33 38
34 reindex = 0 39 reindex = 0
  40 + axi = 0
35 if len(sys.argv) == 2: 41 if len(sys.argv) == 2:
36 - DB_PATH = sys.argv[1]  
37 - reindex = 1  
38 - print "reindex true" 42 + if sys.argv[1] == "axi":
  43 + axi = 1
  44 + else:
  45 + DB_PATH = sys.argv[1]
  46 + reindex = 1
39 elif len(sys.argv) > 2: 47 elif len(sys.argv) > 2:
40 print >> sys.stderr, ("Usage: %s [PATH_TO_DEBTAGS_DATABASE]" % 48 print >> sys.stderr, ("Usage: %s [PATH_TO_DEBTAGS_DATABASE]" %
41 sys.argv[0]) 49 sys.argv[0])
42 sys.exit(1) 50 sys.exit(1)
43 51
44 - debtags_db = DebtagsDB(DB_PATH)  
45 - if not debtags_db.load(): sys.exit(1) 52 + if axi:
  53 + axi_db = xapian.Database(XAPIANDB)
  54 + app_rec = Recommender(axi_db)
  55 + app_rec.set_strategy(AxiContentBasedStrategy())
  56 + else:
  57 + debtags_db = DebtagsDB(DB_PATH)
  58 + if not debtags_db.load():
  59 + print >> sys.stderr,("Could not load DebtagsDB from %s." % DB_PATH)
  60 + sys.exit(1)
  61 + debtags_index = DebtagsIndex(
  62 + os.path.expanduser("~/.app-recommender/debtags_index"))
  63 + debtags_index.load(debtags_db,reindex)
  64 + app_rec = Recommender(debtags_index)
  65 + app_rec.set_strategy(ContentBasedStrategy())
46 66
47 user = LocalSystem() 67 user = LocalSystem()
48 - recommender = Recommender(items_repository=debtags_db,  
49 - strategy=ContentBasedStrategy(reindex))  
50 -  
51 - result = recommender.generate_recommendation(user) 68 + result = app_rec.get_recommendation(user)
52 result.print_result() 69 result.print_result()
53 70
54 metrics = [] 71 metrics = []
55 metrics.append(Precision()) 72 metrics.append(Precision())
56 metrics.append(Recall()) 73 metrics.append(Recall())
57 - validation = CrossValidation(0.1,10,recommender,metrics) 74 + validation = CrossValidation(0.1,10,app_rec,metrics)
58 validation.run(user) 75 validation.run(user)
@@ -41,9 +41,7 @@ def normalize_tags(string): @@ -41,9 +41,7 @@ def normalize_tags(string):
41 """ Normalize tag string so that it can be indexed and retrieved. """ 41 """ Normalize tag string so that it can be indexed and retrieved. """
42 return string.replace(':','_').replace('-','\'') 42 return string.replace(':','_').replace('-','\'')
43 43
44 -class DataRepository:  
45 - """ """  
46 - # FIXME todos os repositorios devem ser singleton 44 +# FIXME Data repositories should be singleton
47 45
48 class DebtagsDB(debtags.DB): 46 class DebtagsDB(debtags.DB):
49 def __init__(self,path): 47 def __init__(self,path):
@@ -68,18 +66,19 @@ class DebtagsDB(debtags.DB): @@ -68,18 +66,19 @@ class DebtagsDB(debtags.DB):
68 relevance_index(b))) 66 relevance_index(b)))
69 return normalize_tags(' '.join(sorted_relevant_tags[-qtd_of_tags:])) 67 return normalize_tags(' '.join(sorted_relevant_tags[-qtd_of_tags:]))
70 68
71 -class DebtagsIndex: 69 +class DebtagsIndex(xapian.WritableDatabase):
72 def __init__(self,path): 70 def __init__(self,path):
73 self.path = path 71 self.path = path
74 72
75 def load(self,debtags_db,reindex): 73 def load(self,debtags_db,reindex):
76 """ Load an existing debtags index. """ 74 """ Load an existing debtags index. """
  75 + self.debtags_db = debtags_db
77 if not reindex: 76 if not reindex:
78 try: 77 try:
79 - #print ("Opening existing debtags xapian index at \'%s\'" %  
80 - # self.path)  
81 - self.index = xapian.Database(self.path)  
82 - except DatabaseError: 78 + print ("Opening existing debtags xapian index at \'%s\'" %
  79 + self.path)
  80 + xapian.Database.__init__(self,self.path)
  81 + except xapian.DatabaseError:
83 print "Could not open debtags xapian index" 82 print "Could not open debtags xapian index"
84 reindex =1 83 reindex =1
85 if reindex: 84 if reindex:
@@ -92,11 +91,11 @@ class DebtagsIndex: @@ -92,11 +91,11 @@ class DebtagsIndex:
92 if not os.path.exists(self.path): 91 if not os.path.exists(self.path):
93 os.makedirs(self.path) 92 os.makedirs(self.path)
94 print "Creating new debtags xapian index at \'%s\'" % self.path 93 print "Creating new debtags xapian index at \'%s\'" % self.path
95 - self.index = xapian.WritableDatabase(self.path, 94 + xapian.WritableDatabase.__init__(self,self.path,
96 xapian.DB_CREATE_OR_OVERWRITE) 95 xapian.DB_CREATE_OR_OVERWRITE)
97 for pkg,tags in debtags_db.iter_packages_tags(): 96 for pkg,tags in debtags_db.iter_packages_tags():
98 doc = xapian.Document() 97 doc = xapian.Document()
99 doc.set_data(pkg) 98 doc.set_data(pkg)
100 for tag in tags: 99 for tag in tags:
101 doc.add_term(normalize_tags(tag)) 100 doc.add_term(normalize_tags(tag))
102 - print "indexing ",self.index.add_document(doc) 101 + print "indexing ",self.add_document(doc)
src/evaluation.py
@@ -139,7 +139,7 @@ class CrossValidation: @@ -139,7 +139,7 @@ class CrossValidation:
139 exit(1) 139 exit(1)
140 round_partition[random_key] = cross_item_score.pop(random_key) 140 round_partition[random_key] = cross_item_score.pop(random_key)
141 round_user = User(cross_item_score) 141 round_user = User(cross_item_score)
142 - predicted_result = self.recommender.generate_recommendation(round_user) 142 + predicted_result = self.recommender.get_recommendation(round_user)
143 real_result = RecommendationResult(round_partition,len(round_partition)) 143 real_result = RecommendationResult(round_partition,len(round_partition))
144 evaluation = Evaluation(predicted_result,real_result) 144 evaluation = Evaluation(predicted_result,real_result)
145 for metric in self.metrics_list: 145 for metric in self.metrics_list:
src/recommender.py
@@ -35,17 +35,16 @@ class RecommendationResult: @@ -35,17 +35,16 @@ class RecommendationResult:
35 35
36 class Recommender: 36 class Recommender:
37 """ """ 37 """ """
38 - def __init__(self,items_repository=None,users_repository=None,  
39 - knowledge_repository=None,strategy=None): 38 + def __init__(self,items_repository,users_repository=None,
  39 + knowledge_repository=None):
40 self.items_repository = items_repository 40 self.items_repository = items_repository
41 self.users_repository = users_repository 41 self.users_repository = users_repository
42 self.knowledge_repository = knowledge_repository 42 self.knowledge_repository = knowledge_repository
43 - self.strategy = strategy  
44 43
45 def set_strategy(self,strategy): 44 def set_strategy(self,strategy):
46 """ """ 45 """ """
47 self.strategy = strategy 46 self.strategy = strategy
48 47
49 - def generate_recommendation(self,user): 48 + def get_recommendation(self,user):
50 """ """ 49 """ """
51 return self.strategy.run(self,user) 50 return self.strategy.run(self,user)
src/strategy.py
@@ -17,6 +17,7 @@ @@ -17,6 +17,7 @@
17 # You should have received a copy of the GNU General Public License 17 # You should have received a copy of the GNU General Public License
18 # along with this program. If not, see <http://www.gnu.org/licenses/>. 18 # along with this program. If not, see <http://www.gnu.org/licenses/>.
19 19
  20 +import os, re
20 import xapian 21 import xapian
21 from data import * 22 from data import *
22 from recommender import * 23 from recommender import *
@@ -51,27 +52,32 @@ class RecommendationStrategy: @@ -51,27 +52,32 @@ class RecommendationStrategy:
51 class ItemReputationStrategy(RecommendationStrategy): 52 class ItemReputationStrategy(RecommendationStrategy):
52 """ Recommendation strategy based on items reputation. """ 53 """ Recommendation strategy based on items reputation. """
53 def run(self,items_list,heuristic): 54 def run(self,items_list,heuristic):
54 - """ """ 55 + """ Perform recommendation strategy """
55 return RecomendationResult() 56 return RecomendationResult()
56 57
57 class ContentBasedStrategy(RecommendationStrategy): 58 class ContentBasedStrategy(RecommendationStrategy):
58 """ Content-based recommendation strategy. """ 59 """ Content-based recommendation strategy. """
59 - #def __init__(self,items_repository):  
60 - # self.items_repository = items_repository  
61 - def __init__(self,reindex):  
62 - self.reindex = reindex  
63 -  
64 def run(self,recommender,user): 60 def run(self,recommender,user):
65 - """ """  
66 - best_tags = recommender.items_repository.get_relevant_tags(user.items(),  
67 - 50)  
68 - debtags_index = DebtagsIndex(  
69 - os.path.expanduser("~/.app-recommender/debtags_index"))  
70 - debtags_index.load(recommender.items_repository,self.reindex)  
71 - 61 + """ Perform recommendation strategy """
  62 + profile = user.debtags_tag_profile(recommender.items_repository.debtags_db,50)
72 qp = xapian.QueryParser() 63 qp = xapian.QueryParser()
73 - query = qp.parse_query(best_tags)  
74 - enquire = xapian.Enquire(debtags_index.index) 64 + query = qp.parse_query(profile)
  65 + enquire = xapian.Enquire(recommender.items_repository)
  66 + enquire.set_query(query)
  67 +
  68 + mset = enquire.get_mset(0, 20, None, PkgMatchDecider(user.items()))
  69 + item_score = {}
  70 + for m in mset:
  71 + item_score[m.document.get_data()] = m.rank
  72 + return RecommendationResult(item_score,20)
  73 +
  74 +class AxiContentBasedStrategy(RecommendationStrategy):
  75 + """ Content-based recommendation strategy based on Apt-xapian-index. """
  76 + def run(self,recommender,user):
  77 + """ Perform recommendation strategy """
  78 + profile = user.axi_tag_profile(recommender.items_repository,50)
  79 + query = xapian.Query(xapian.Query.OP_OR,profile)
  80 + enquire = xapian.Enquire(recommender.items_repository)
75 enquire.set_query(query) 81 enquire.set_query(query)
76 82
77 mset = enquire.get_mset(0, 20, None, PkgMatchDecider(user.items())) 83 mset = enquire.get_mset(0, 20, None, PkgMatchDecider(user.items()))
@@ -83,17 +89,17 @@ class ContentBasedStrategy(RecommendationStrategy): @@ -83,17 +89,17 @@ class ContentBasedStrategy(RecommendationStrategy):
83 class ColaborativeStrategy(RecommendationStrategy): 89 class ColaborativeStrategy(RecommendationStrategy):
84 """ Colaborative recommendation strategy. """ 90 """ Colaborative recommendation strategy. """
85 def run(self,user,users_repository,similarity_measure): 91 def run(self,user,users_repository,similarity_measure):
86 - """ """ 92 + """ Perform recommendation strategy """
87 return RecomendationResult() 93 return RecomendationResult()
88 94
89 class KnowledgeBasedStrategy(RecommendationStrategy): 95 class KnowledgeBasedStrategy(RecommendationStrategy):
90 """ Knowledge-based recommendation strategy. """ 96 """ Knowledge-based recommendation strategy. """
91 def run(self,user,knowledge_repository): 97 def run(self,user,knowledge_repository):
92 - """ """ 98 + """ Perform recommendation strategy """
93 return RecomendationResult() 99 return RecomendationResult()
94 100
95 class DemographicStrategy(RecommendationStrategy): 101 class DemographicStrategy(RecommendationStrategy):
96 """ Recommendation strategy based on demographic data. """ 102 """ Recommendation strategy based on demographic data. """
97 def run(self,user,items_repository): 103 def run(self,user,items_repository):
98 - """ """ 104 + """ Perform recommendation strategy """
99 return RecomendationResult() 105 return RecomendationResult()
@@ -18,6 +18,14 @@ @@ -18,6 +18,14 @@
18 # along with this program. If not, see <http://www.gnu.org/licenses/>. 18 # along with this program. If not, see <http://www.gnu.org/licenses/>.
19 19
20 import commands 20 import commands
  21 +import xapian
  22 +
  23 +class FilterTag(xapian.ExpandDecider):
  24 + def __call__(self, term):
  25 + """
  26 + Return true if the term is a tag, else false
  27 + """
  28 + return term[:2] == "XT"
21 29
22 class User: 30 class User:
23 """ """ 31 """ """
@@ -30,6 +38,26 @@ class User: @@ -30,6 +38,26 @@ class User:
30 def items(self): 38 def items(self):
31 return self.item_score.keys() 39 return self.item_score.keys()
32 40
  41 + def axi_tag_profile(self,xapian_db,profile_size):
  42 + terms = []
  43 + for item in self.items():
  44 + terms.append("XP"+item)
  45 + query = xapian.Query(xapian.Query.OP_OR, terms)
  46 + enquire = xapian.Enquire(xapian_db)
  47 + enquire.set_query(query)
  48 + rset = xapian.RSet()
  49 + for m in enquire.get_mset(0,30000): #consider all matches
  50 + rset.add_document(m.docid)
  51 + eset = enquire.get_eset(profile_size, rset, FilterTag())
  52 + profile = []
  53 + for res in eset:
  54 + profile.append(res.term)
  55 + #print "%.2f %s" % (res.weight,res.term[2:])
  56 + return profile
  57 +
  58 + def debtags_tag_profile(self,debtags_db,profile_size):
  59 + return debtags_db.get_relevant_tags(self.items(),profile_size)
  60 +
33 class LocalSystem(User): 61 class LocalSystem(User):
34 """ """ 62 """ """
35 def __init__(self): 63 def __init__(self):