From 994c12253b2c51b8caff226c9a2821402bf094e7 Mon Sep 17 00:00:00 2001 From: Tássia Camões Araújo Date: Tue, 8 Mar 2011 23:31:01 -0300 Subject: [PATCH] - Apt-xapian-index is now considered as an item repository. - Created AxiContentBasedStrategy() to perform recommendation based on axi. - Created different methods for extracting user profile from DebtagsDB and axi. - DebtagsIndex class now inherits from xapian.WritableDatabase, so that it can be substituted by axi database when convenient. (close #1) --- src/app_recommender.py | 37 +++++++++++++++++++++++++++---------- src/data.py | 19 +++++++++---------- src/evaluation.py | 2 +- src/recommender.py | 7 +++---- src/strategy.py | 42 ++++++++++++++++++++++++------------------ src/user.py | 28 ++++++++++++++++++++++++++++ 6 files changed, 92 insertions(+), 43 deletions(-) diff --git a/src/app_recommender.py b/src/app_recommender.py index 5414700..7405d5a 100755 --- a/src/app_recommender.py +++ b/src/app_recommender.py @@ -26,33 +26,50 @@ from recommender import * from strategy import * from user import * +# Setup configuration DB_PATH = "/var/lib/debtags/package-tags" INDEX_PATH = os.path.expanduser("~/.app-recommender/debtags_index") +XAPIANDBPATH = os.environ.get("AXI_DB_PATH", "/var/lib/apt-xapian-index") +XAPIANDB = XAPIANDBPATH + "/index" +XAPIANDBVALUES = XAPIANDBPATH + "/values" + if __name__ == '__main__': reindex = 0 + axi = 0 if len(sys.argv) == 2: - DB_PATH = sys.argv[1] - reindex = 1 - print "reindex true" + if sys.argv[1] == "axi": + axi = 1 + else: + DB_PATH = sys.argv[1] + reindex = 1 elif len(sys.argv) > 2: print >> sys.stderr, ("Usage: %s [PATH_TO_DEBTAGS_DATABASE]" % sys.argv[0]) sys.exit(1) - debtags_db = DebtagsDB(DB_PATH) - if not debtags_db.load(): sys.exit(1) + if axi: + axi_db = xapian.Database(XAPIANDB) + app_rec = Recommender(axi_db) + app_rec.set_strategy(AxiContentBasedStrategy()) + else: + debtags_db = DebtagsDB(DB_PATH) + if not debtags_db.load(): + print >> sys.stderr,("Could not load DebtagsDB from %s." % DB_PATH) + sys.exit(1) + debtags_index = DebtagsIndex( + os.path.expanduser("~/.app-recommender/debtags_index")) + debtags_index.load(debtags_db,reindex) + app_rec = Recommender(debtags_index) + app_rec.set_strategy(ContentBasedStrategy()) user = LocalSystem() - recommender = Recommender(items_repository=debtags_db, - strategy=ContentBasedStrategy(reindex)) - - result = recommender.generate_recommendation(user) + result = app_rec.get_recommendation(user) result.print_result() metrics = [] metrics.append(Precision()) metrics.append(Recall()) - validation = CrossValidation(0.1,10,recommender,metrics) + validation = CrossValidation(0.1,10,app_rec,metrics) validation.run(user) diff --git a/src/data.py b/src/data.py index 851272e..2ece072 100644 --- a/src/data.py +++ b/src/data.py @@ -41,9 +41,7 @@ def normalize_tags(string): """ Normalize tag string so that it can be indexed and retrieved. """ return string.replace(':','_').replace('-','\'') -class DataRepository: - """ """ - # FIXME todos os repositorios devem ser singleton +# FIXME Data repositories should be singleton class DebtagsDB(debtags.DB): def __init__(self,path): @@ -68,18 +66,19 @@ class DebtagsDB(debtags.DB): relevance_index(b))) return normalize_tags(' '.join(sorted_relevant_tags[-qtd_of_tags:])) -class DebtagsIndex: +class DebtagsIndex(xapian.WritableDatabase): def __init__(self,path): self.path = path def load(self,debtags_db,reindex): """ Load an existing debtags index. """ + self.debtags_db = debtags_db if not reindex: try: - #print ("Opening existing debtags xapian index at \'%s\'" % - # self.path) - self.index = xapian.Database(self.path) - except DatabaseError: + print ("Opening existing debtags xapian index at \'%s\'" % + self.path) + xapian.Database.__init__(self,self.path) + except xapian.DatabaseError: print "Could not open debtags xapian index" reindex =1 if reindex: @@ -92,11 +91,11 @@ class DebtagsIndex: if not os.path.exists(self.path): os.makedirs(self.path) print "Creating new debtags xapian index at \'%s\'" % self.path - self.index = xapian.WritableDatabase(self.path, + xapian.WritableDatabase.__init__(self,self.path, xapian.DB_CREATE_OR_OVERWRITE) for pkg,tags in debtags_db.iter_packages_tags(): doc = xapian.Document() doc.set_data(pkg) for tag in tags: doc.add_term(normalize_tags(tag)) - print "indexing ",self.index.add_document(doc) + print "indexing ",self.add_document(doc) diff --git a/src/evaluation.py b/src/evaluation.py index 01dd19e..8a01603 100644 --- a/src/evaluation.py +++ b/src/evaluation.py @@ -139,7 +139,7 @@ class CrossValidation: exit(1) round_partition[random_key] = cross_item_score.pop(random_key) round_user = User(cross_item_score) - predicted_result = self.recommender.generate_recommendation(round_user) + predicted_result = self.recommender.get_recommendation(round_user) real_result = RecommendationResult(round_partition,len(round_partition)) evaluation = Evaluation(predicted_result,real_result) for metric in self.metrics_list: diff --git a/src/recommender.py b/src/recommender.py index 776c626..33301f4 100644 --- a/src/recommender.py +++ b/src/recommender.py @@ -35,17 +35,16 @@ class RecommendationResult: class Recommender: """ """ - def __init__(self,items_repository=None,users_repository=None, - knowledge_repository=None,strategy=None): + def __init__(self,items_repository,users_repository=None, + knowledge_repository=None): self.items_repository = items_repository self.users_repository = users_repository self.knowledge_repository = knowledge_repository - self.strategy = strategy def set_strategy(self,strategy): """ """ self.strategy = strategy - def generate_recommendation(self,user): + def get_recommendation(self,user): """ """ return self.strategy.run(self,user) diff --git a/src/strategy.py b/src/strategy.py index 76259df..73d8344 100644 --- a/src/strategy.py +++ b/src/strategy.py @@ -17,6 +17,7 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . +import os, re import xapian from data import * from recommender import * @@ -51,27 +52,32 @@ class RecommendationStrategy: class ItemReputationStrategy(RecommendationStrategy): """ Recommendation strategy based on items reputation. """ def run(self,items_list,heuristic): - """ """ + """ Perform recommendation strategy """ return RecomendationResult() class ContentBasedStrategy(RecommendationStrategy): """ Content-based recommendation strategy. """ - #def __init__(self,items_repository): - # self.items_repository = items_repository - def __init__(self,reindex): - self.reindex = reindex - def run(self,recommender,user): - """ """ - best_tags = recommender.items_repository.get_relevant_tags(user.items(), - 50) - debtags_index = DebtagsIndex( - os.path.expanduser("~/.app-recommender/debtags_index")) - debtags_index.load(recommender.items_repository,self.reindex) - + """ Perform recommendation strategy """ + profile = user.debtags_tag_profile(recommender.items_repository.debtags_db,50) qp = xapian.QueryParser() - query = qp.parse_query(best_tags) - enquire = xapian.Enquire(debtags_index.index) + query = qp.parse_query(profile) + enquire = xapian.Enquire(recommender.items_repository) + enquire.set_query(query) + + mset = enquire.get_mset(0, 20, None, PkgMatchDecider(user.items())) + item_score = {} + for m in mset: + item_score[m.document.get_data()] = m.rank + return RecommendationResult(item_score,20) + +class AxiContentBasedStrategy(RecommendationStrategy): + """ Content-based recommendation strategy based on Apt-xapian-index. """ + def run(self,recommender,user): + """ Perform recommendation strategy """ + profile = user.axi_tag_profile(recommender.items_repository,50) + query = xapian.Query(xapian.Query.OP_OR,profile) + enquire = xapian.Enquire(recommender.items_repository) enquire.set_query(query) mset = enquire.get_mset(0, 20, None, PkgMatchDecider(user.items())) @@ -83,17 +89,17 @@ class ContentBasedStrategy(RecommendationStrategy): class ColaborativeStrategy(RecommendationStrategy): """ Colaborative recommendation strategy. """ def run(self,user,users_repository,similarity_measure): - """ """ + """ Perform recommendation strategy """ return RecomendationResult() class KnowledgeBasedStrategy(RecommendationStrategy): """ Knowledge-based recommendation strategy. """ def run(self,user,knowledge_repository): - """ """ + """ Perform recommendation strategy """ return RecomendationResult() class DemographicStrategy(RecommendationStrategy): """ Recommendation strategy based on demographic data. """ def run(self,user,items_repository): - """ """ + """ Perform recommendation strategy """ return RecomendationResult() diff --git a/src/user.py b/src/user.py index 788fdb9..6849173 100644 --- a/src/user.py +++ b/src/user.py @@ -18,6 +18,14 @@ # along with this program. If not, see . import commands +import xapian + +class FilterTag(xapian.ExpandDecider): + def __call__(self, term): + """ + Return true if the term is a tag, else false + """ + return term[:2] == "XT" class User: """ """ @@ -30,6 +38,26 @@ class User: def items(self): return self.item_score.keys() + def axi_tag_profile(self,xapian_db,profile_size): + terms = [] + for item in self.items(): + terms.append("XP"+item) + query = xapian.Query(xapian.Query.OP_OR, terms) + enquire = xapian.Enquire(xapian_db) + enquire.set_query(query) + rset = xapian.RSet() + for m in enquire.get_mset(0,30000): #consider all matches + rset.add_document(m.docid) + eset = enquire.get_eset(profile_size, rset, FilterTag()) + profile = [] + for res in eset: + profile.append(res.term) + #print "%.2f %s" % (res.weight,res.term[2:]) + return profile + + def debtags_tag_profile(self,debtags_db,profile_size): + return debtags_db.get_relevant_tags(self.items(),profile_size) + class LocalSystem(User): """ """ def __init__(self): -- libgit2 0.21.2