Commit 994c12253b2c51b8caff226c9a2821402bf094e7 (1 parent: bb8d206b). Exists in master and in 1 other branch.
- Apt-xapian-index (axi) is now treated as an item repository.
- Created AxiContentBasedStrategy() to perform recommendation based on axi.
- Created different methods for extracting the user profile from DebtagsDB and from axi.
- The DebtagsIndex class now inherits from xapian.WritableDatabase, so that it can be replaced by the axi database when convenient. (close #1)
Showing 6 changed files with 92 additions and 43 deletions.
Show diff stats
src/app_recommender.py
@@ -26,33 +26,50 @@ from recommender import * | @@ -26,33 +26,50 @@ from recommender import * | ||
26 | from strategy import * | 26 | from strategy import * |
27 | from user import * | 27 | from user import * |
28 | 28 | ||
29 | +# Setup configuration | ||
29 | DB_PATH = "/var/lib/debtags/package-tags" | 30 | DB_PATH = "/var/lib/debtags/package-tags" |
30 | INDEX_PATH = os.path.expanduser("~/.app-recommender/debtags_index") | 31 | INDEX_PATH = os.path.expanduser("~/.app-recommender/debtags_index") |
31 | 32 | ||
33 | +XAPIANDBPATH = os.environ.get("AXI_DB_PATH", "/var/lib/apt-xapian-index") | ||
34 | +XAPIANDB = XAPIANDBPATH + "/index" | ||
35 | +XAPIANDBVALUES = XAPIANDBPATH + "/values" | ||
36 | + | ||
32 | if __name__ == '__main__': | 37 | if __name__ == '__main__': |
33 | 38 | ||
34 | reindex = 0 | 39 | reindex = 0 |
40 | + axi = 0 | ||
35 | if len(sys.argv) == 2: | 41 | if len(sys.argv) == 2: |
36 | - DB_PATH = sys.argv[1] | ||
37 | - reindex = 1 | ||
38 | - print "reindex true" | 42 | + if sys.argv[1] == "axi": |
43 | + axi = 1 | ||
44 | + else: | ||
45 | + DB_PATH = sys.argv[1] | ||
46 | + reindex = 1 | ||
39 | elif len(sys.argv) > 2: | 47 | elif len(sys.argv) > 2: |
40 | print >> sys.stderr, ("Usage: %s [PATH_TO_DEBTAGS_DATABASE]" % | 48 | print >> sys.stderr, ("Usage: %s [PATH_TO_DEBTAGS_DATABASE]" % |
41 | sys.argv[0]) | 49 | sys.argv[0]) |
42 | sys.exit(1) | 50 | sys.exit(1) |
43 | 51 | ||
44 | - debtags_db = DebtagsDB(DB_PATH) | ||
45 | - if not debtags_db.load(): sys.exit(1) | 52 | + if axi: |
53 | + axi_db = xapian.Database(XAPIANDB) | ||
54 | + app_rec = Recommender(axi_db) | ||
55 | + app_rec.set_strategy(AxiContentBasedStrategy()) | ||
56 | + else: | ||
57 | + debtags_db = DebtagsDB(DB_PATH) | ||
58 | + if not debtags_db.load(): | ||
59 | + print >> sys.stderr,("Could not load DebtagsDB from %s." % DB_PATH) | ||
60 | + sys.exit(1) | ||
61 | + debtags_index = DebtagsIndex( | ||
62 | + os.path.expanduser("~/.app-recommender/debtags_index")) | ||
63 | + debtags_index.load(debtags_db,reindex) | ||
64 | + app_rec = Recommender(debtags_index) | ||
65 | + app_rec.set_strategy(ContentBasedStrategy()) | ||
46 | 66 | ||
47 | user = LocalSystem() | 67 | user = LocalSystem() |
48 | - recommender = Recommender(items_repository=debtags_db, | ||
49 | - strategy=ContentBasedStrategy(reindex)) | ||
50 | - | ||
51 | - result = recommender.generate_recommendation(user) | 68 | + result = app_rec.get_recommendation(user) |
52 | result.print_result() | 69 | result.print_result() |
53 | 70 | ||
54 | metrics = [] | 71 | metrics = [] |
55 | metrics.append(Precision()) | 72 | metrics.append(Precision()) |
56 | metrics.append(Recall()) | 73 | metrics.append(Recall()) |
57 | - validation = CrossValidation(0.1,10,recommender,metrics) | 74 | + validation = CrossValidation(0.1,10,app_rec,metrics) |
58 | validation.run(user) | 75 | validation.run(user) |
src/data.py
@@ -41,9 +41,7 @@ def normalize_tags(string): | @@ -41,9 +41,7 @@ def normalize_tags(string): | ||
41 | """ Normalize tag string so that it can be indexed and retrieved. """ | 41 | """ Normalize tag string so that it can be indexed and retrieved. """ |
42 | return string.replace(':','_').replace('-','\'') | 42 | return string.replace(':','_').replace('-','\'') |
43 | 43 | ||
44 | -class DataRepository: | ||
45 | - """ """ | ||
46 | - # FIXME todos os repositorios devem ser singleton | 44 | +# FIXME Data repositories should be singleton |
47 | 45 | ||
48 | class DebtagsDB(debtags.DB): | 46 | class DebtagsDB(debtags.DB): |
49 | def __init__(self,path): | 47 | def __init__(self,path): |
@@ -68,18 +66,19 @@ class DebtagsDB(debtags.DB): | @@ -68,18 +66,19 @@ class DebtagsDB(debtags.DB): | ||
68 | relevance_index(b))) | 66 | relevance_index(b))) |
69 | return normalize_tags(' '.join(sorted_relevant_tags[-qtd_of_tags:])) | 67 | return normalize_tags(' '.join(sorted_relevant_tags[-qtd_of_tags:])) |
70 | 68 | ||
71 | -class DebtagsIndex: | 69 | +class DebtagsIndex(xapian.WritableDatabase): |
72 | def __init__(self,path): | 70 | def __init__(self,path): |
73 | self.path = path | 71 | self.path = path |
74 | 72 | ||
75 | def load(self,debtags_db,reindex): | 73 | def load(self,debtags_db,reindex): |
76 | """ Load an existing debtags index. """ | 74 | """ Load an existing debtags index. """ |
75 | + self.debtags_db = debtags_db | ||
77 | if not reindex: | 76 | if not reindex: |
78 | try: | 77 | try: |
79 | - #print ("Opening existing debtags xapian index at \'%s\'" % | ||
80 | - # self.path) | ||
81 | - self.index = xapian.Database(self.path) | ||
82 | - except DatabaseError: | 78 | + print ("Opening existing debtags xapian index at \'%s\'" % |
79 | + self.path) | ||
80 | + xapian.Database.__init__(self,self.path) | ||
81 | + except xapian.DatabaseError: | ||
83 | print "Could not open debtags xapian index" | 82 | print "Could not open debtags xapian index" |
84 | reindex =1 | 83 | reindex =1 |
85 | if reindex: | 84 | if reindex: |
@@ -92,11 +91,11 @@ class DebtagsIndex: | @@ -92,11 +91,11 @@ class DebtagsIndex: | ||
92 | if not os.path.exists(self.path): | 91 | if not os.path.exists(self.path): |
93 | os.makedirs(self.path) | 92 | os.makedirs(self.path) |
94 | print "Creating new debtags xapian index at \'%s\'" % self.path | 93 | print "Creating new debtags xapian index at \'%s\'" % self.path |
95 | - self.index = xapian.WritableDatabase(self.path, | 94 | + xapian.WritableDatabase.__init__(self,self.path, |
96 | xapian.DB_CREATE_OR_OVERWRITE) | 95 | xapian.DB_CREATE_OR_OVERWRITE) |
97 | for pkg,tags in debtags_db.iter_packages_tags(): | 96 | for pkg,tags in debtags_db.iter_packages_tags(): |
98 | doc = xapian.Document() | 97 | doc = xapian.Document() |
99 | doc.set_data(pkg) | 98 | doc.set_data(pkg) |
100 | for tag in tags: | 99 | for tag in tags: |
101 | doc.add_term(normalize_tags(tag)) | 100 | doc.add_term(normalize_tags(tag)) |
102 | - print "indexing ",self.index.add_document(doc) | 101 | + print "indexing ",self.add_document(doc) |
src/evaluation.py
@@ -139,7 +139,7 @@ class CrossValidation: | @@ -139,7 +139,7 @@ class CrossValidation: | ||
139 | exit(1) | 139 | exit(1) |
140 | round_partition[random_key] = cross_item_score.pop(random_key) | 140 | round_partition[random_key] = cross_item_score.pop(random_key) |
141 | round_user = User(cross_item_score) | 141 | round_user = User(cross_item_score) |
142 | - predicted_result = self.recommender.generate_recommendation(round_user) | 142 | + predicted_result = self.recommender.get_recommendation(round_user) |
143 | real_result = RecommendationResult(round_partition,len(round_partition)) | 143 | real_result = RecommendationResult(round_partition,len(round_partition)) |
144 | evaluation = Evaluation(predicted_result,real_result) | 144 | evaluation = Evaluation(predicted_result,real_result) |
145 | for metric in self.metrics_list: | 145 | for metric in self.metrics_list: |
src/recommender.py
@@ -35,17 +35,16 @@ class RecommendationResult: | @@ -35,17 +35,16 @@ class RecommendationResult: | ||
35 | 35 | ||
36 | class Recommender: | 36 | class Recommender: |
37 | """ """ | 37 | """ """ |
38 | - def __init__(self,items_repository=None,users_repository=None, | ||
39 | - knowledge_repository=None,strategy=None): | 38 | + def __init__(self,items_repository,users_repository=None, |
39 | + knowledge_repository=None): | ||
40 | self.items_repository = items_repository | 40 | self.items_repository = items_repository |
41 | self.users_repository = users_repository | 41 | self.users_repository = users_repository |
42 | self.knowledge_repository = knowledge_repository | 42 | self.knowledge_repository = knowledge_repository |
43 | - self.strategy = strategy | ||
44 | 43 | ||
45 | def set_strategy(self,strategy): | 44 | def set_strategy(self,strategy): |
46 | """ """ | 45 | """ """ |
47 | self.strategy = strategy | 46 | self.strategy = strategy |
48 | 47 | ||
49 | - def generate_recommendation(self,user): | 48 | + def get_recommendation(self,user): |
50 | """ """ | 49 | """ """ |
51 | return self.strategy.run(self,user) | 50 | return self.strategy.run(self,user) |
src/strategy.py
@@ -17,6 +17,7 @@ | @@ -17,6 +17,7 @@ | ||
17 | # You should have received a copy of the GNU General Public License | 17 | # You should have received a copy of the GNU General Public License |
18 | # along with this program. If not, see <http://www.gnu.org/licenses/>. | 18 | # along with this program. If not, see <http://www.gnu.org/licenses/>. |
19 | 19 | ||
20 | +import os, re | ||
20 | import xapian | 21 | import xapian |
21 | from data import * | 22 | from data import * |
22 | from recommender import * | 23 | from recommender import * |
@@ -51,27 +52,32 @@ class RecommendationStrategy: | @@ -51,27 +52,32 @@ class RecommendationStrategy: | ||
51 | class ItemReputationStrategy(RecommendationStrategy): | 52 | class ItemReputationStrategy(RecommendationStrategy): |
52 | """ Recommendation strategy based on items reputation. """ | 53 | """ Recommendation strategy based on items reputation. """ |
53 | def run(self,items_list,heuristic): | 54 | def run(self,items_list,heuristic): |
54 | - """ """ | 55 | + """ Perform recommendation strategy """ |
55 | return RecomendationResult() | 56 | return RecomendationResult() |
56 | 57 | ||
57 | class ContentBasedStrategy(RecommendationStrategy): | 58 | class ContentBasedStrategy(RecommendationStrategy): |
58 | """ Content-based recommendation strategy. """ | 59 | """ Content-based recommendation strategy. """ |
59 | - #def __init__(self,items_repository): | ||
60 | - # self.items_repository = items_repository | ||
61 | - def __init__(self,reindex): | ||
62 | - self.reindex = reindex | ||
63 | - | ||
64 | def run(self,recommender,user): | 60 | def run(self,recommender,user): |
65 | - """ """ | ||
66 | - best_tags = recommender.items_repository.get_relevant_tags(user.items(), | ||
67 | - 50) | ||
68 | - debtags_index = DebtagsIndex( | ||
69 | - os.path.expanduser("~/.app-recommender/debtags_index")) | ||
70 | - debtags_index.load(recommender.items_repository,self.reindex) | ||
71 | - | 61 | + """ Perform recommendation strategy """ |
62 | + profile = user.debtags_tag_profile(recommender.items_repository.debtags_db,50) | ||
72 | qp = xapian.QueryParser() | 63 | qp = xapian.QueryParser() |
73 | - query = qp.parse_query(best_tags) | ||
74 | - enquire = xapian.Enquire(debtags_index.index) | 64 | + query = qp.parse_query(profile) |
65 | + enquire = xapian.Enquire(recommender.items_repository) | ||
66 | + enquire.set_query(query) | ||
67 | + | ||
68 | + mset = enquire.get_mset(0, 20, None, PkgMatchDecider(user.items())) | ||
69 | + item_score = {} | ||
70 | + for m in mset: | ||
71 | + item_score[m.document.get_data()] = m.rank | ||
72 | + return RecommendationResult(item_score,20) | ||
73 | + | ||
74 | +class AxiContentBasedStrategy(RecommendationStrategy): | ||
75 | + """ Content-based recommendation strategy based on Apt-xapian-index. """ | ||
76 | + def run(self,recommender,user): | ||
77 | + """ Perform recommendation strategy """ | ||
78 | + profile = user.axi_tag_profile(recommender.items_repository,50) | ||
79 | + query = xapian.Query(xapian.Query.OP_OR,profile) | ||
80 | + enquire = xapian.Enquire(recommender.items_repository) | ||
75 | enquire.set_query(query) | 81 | enquire.set_query(query) |
76 | 82 | ||
77 | mset = enquire.get_mset(0, 20, None, PkgMatchDecider(user.items())) | 83 | mset = enquire.get_mset(0, 20, None, PkgMatchDecider(user.items())) |
@@ -83,17 +89,17 @@ class ContentBasedStrategy(RecommendationStrategy): | @@ -83,17 +89,17 @@ class ContentBasedStrategy(RecommendationStrategy): | ||
83 | class ColaborativeStrategy(RecommendationStrategy): | 89 | class ColaborativeStrategy(RecommendationStrategy): |
84 | """ Colaborative recommendation strategy. """ | 90 | """ Colaborative recommendation strategy. """ |
85 | def run(self,user,users_repository,similarity_measure): | 91 | def run(self,user,users_repository,similarity_measure): |
86 | - """ """ | 92 | + """ Perform recommendation strategy """ |
87 | return RecomendationResult() | 93 | return RecomendationResult() |
88 | 94 | ||
89 | class KnowledgeBasedStrategy(RecommendationStrategy): | 95 | class KnowledgeBasedStrategy(RecommendationStrategy): |
90 | """ Knowledge-based recommendation strategy. """ | 96 | """ Knowledge-based recommendation strategy. """ |
91 | def run(self,user,knowledge_repository): | 97 | def run(self,user,knowledge_repository): |
92 | - """ """ | 98 | + """ Perform recommendation strategy """ |
93 | return RecomendationResult() | 99 | return RecomendationResult() |
94 | 100 | ||
95 | class DemographicStrategy(RecommendationStrategy): | 101 | class DemographicStrategy(RecommendationStrategy): |
96 | """ Recommendation strategy based on demographic data. """ | 102 | """ Recommendation strategy based on demographic data. """ |
97 | def run(self,user,items_repository): | 103 | def run(self,user,items_repository): |
98 | - """ """ | 104 | + """ Perform recommendation strategy """ |
99 | return RecomendationResult() | 105 | return RecomendationResult() |
src/user.py
@@ -18,6 +18,14 @@ | @@ -18,6 +18,14 @@ | ||
18 | # along with this program. If not, see <http://www.gnu.org/licenses/>. | 18 | # along with this program. If not, see <http://www.gnu.org/licenses/>. |
19 | 19 | ||
20 | import commands | 20 | import commands |
21 | +import xapian | ||
22 | + | ||
23 | +class FilterTag(xapian.ExpandDecider): | ||
24 | + def __call__(self, term): | ||
25 | + """ | ||
26 | + Return true if the term is a tag, else false | ||
27 | + """ | ||
28 | + return term[:2] == "XT" | ||
21 | 29 | ||
22 | class User: | 30 | class User: |
23 | """ """ | 31 | """ """ |
@@ -30,6 +38,26 @@ class User: | @@ -30,6 +38,26 @@ class User: | ||
30 | def items(self): | 38 | def items(self): |
31 | return self.item_score.keys() | 39 | return self.item_score.keys() |
32 | 40 | ||
41 | + def axi_tag_profile(self,xapian_db,profile_size): | ||
42 | + terms = [] | ||
43 | + for item in self.items(): | ||
44 | + terms.append("XP"+item) | ||
45 | + query = xapian.Query(xapian.Query.OP_OR, terms) | ||
46 | + enquire = xapian.Enquire(xapian_db) | ||
47 | + enquire.set_query(query) | ||
48 | + rset = xapian.RSet() | ||
49 | + for m in enquire.get_mset(0,30000): #consider all matches | ||
50 | + rset.add_document(m.docid) | ||
51 | + eset = enquire.get_eset(profile_size, rset, FilterTag()) | ||
52 | + profile = [] | ||
53 | + for res in eset: | ||
54 | + profile.append(res.term) | ||
55 | + #print "%.2f %s" % (res.weight,res.term[2:]) | ||
56 | + return profile | ||
57 | + | ||
58 | + def debtags_tag_profile(self,debtags_db,profile_size): | ||
59 | + return debtags_db.get_relevant_tags(self.items(),profile_size) | ||
60 | + | ||
33 | class LocalSystem(User): | 61 | class LocalSystem(User): |
34 | """ """ | 62 | """ """ |
35 | def __init__(self): | 63 | def __init__(self): |