Commit 994c12253b2c51b8caff226c9a2821402bf094e7
1 parent
bb8d206b
Exists in
master
and in
1 other branch
- Apt-xapian-index (axi) is now treated as an items repository.
- Created AxiContentBasedStrategy() to perform recommendations based on axi.
- Created separate methods for extracting the user profile from DebtagsDB and from axi.
- The DebtagsIndex class now inherits from xapian.WritableDatabase, so that it can be replaced by the axi database when convenient. (close #1)
Showing
6 changed files
with
92 additions
and
43 deletions
Show diff stats
src/app_recommender.py
... | ... | @@ -26,33 +26,50 @@ from recommender import * |
26 | 26 | from strategy import * |
27 | 27 | from user import * |
28 | 28 | |
29 | +# Setup configuration | |
29 | 30 | DB_PATH = "/var/lib/debtags/package-tags" |
30 | 31 | INDEX_PATH = os.path.expanduser("~/.app-recommender/debtags_index") |
31 | 32 | |
33 | +XAPIANDBPATH = os.environ.get("AXI_DB_PATH", "/var/lib/apt-xapian-index") | |
34 | +XAPIANDB = XAPIANDBPATH + "/index" | |
35 | +XAPIANDBVALUES = XAPIANDBPATH + "/values" | |
36 | + | |
32 | 37 | if __name__ == '__main__': |
33 | 38 | |
34 | 39 | reindex = 0 |
40 | + axi = 0 | |
35 | 41 | if len(sys.argv) == 2: |
36 | - DB_PATH = sys.argv[1] | |
37 | - reindex = 1 | |
38 | - print "reindex true" | |
42 | + if sys.argv[1] == "axi": | |
43 | + axi = 1 | |
44 | + else: | |
45 | + DB_PATH = sys.argv[1] | |
46 | + reindex = 1 | |
39 | 47 | elif len(sys.argv) > 2: |
40 | 48 | print >> sys.stderr, ("Usage: %s [PATH_TO_DEBTAGS_DATABASE]" % |
41 | 49 | sys.argv[0]) |
42 | 50 | sys.exit(1) |
43 | 51 | |
44 | - debtags_db = DebtagsDB(DB_PATH) | |
45 | - if not debtags_db.load(): sys.exit(1) | |
52 | + if axi: | |
53 | + axi_db = xapian.Database(XAPIANDB) | |
54 | + app_rec = Recommender(axi_db) | |
55 | + app_rec.set_strategy(AxiContentBasedStrategy()) | |
56 | + else: | |
57 | + debtags_db = DebtagsDB(DB_PATH) | |
58 | + if not debtags_db.load(): | |
59 | + print >> sys.stderr,("Could not load DebtagsDB from %s." % DB_PATH) | |
60 | + sys.exit(1) | |
61 | + debtags_index = DebtagsIndex( | |
62 | + os.path.expanduser("~/.app-recommender/debtags_index")) | |
63 | + debtags_index.load(debtags_db,reindex) | |
64 | + app_rec = Recommender(debtags_index) | |
65 | + app_rec.set_strategy(ContentBasedStrategy()) | |
46 | 66 | |
47 | 67 | user = LocalSystem() |
48 | - recommender = Recommender(items_repository=debtags_db, | |
49 | - strategy=ContentBasedStrategy(reindex)) | |
50 | - | |
51 | - result = recommender.generate_recommendation(user) | |
68 | + result = app_rec.get_recommendation(user) | |
52 | 69 | result.print_result() |
53 | 70 | |
54 | 71 | metrics = [] |
55 | 72 | metrics.append(Precision()) |
56 | 73 | metrics.append(Recall()) |
57 | - validation = CrossValidation(0.1,10,recommender,metrics) | |
74 | + validation = CrossValidation(0.1,10,app_rec,metrics) | |
58 | 75 | validation.run(user) | ... | ... |
src/data.py
... | ... | @@ -41,9 +41,7 @@ def normalize_tags(string): |
41 | 41 | """ Normalize tag string so that it can be indexed and retrieved. """ |
42 | 42 | return string.replace(':','_').replace('-','\'') |
43 | 43 | |
44 | -class DataRepository: | |
45 | - """ """ | |
46 | - # FIXME todos os repositorios devem ser singleton | |
44 | +# FIXME Data repositories should be singleton | |
47 | 45 | |
48 | 46 | class DebtagsDB(debtags.DB): |
49 | 47 | def __init__(self,path): |
... | ... | @@ -68,18 +66,19 @@ class DebtagsDB(debtags.DB): |
68 | 66 | relevance_index(b))) |
69 | 67 | return normalize_tags(' '.join(sorted_relevant_tags[-qtd_of_tags:])) |
70 | 68 | |
71 | -class DebtagsIndex: | |
69 | +class DebtagsIndex(xapian.WritableDatabase): | |
72 | 70 | def __init__(self,path): |
73 | 71 | self.path = path |
74 | 72 | |
75 | 73 | def load(self,debtags_db,reindex): |
76 | 74 | """ Load an existing debtags index. """ |
75 | + self.debtags_db = debtags_db | |
77 | 76 | if not reindex: |
78 | 77 | try: |
79 | - #print ("Opening existing debtags xapian index at \'%s\'" % | |
80 | - # self.path) | |
81 | - self.index = xapian.Database(self.path) | |
82 | - except DatabaseError: | |
78 | + print ("Opening existing debtags xapian index at \'%s\'" % | |
79 | + self.path) | |
80 | + xapian.Database.__init__(self,self.path) | |
81 | + except xapian.DatabaseError: | |
83 | 82 | print "Could not open debtags xapian index" |
84 | 83 | reindex =1 |
85 | 84 | if reindex: |
... | ... | @@ -92,11 +91,11 @@ class DebtagsIndex: |
92 | 91 | if not os.path.exists(self.path): |
93 | 92 | os.makedirs(self.path) |
94 | 93 | print "Creating new debtags xapian index at \'%s\'" % self.path |
95 | - self.index = xapian.WritableDatabase(self.path, | |
94 | + xapian.WritableDatabase.__init__(self,self.path, | |
96 | 95 | xapian.DB_CREATE_OR_OVERWRITE) |
97 | 96 | for pkg,tags in debtags_db.iter_packages_tags(): |
98 | 97 | doc = xapian.Document() |
99 | 98 | doc.set_data(pkg) |
100 | 99 | for tag in tags: |
101 | 100 | doc.add_term(normalize_tags(tag)) |
102 | - print "indexing ",self.index.add_document(doc) | |
101 | + print "indexing ",self.add_document(doc) | ... | ... |
src/evaluation.py
... | ... | @@ -139,7 +139,7 @@ class CrossValidation: |
139 | 139 | exit(1) |
140 | 140 | round_partition[random_key] = cross_item_score.pop(random_key) |
141 | 141 | round_user = User(cross_item_score) |
142 | - predicted_result = self.recommender.generate_recommendation(round_user) | |
142 | + predicted_result = self.recommender.get_recommendation(round_user) | |
143 | 143 | real_result = RecommendationResult(round_partition,len(round_partition)) |
144 | 144 | evaluation = Evaluation(predicted_result,real_result) |
145 | 145 | for metric in self.metrics_list: | ... | ... |
src/recommender.py
... | ... | @@ -35,17 +35,16 @@ class RecommendationResult: |
35 | 35 | |
36 | 36 | class Recommender: |
37 | 37 | """ """ |
38 | - def __init__(self,items_repository=None,users_repository=None, | |
39 | - knowledge_repository=None,strategy=None): | |
38 | + def __init__(self,items_repository,users_repository=None, | |
39 | + knowledge_repository=None): | |
40 | 40 | self.items_repository = items_repository |
41 | 41 | self.users_repository = users_repository |
42 | 42 | self.knowledge_repository = knowledge_repository |
43 | - self.strategy = strategy | |
44 | 43 | |
45 | 44 | def set_strategy(self,strategy): |
46 | 45 | """ """ |
47 | 46 | self.strategy = strategy |
48 | 47 | |
49 | - def generate_recommendation(self,user): | |
48 | + def get_recommendation(self,user): | |
50 | 49 | """ """ |
51 | 50 | return self.strategy.run(self,user) | ... | ... |
src/strategy.py
... | ... | @@ -17,6 +17,7 @@ |
17 | 17 | # You should have received a copy of the GNU General Public License |
18 | 18 | # along with this program. If not, see <http://www.gnu.org/licenses/>. |
19 | 19 | |
20 | +import os, re | |
20 | 21 | import xapian |
21 | 22 | from data import * |
22 | 23 | from recommender import * |
... | ... | @@ -51,27 +52,32 @@ class RecommendationStrategy: |
51 | 52 | class ItemReputationStrategy(RecommendationStrategy): |
52 | 53 | """ Recommendation strategy based on items reputation. """ |
53 | 54 | def run(self,items_list,heuristic): |
54 | - """ """ | |
55 | + """ Perform recommendation strategy """ | |
55 | 56 | return RecomendationResult() |
56 | 57 | |
57 | 58 | class ContentBasedStrategy(RecommendationStrategy): |
58 | 59 | """ Content-based recommendation strategy. """ |
59 | - #def __init__(self,items_repository): | |
60 | - # self.items_repository = items_repository | |
61 | - def __init__(self,reindex): | |
62 | - self.reindex = reindex | |
63 | - | |
64 | 60 | def run(self,recommender,user): |
65 | - """ """ | |
66 | - best_tags = recommender.items_repository.get_relevant_tags(user.items(), | |
67 | - 50) | |
68 | - debtags_index = DebtagsIndex( | |
69 | - os.path.expanduser("~/.app-recommender/debtags_index")) | |
70 | - debtags_index.load(recommender.items_repository,self.reindex) | |
71 | - | |
61 | + """ Perform recommendation strategy """ | |
62 | + profile = user.debtags_tag_profile(recommender.items_repository.debtags_db,50) | |
72 | 63 | qp = xapian.QueryParser() |
73 | - query = qp.parse_query(best_tags) | |
74 | - enquire = xapian.Enquire(debtags_index.index) | |
64 | + query = qp.parse_query(profile) | |
65 | + enquire = xapian.Enquire(recommender.items_repository) | |
66 | + enquire.set_query(query) | |
67 | + | |
68 | + mset = enquire.get_mset(0, 20, None, PkgMatchDecider(user.items())) | |
69 | + item_score = {} | |
70 | + for m in mset: | |
71 | + item_score[m.document.get_data()] = m.rank | |
72 | + return RecommendationResult(item_score,20) | |
73 | + | |
74 | +class AxiContentBasedStrategy(RecommendationStrategy): | |
75 | + """ Content-based recommendation strategy based on Apt-xapian-index. """ | |
76 | + def run(self,recommender,user): | |
77 | + """ Perform recommendation strategy """ | |
78 | + profile = user.axi_tag_profile(recommender.items_repository,50) | |
79 | + query = xapian.Query(xapian.Query.OP_OR,profile) | |
80 | + enquire = xapian.Enquire(recommender.items_repository) | |
75 | 81 | enquire.set_query(query) |
76 | 82 | |
77 | 83 | mset = enquire.get_mset(0, 20, None, PkgMatchDecider(user.items())) |
... | ... | @@ -83,17 +89,17 @@ class ContentBasedStrategy(RecommendationStrategy): |
83 | 89 | class ColaborativeStrategy(RecommendationStrategy): |
84 | 90 | """ Colaborative recommendation strategy. """ |
85 | 91 | def run(self,user,users_repository,similarity_measure): |
86 | - """ """ | |
92 | + """ Perform recommendation strategy """ | |
87 | 93 | return RecomendationResult() |
88 | 94 | |
89 | 95 | class KnowledgeBasedStrategy(RecommendationStrategy): |
90 | 96 | """ Knowledge-based recommendation strategy. """ |
91 | 97 | def run(self,user,knowledge_repository): |
92 | - """ """ | |
98 | + """ Perform recommendation strategy """ | |
93 | 99 | return RecomendationResult() |
94 | 100 | |
95 | 101 | class DemographicStrategy(RecommendationStrategy): |
96 | 102 | """ Recommendation strategy based on demographic data. """ |
97 | 103 | def run(self,user,items_repository): |
98 | - """ """ | |
104 | + """ Perform recommendation strategy """ | |
99 | 105 | return RecomendationResult() | ... | ... |
src/user.py
... | ... | @@ -18,6 +18,14 @@ |
18 | 18 | # along with this program. If not, see <http://www.gnu.org/licenses/>. |
19 | 19 | |
20 | 20 | import commands |
21 | +import xapian | |
22 | + | |
23 | +class FilterTag(xapian.ExpandDecider): | |
24 | + def __call__(self, term): | |
25 | + """ | |
26 | + Return true if the term is a tag, else false | |
27 | + """ | |
28 | + return term[:2] == "XT" | |
21 | 29 | |
22 | 30 | class User: |
23 | 31 | """ """ |
... | ... | @@ -30,6 +38,26 @@ class User: |
30 | 38 | def items(self): |
31 | 39 | return self.item_score.keys() |
32 | 40 | |
41 | + def axi_tag_profile(self,xapian_db,profile_size): | |
42 | + terms = [] | |
43 | + for item in self.items(): | |
44 | + terms.append("XP"+item) | |
45 | + query = xapian.Query(xapian.Query.OP_OR, terms) | |
46 | + enquire = xapian.Enquire(xapian_db) | |
47 | + enquire.set_query(query) | |
48 | + rset = xapian.RSet() | |
49 | + for m in enquire.get_mset(0,30000): #consider all matches | |
50 | + rset.add_document(m.docid) | |
51 | + eset = enquire.get_eset(profile_size, rset, FilterTag()) | |
52 | + profile = [] | |
53 | + for res in eset: | |
54 | + profile.append(res.term) | |
55 | + #print "%.2f %s" % (res.weight,res.term[2:]) | |
56 | + return profile | |
57 | + | |
58 | + def debtags_tag_profile(self,debtags_db,profile_size): | |
59 | + return debtags_db.get_relevant_tags(self.items(),profile_size) | |
60 | + | |
33 | 61 | class LocalSystem(User): |
34 | 62 | """ """ |
35 | 63 | def __init__(self): | ... | ... |