Commit 994c12253b2c51b8caff226c9a2821402bf094e7

Authored by Tássia Camões Araújo
1 parent bb8d206b
Exists in master and in 1 other branch add_vagrant

- Apt-xapian-index (axi) can now be used as an item repository.

- Created AxiContentBasedStrategy() to perform recommendations based on axi.
- Created separate methods for extracting the user profile from DebtagsDB and
  from axi.
- DebtagsIndex class now inherits from xapian.WritableDatabase, so that it can
  be replaced by the axi database when convenient.
(close #1)
src/app_recommender.py
... ... @@ -26,33 +26,50 @@ from recommender import *
26 26 from strategy import *
27 27 from user import *
28 28  
# Setup configuration
# Default path of the debtags package/tag database.
DB_PATH = "/var/lib/debtags/package-tags"
# Path handed to DebtagsIndex for the xapian index of the debtags data.
INDEX_PATH = os.path.expanduser("~/.app-recommender/debtags_index")

# Apt-xapian-index location; can be overridden through AXI_DB_PATH.
XAPIANDBPATH = os.environ.get("AXI_DB_PATH", "/var/lib/apt-xapian-index")
XAPIANDB = XAPIANDBPATH + "/index"
XAPIANDBVALUES = XAPIANDBPATH + "/values"


def main(argv):
    """ Recommend items for the local system and cross-validate the result.

    argv -- full command line (program name first). Accepted forms:
              prog        load the debtags index without forcing a reindex
              prog axi    use apt-xapian-index as the item repository
              prog PATH   read the debtags database at PATH and reindex
            Anything longer prints the usage message and exits with 1.
    """
    db_path = DB_PATH
    reindex = False
    use_axi = False
    if len(argv) == 2:
        if argv[1] == "axi":
            use_axi = True
        else:
            db_path = argv[1]
            reindex = True
    elif len(argv) > 2:
        sys.stderr.write("Usage: %s [axi|PATH_TO_DEBTAGS_DATABASE]\n" %
                         argv[0])
        sys.exit(1)

    if use_axi:
        # Fail with a readable message, like the debtags branch does, instead
        # of letting the xapian exception escape as a traceback.
        try:
            axi_db = xapian.Database(XAPIANDB)
        except xapian.DatabaseError:
            sys.stderr.write("Could not open apt-xapian-index at %s.\n" %
                             XAPIANDB)
            sys.exit(1)
        app_rec = Recommender(axi_db)
        app_rec.set_strategy(AxiContentBasedStrategy())
    else:
        debtags_db = DebtagsDB(db_path)
        if not debtags_db.load():
            sys.stderr.write("Could not load DebtagsDB from %s.\n" % db_path)
            sys.exit(1)
        debtags_index = DebtagsIndex(INDEX_PATH)
        debtags_index.load(debtags_db, reindex)
        app_rec = Recommender(debtags_index)
        app_rec.set_strategy(ContentBasedStrategy())

    user = LocalSystem()
    result = app_rec.get_recommendation(user)
    result.print_result()

    metrics = [Precision(), Recall()]
    validation = CrossValidation(0.1, 10, app_rec, metrics)
    validation.run(user)


if __name__ == '__main__':
    main(sys.argv)
... ...
src/data.py
... ... @@ -41,9 +41,7 @@ def normalize_tags(string):
41 41 """ Normalize tag string so that it can be indexed and retrieved. """
42 42 return string.replace(':','_').replace('-','\'')
43 43  
44   -class DataRepository:
45   - """ """
46   - # FIXME todos os repositorios devem ser singleton
  44 +# FIXME Data repositories should be singletons
47 45  
48 46 class DebtagsDB(debtags.DB):
49 47 def __init__(self,path):
... ... @@ -68,18 +66,19 @@ class DebtagsDB(debtags.DB):
68 66 relevance_index(b)))
69 67 return normalize_tags(' '.join(sorted_relevant_tags[-qtd_of_tags:]))
70 68  
71   -class DebtagsIndex:
  69 +class DebtagsIndex(xapian.WritableDatabase):
72 70 def __init__(self,path):
73 71 self.path = path
74 72  
75 73 def load(self,debtags_db,reindex):
76 74 """ Load an existing debtags index. """
  75 + self.debtags_db = debtags_db
77 76 if not reindex:
78 77 try:
79   - #print ("Opening existing debtags xapian index at \'%s\'" %
80   - # self.path)
81   - self.index = xapian.Database(self.path)
82   - except DatabaseError:
  78 + print ("Opening existing debtags xapian index at \'%s\'" %
  79 + self.path)
  80 + xapian.Database.__init__(self,self.path)
  81 + except xapian.DatabaseError:
83 82 print "Could not open debtags xapian index"
84 83 reindex =1
85 84 if reindex:
... ... @@ -92,11 +91,11 @@ class DebtagsIndex:
92 91 if not os.path.exists(self.path):
93 92 os.makedirs(self.path)
94 93 print "Creating new debtags xapian index at \'%s\'" % self.path
95   - self.index = xapian.WritableDatabase(self.path,
  94 + xapian.WritableDatabase.__init__(self,self.path,
96 95 xapian.DB_CREATE_OR_OVERWRITE)
97 96 for pkg,tags in debtags_db.iter_packages_tags():
98 97 doc = xapian.Document()
99 98 doc.set_data(pkg)
100 99 for tag in tags:
101 100 doc.add_term(normalize_tags(tag))
102   - print "indexing ",self.index.add_document(doc)
  101 + print "indexing ",self.add_document(doc)
... ...
src/evaluation.py
... ... @@ -139,7 +139,7 @@ class CrossValidation:
139 139 exit(1)
140 140 round_partition[random_key] = cross_item_score.pop(random_key)
141 141 round_user = User(cross_item_score)
142   - predicted_result = self.recommender.generate_recommendation(round_user)
  142 + predicted_result = self.recommender.get_recommendation(round_user)
143 143 real_result = RecommendationResult(round_partition,len(round_partition))
144 144 evaluation = Evaluation(predicted_result,real_result)
145 145 for metric in self.metrics_list:
... ...
src/recommender.py
... ... @@ -35,17 +35,16 @@ class RecommendationResult:
35 35  
class Recommender:
    """ Ties the data repositories to a pluggable recommendation strategy.

    The strategy can be given at construction time or later through
    set_strategy(); get_recommendation() delegates to it.
    """
    def __init__(self,items_repository,users_repository=None,
                 knowledge_repository=None,strategy=None):
        """ Store the repositories and, optionally, the initial strategy.

        items_repository     -- repository the strategies query for items
        users_repository     -- optional repository of users
        knowledge_repository -- optional knowledge repository
        strategy             -- optional object exposing run(recommender, user)
        """
        self.items_repository = items_repository
        self.users_repository = users_repository
        self.knowledge_repository = knowledge_repository
        # Always defined, so a missing strategy is detected explicitly in
        # get_recommendation() rather than surfacing as a missing attribute.
        self.strategy = strategy

    def set_strategy(self,strategy):
        """ Replace the strategy used by get_recommendation(). """
        self.strategy = strategy

    def get_recommendation(self,user):
        """ Return whatever the current strategy's run() yields for `user`.

        Raises AttributeError when no strategy has been set yet.
        """
        if self.strategy is None:
            # Same exception type as before, with an actionable message.
            raise AttributeError("Recommender has no strategy; "
                                 "call set_strategy() first")
        return self.strategy.run(self,user)

    def generate_recommendation(self,user):
        """ Former name of get_recommendation(), kept for older callers. """
        return self.get_recommendation(user)
... ...
src/strategy.py
... ... @@ -17,6 +17,7 @@
17 17 # You should have received a copy of the GNU General Public License
18 18 # along with this program. If not, see <http://www.gnu.org/licenses/>.
19 19  
  20 +import os, re
20 21 import xapian
21 22 from data import *
22 23 from recommender import *
... ... @@ -51,27 +52,32 @@ class RecommendationStrategy:
class ItemReputationStrategy(RecommendationStrategy):
    """ Recommendation strategy based on items reputation. """
    def run(self,items_list,heuristic):
        """ Perform recommendation strategy

        Placeholder: no reputation logic exists yet, so an empty result is
        returned.
        """
        # NOTE(review): Recommender invokes strategy.run(recommender, user);
        # the parameter names here do not follow that convention -- confirm.
        # The arguments mirror the (item_score, size) calls made elsewhere;
        # the misspelt name used before raised NameError when called.
        return RecommendationResult({}, 0)
56 57  
class ContentBasedStrategy(RecommendationStrategy):
    """ Content-based recommendation strategy.

    Queries the recommender's item repository with a tag profile that the
    user derives from the repository's debtags database.
    """
    # Number of tags requested for the user profile.
    PROFILE_SIZE = 50
    # Upper bound on matches fetched and size reported in the result.
    RESULT_SIZE = 20

    def run(self,recommender,user):
        """ Perform recommendation strategy

        recommender -- Recommender whose items_repository is usable as a
                       xapian database and exposes a debtags_db attribute
        user        -- object providing debtags_tag_profile() and items()

        Returns a RecommendationResult mapping each matched document's data
        to its rank in the match set.
        """
        profile = user.debtags_tag_profile(
            recommender.items_repository.debtags_db, self.PROFILE_SIZE)
        qp = xapian.QueryParser()
        query = qp.parse_query(profile)
        enquire = xapian.Enquire(recommender.items_repository)
        enquire.set_query(query)

        # NOTE(review): PkgMatchDecider is defined elsewhere; presumably it
        # rejects items the user already has -- confirm.
        mset = enquire.get_mset(0, self.RESULT_SIZE, None,
                                PkgMatchDecider(user.items()))
        item_score = {}
        for m in mset:
            item_score[m.document.get_data()] = m.rank
        return RecommendationResult(item_score, self.RESULT_SIZE)
  73 +
  74 +class AxiContentBasedStrategy(RecommendationStrategy):
  75 + """ Content-based recommendation strategy based on Apt-xapian-index. """
  76 + def run(self,recommender,user):
  77 + """ Perform recommendation strategy """
  78 + profile = user.axi_tag_profile(recommender.items_repository,50)
  79 + query = xapian.Query(xapian.Query.OP_OR,profile)
  80 + enquire = xapian.Enquire(recommender.items_repository)
75 81 enquire.set_query(query)
76 82  
77 83 mset = enquire.get_mset(0, 20, None, PkgMatchDecider(user.items()))
... ... @@ -83,17 +89,17 @@ class ContentBasedStrategy(RecommendationStrategy):
class ColaborativeStrategy(RecommendationStrategy):
    """ Colaborative recommendation strategy. """
    def run(self,user,users_repository,similarity_measure):
        """ Perform recommendation strategy

        Placeholder: no collaborative logic exists yet, so an empty result
        is returned.
        """
        # NOTE(review): Recommender invokes strategy.run(recommender, user),
        # i.e. with two arguments, while this signature requires three --
        # confirm the intended interface before wiring this strategy in.
        # The arguments mirror the (item_score, size) calls made elsewhere;
        # the misspelt name used before raised NameError when called.
        return RecommendationResult({}, 0)
88 94  
class KnowledgeBasedStrategy(RecommendationStrategy):
    """ Knowledge-based recommendation strategy. """
    def run(self,user,knowledge_repository):
        """ Perform recommendation strategy

        Placeholder: no knowledge-based logic exists yet, so an empty result
        is returned.
        """
        # NOTE(review): Recommender invokes strategy.run(recommender, user);
        # the parameter order here differs from that convention -- confirm.
        # The arguments mirror the (item_score, size) calls made elsewhere;
        # the misspelt name used before raised NameError when called.
        return RecommendationResult({}, 0)
94 100  
class DemographicStrategy(RecommendationStrategy):
    """ Recommendation strategy based on demographic data. """
    def run(self,user,items_repository):
        """ Perform recommendation strategy

        Placeholder: no demographic logic exists yet, so an empty result is
        returned.
        """
        # NOTE(review): Recommender invokes strategy.run(recommender, user);
        # the parameter order here differs from that convention -- confirm.
        # The arguments mirror the (item_score, size) calls made elsewhere;
        # the misspelt name used before raised NameError when called.
        return RecommendationResult({}, 0)
... ...
src/user.py
... ... @@ -18,6 +18,14 @@
18 18 # along with this program. If not, see <http://www.gnu.org/licenses/>.
19 19  
20 20 import commands
  21 +import xapian
  22 +
class FilterTag(xapian.ExpandDecider):
    """ Expand decider accepting only terms that carry the "XT" prefix. """
    def __call__(self, term):
        """
        Tell whether `term` is a tag: true when it starts with "XT",
        false otherwise.
        """
        return term.startswith("XT")
21 29  
22 30 class User:
23 31 """ """
... ... @@ -30,6 +38,26 @@ class User:
30 38 def items(self):
31 39 return self.item_score.keys()
32 40  
def axi_tag_profile(self,xapian_db,profile_size):
    """ Build a tag profile for this user out of a xapian database.

    Every item of the user is looked up through its "XP"-prefixed term; all
    matching documents form a relevance set, from which at most
    profile_size expansion terms accepted by FilterTag are taken.

    xapian_db    -- xapian database to query (e.g. apt-xapian-index)
    profile_size -- maximum number of terms in the profile

    Returns the list of selected terms, prefix included; the list is empty
    when the user has no items.
    """
    terms = ["XP" + item for item in self.items()]
    if not terms:
        # Nothing to look up, so no document could end up in the relevance
        # set.
        return []
    enquire = xapian.Enquire(xapian_db)
    enquire.set_query(xapian.Query(xapian.Query.OP_OR, terms))
    rset = xapian.RSet()
    # Consider all matches: bound the request by the database size instead
    # of an arbitrary constant.
    for match in enquire.get_mset(0, xapian_db.get_doccount()):
        rset.add_document(match.docid)
    eset = enquire.get_eset(profile_size, rset, FilterTag())
    return [res.term for res in eset]
  57 +
def debtags_tag_profile(self,debtags_db,profile_size):
    """ Build a tag profile for this user out of a debtags database.

    Delegates to debtags_db.get_relevant_tags(), handing it the user's
    items and profile_size, and returns its result unchanged.
    """
    owned_items = self.items()
    return debtags_db.get_relevant_tags(owned_items, profile_size)
  60 +
33 61 class LocalSystem(User):
34 62 """ """
35 63 def __init__(self):
... ...