diff --git a/src/app_recommender.py b/src/app_recommender.py
index 5414700..7405d5a 100755
--- a/src/app_recommender.py
+++ b/src/app_recommender.py
@@ -26,33 +26,50 @@ from recommender import *
from strategy import *
from user import *
+# Setup configuration
DB_PATH = "/var/lib/debtags/package-tags"
INDEX_PATH = os.path.expanduser("~/.app-recommender/debtags_index")
+XAPIANDBPATH = os.environ.get("AXI_DB_PATH", "/var/lib/apt-xapian-index")
+XAPIANDB = XAPIANDBPATH + "/index"
+XAPIANDBVALUES = XAPIANDBPATH + "/values"
+
if __name__ == '__main__':
reindex = 0
+ axi = 0
if len(sys.argv) == 2:
- DB_PATH = sys.argv[1]
- reindex = 1
- print "reindex true"
+ if sys.argv[1] == "axi":
+ axi = 1
+ else:
+ DB_PATH = sys.argv[1]
+ reindex = 1
elif len(sys.argv) > 2:
print >> sys.stderr, ("Usage: %s [PATH_TO_DEBTAGS_DATABASE]" %
sys.argv[0])
sys.exit(1)
- debtags_db = DebtagsDB(DB_PATH)
- if not debtags_db.load(): sys.exit(1)
+ if axi:
+ axi_db = xapian.Database(XAPIANDB)
+ app_rec = Recommender(axi_db)
+ app_rec.set_strategy(AxiContentBasedStrategy())
+ else:
+ debtags_db = DebtagsDB(DB_PATH)
+ if not debtags_db.load():
+ print >> sys.stderr,("Could not load DebtagsDB from %s." % DB_PATH)
+ sys.exit(1)
+ debtags_index = DebtagsIndex(
+ os.path.expanduser("~/.app-recommender/debtags_index"))
+ debtags_index.load(debtags_db,reindex)
+ app_rec = Recommender(debtags_index)
+ app_rec.set_strategy(ContentBasedStrategy())
user = LocalSystem()
- recommender = Recommender(items_repository=debtags_db,
- strategy=ContentBasedStrategy(reindex))
-
- result = recommender.generate_recommendation(user)
+ result = app_rec.get_recommendation(user)
result.print_result()
metrics = []
metrics.append(Precision())
metrics.append(Recall())
- validation = CrossValidation(0.1,10,recommender,metrics)
+ validation = CrossValidation(0.1,10,app_rec,metrics)
validation.run(user)
diff --git a/src/data.py b/src/data.py
index 851272e..2ece072 100644
--- a/src/data.py
+++ b/src/data.py
@@ -41,9 +41,7 @@ def normalize_tags(string):
""" Normalize tag string so that it can be indexed and retrieved. """
return string.replace(':','_').replace('-','\'')
-class DataRepository:
- """ """
- # FIXME todos os repositorios devem ser singleton
+# FIXME: data repositories should be singletons
class DebtagsDB(debtags.DB):
def __init__(self,path):
@@ -68,18 +66,19 @@ class DebtagsDB(debtags.DB):
relevance_index(b)))
return normalize_tags(' '.join(sorted_relevant_tags[-qtd_of_tags:]))
-class DebtagsIndex:
+class DebtagsIndex(xapian.WritableDatabase):
def __init__(self,path):
self.path = path
def load(self,debtags_db,reindex):
""" Load an existing debtags index. """
+ self.debtags_db = debtags_db
if not reindex:
try:
- #print ("Opening existing debtags xapian index at \'%s\'" %
- # self.path)
- self.index = xapian.Database(self.path)
- except DatabaseError:
+ print ("Opening existing debtags xapian index at \'%s\'" %
+ self.path)
+ xapian.Database.__init__(self,self.path)
+ except xapian.DatabaseError:
print "Could not open debtags xapian index"
reindex =1
if reindex:
@@ -92,11 +91,11 @@ class DebtagsIndex:
if not os.path.exists(self.path):
os.makedirs(self.path)
print "Creating new debtags xapian index at \'%s\'" % self.path
- self.index = xapian.WritableDatabase(self.path,
+ xapian.WritableDatabase.__init__(self,self.path,
xapian.DB_CREATE_OR_OVERWRITE)
for pkg,tags in debtags_db.iter_packages_tags():
doc = xapian.Document()
doc.set_data(pkg)
for tag in tags:
doc.add_term(normalize_tags(tag))
- print "indexing ",self.index.add_document(doc)
+ print "indexing ",self.add_document(doc)
diff --git a/src/evaluation.py b/src/evaluation.py
index 01dd19e..8a01603 100644
--- a/src/evaluation.py
+++ b/src/evaluation.py
@@ -139,7 +139,7 @@ class CrossValidation:
exit(1)
round_partition[random_key] = cross_item_score.pop(random_key)
round_user = User(cross_item_score)
- predicted_result = self.recommender.generate_recommendation(round_user)
+ predicted_result = self.recommender.get_recommendation(round_user)
real_result = RecommendationResult(round_partition,len(round_partition))
evaluation = Evaluation(predicted_result,real_result)
for metric in self.metrics_list:
diff --git a/src/recommender.py b/src/recommender.py
index 776c626..33301f4 100644
--- a/src/recommender.py
+++ b/src/recommender.py
@@ -35,17 +35,16 @@ class RecommendationResult:
class Recommender:
""" """
- def __init__(self,items_repository=None,users_repository=None,
- knowledge_repository=None,strategy=None):
+ def __init__(self,items_repository,users_repository=None,
+ knowledge_repository=None):
self.items_repository = items_repository
self.users_repository = users_repository
self.knowledge_repository = knowledge_repository
- self.strategy = strategy
def set_strategy(self,strategy):
""" """
self.strategy = strategy
- def generate_recommendation(self,user):
+ def get_recommendation(self,user):
""" """
return self.strategy.run(self,user)
diff --git a/src/strategy.py b/src/strategy.py
index 76259df..73d8344 100644
--- a/src/strategy.py
+++ b/src/strategy.py
@@ -17,6 +17,7 @@
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
+import os, re
import xapian
from data import *
from recommender import *
@@ -51,27 +52,32 @@ class RecommendationStrategy:
class ItemReputationStrategy(RecommendationStrategy):
""" Recommendation strategy based on items reputation. """
def run(self,items_list,heuristic):
- """ """
+ """ Perform recommendation strategy """
return RecomendationResult()
class ContentBasedStrategy(RecommendationStrategy):
""" Content-based recommendation strategy. """
- #def __init__(self,items_repository):
- # self.items_repository = items_repository
- def __init__(self,reindex):
- self.reindex = reindex
-
def run(self,recommender,user):
- """ """
- best_tags = recommender.items_repository.get_relevant_tags(user.items(),
- 50)
- debtags_index = DebtagsIndex(
- os.path.expanduser("~/.app-recommender/debtags_index"))
- debtags_index.load(recommender.items_repository,self.reindex)
-
+ """ Perform recommendation strategy """
+ profile = user.debtags_tag_profile(recommender.items_repository.debtags_db,50)
qp = xapian.QueryParser()
- query = qp.parse_query(best_tags)
- enquire = xapian.Enquire(debtags_index.index)
+ query = qp.parse_query(profile)
+ enquire = xapian.Enquire(recommender.items_repository)
+ enquire.set_query(query)
+
+ mset = enquire.get_mset(0, 20, None, PkgMatchDecider(user.items()))
+ item_score = {}
+ for m in mset:
+ item_score[m.document.get_data()] = m.rank
+ return RecommendationResult(item_score,20)
+
+class AxiContentBasedStrategy(RecommendationStrategy):
+ """ Content-based recommendation strategy based on Apt-xapian-index. """
+ def run(self,recommender,user):
+ """ Perform recommendation strategy """
+ profile = user.axi_tag_profile(recommender.items_repository,50)
+ query = xapian.Query(xapian.Query.OP_OR,profile)
+ enquire = xapian.Enquire(recommender.items_repository)
enquire.set_query(query)
mset = enquire.get_mset(0, 20, None, PkgMatchDecider(user.items()))
@@ -83,17 +89,17 @@ class ContentBasedStrategy(RecommendationStrategy):
class ColaborativeStrategy(RecommendationStrategy):
""" Colaborative recommendation strategy. """
def run(self,user,users_repository,similarity_measure):
- """ """
+ """ Perform recommendation strategy """
return RecomendationResult()
class KnowledgeBasedStrategy(RecommendationStrategy):
""" Knowledge-based recommendation strategy. """
def run(self,user,knowledge_repository):
- """ """
+ """ Perform recommendation strategy """
return RecomendationResult()
class DemographicStrategy(RecommendationStrategy):
""" Recommendation strategy based on demographic data. """
def run(self,user,items_repository):
- """ """
+ """ Perform recommendation strategy """
return RecomendationResult()
diff --git a/src/user.py b/src/user.py
index 788fdb9..6849173 100644
--- a/src/user.py
+++ b/src/user.py
@@ -18,6 +18,14 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import commands
+import xapian
+
+class FilterTag(xapian.ExpandDecider):
+ def __call__(self, term):
+ """
+ Return True if the term is a tag (i.e. it starts with "XT"), else False
+ """
+ return term[:2] == "XT"
class User:
""" """
@@ -30,6 +38,26 @@ class User:
def items(self):
return self.item_score.keys()
+ def axi_tag_profile(self,xapian_db,profile_size):
+ terms = []
+ for item in self.items():
+ terms.append("XP"+item)
+ query = xapian.Query(xapian.Query.OP_OR, terms)
+ enquire = xapian.Enquire(xapian_db)
+ enquire.set_query(query)
+ rset = xapian.RSet()
+ for m in enquire.get_mset(0,30000): #consider all matches
+ rset.add_document(m.docid)
+ eset = enquire.get_eset(profile_size, rset, FilterTag())
+ profile = []
+ for res in eset:
+ profile.append(res.term)
+ #print "%.2f %s" % (res.weight,res.term[2:])
+ return profile
+
+ def debtags_tag_profile(self,debtags_db,profile_size):
+ return debtags_db.get_relevant_tags(self.items(),profile_size)
+
class LocalSystem(User):
""" """
def __init__(self):
--
libgit2 0.21.2