Commit 2e9ab843a42a3a87c77db71ac63340d6baa20c3e
Exists in master and in 1 other branch
Merge branch 'master' of github.com:tassia/AppRecommender
Showing 11 changed files with 518 additions and 416 deletions
Show diff stats
src/config.py
| ... | ... | @@ -46,8 +46,8 @@ class Config(): |
| 46 | 46 | self.popcon_index = "~/.app-recommender/popcon_index" |
| 47 | 47 | self.popcon_dir = "~/.app-recommender/popcon_dir" |
| 48 | 48 | self.clusters_dir = "~/.app-recommender/clusters_dir" |
| 49 | - self.strategy = "cta" # defaults to the cheapest one | |
| 50 | - self.reindex = 0 | |
| 49 | + self.strategy = "cb" # defaults to the cheapest one | |
| 50 | + self.weight = "bm25" | |
| 51 | 51 | self.load_options() |
| 52 | 52 | self.set_logger() |
| 53 | 53 | |
| ... | ... | @@ -63,22 +63,24 @@ class Config(): |
| 63 | 63 | print " -c, --config=PATH Path to configuration file." |
| 64 | 64 | print "" |
| 65 | 65 | print " [ recommender ]" |
| 66 | - print " -t, --tagsdb=PATH Path to debtags database." | |
| 67 | - print " -i, --tagsindex=PATH Path to debtags dedicated index." | |
| 68 | - print " -r, --force-reindex Force reindexing debtags database." | |
| 69 | 66 | print " -a, --axi=PATH Path to Apt-xapian-index." |
| 70 | 67 | print " -p, --popconindex=PATH Path to popcon dedicated index." |
| 71 | 68 | print " -m, --popcondir=PATH Path to popcon submissions dir." |
| 72 | 69 | print " -l, --clustersdir=PATH Path to popcon clusters dir." |
| 70 | + print " -w, --weight=OPTION Search weighting scheme." | |
| 73 | 71 | print " -s, --strategy=OPTION Recommendation strategy." |
| 74 | 72 | print "" |
| 73 | + print " [ weight options ] " | |
| 74 | + print " trad = traditional probabilistic weighting " | |
| 75 | + print " bm25 = bm25 weighting scheme " | |
| 76 | + print "" | |
| 75 | 77 | print " [ strategy options ] " |
| 76 | - print " ct = content-based using tags " | |
| 77 | - print " cta = content-based using tags via apt-xapian-index" | |
| 78 | - print " cp = content-based using package descriptions " | |
| 78 | + print " cb = content-based " | |
| 79 | + print " cbt = content-based using only tags as content " | |
| 80 | + print " cbd = content-based using only package descriptions as content " | |
| 79 | 81 | print " col = collaborative " |
| 80 | - print " colct = collaborative through tags content " | |
| 81 | - print " colcp = collaborative through package descriptions content " | |
| 82 | + #print " colct = collaborative through tags content " | |
| 83 | + #print " colcp = collaborative through package descriptions content " | |
| 82 | 84 | |
| 83 | 85 | def read_option(self, section, option): |
| 84 | 86 | """ |
| ... | ... | @@ -108,19 +110,17 @@ class Config(): |
| 108 | 110 | self.output_filename = self.read_option('general', 'output') |
| 109 | 111 | self.config = self.read_option('general', 'config') |
| 110 | 112 | |
| 111 | - self.tags_db = self.read_option('recommender', 'tags_db') | |
| 112 | - self.tags_index = self.read_option('recommender', 'tags_index') | |
| 113 | - self.reindex = self.read_option('recommender', 'reindex') | |
| 114 | 113 | self.axi = self.read_option('recommender', 'axi') |
| 115 | 114 | self.popcon_index = self.read_option('recommender', 'popcon_index') |
| 116 | 115 | self.popcon_dir = self.read_option('recommender', 'popcon_dir') |
| 117 | 116 | self.clusters_dir = self.read_option('recommender', 'clusters_dir') |
| 117 | + self.weight = self.read_option('recommender', 'weight') | |
| 118 | + self.strategy = self.read_option('recommender', 'strategy') | |
| 118 | 119 | |
| 119 | - short_options = "hdvo:c:t:i:ra:p:m:s:" | |
| 120 | + short_options = "hdvo:c:a:p:m:l:w:s:" | |
| 120 | 121 | long_options = ["help", "debug", "verbose", "output=", "config=", |
| 121 | - "tagsdb=", "tagsindex=", "reindex", "axi=", | |
| 122 | - "popconindex=", "popcondir=", "clustersdir=", | |
| 123 | - "strategy="] | |
| 122 | + "axi=", "popconindex=", "popcondir=", "clustersdir=", | |
| 123 | + "weight=", "strategy="] | |
| 124 | 124 | try: |
| 125 | 125 | opts, args = getopt.getopt(sys.argv[1:], short_options, |
| 126 | 126 | long_options) |
| ... | ... | @@ -142,12 +142,6 @@ class Config(): |
| 142 | 142 | self.output = p |
| 143 | 143 | elif o in ("-c", "--config"): |
| 144 | 144 | self.config = p |
| 145 | - elif o in ("-t", "--tagsdb"): | |
| 146 | - self.tags_db = p | |
| 147 | - elif o in ("-i", "--tagsindex"): | |
| 148 | - self.tags_index = p | |
| 149 | - elif o in ("-r", "--force-reindex"): | |
| 150 | - self.reindex = 1 | |
| 151 | 145 | elif o in ("-a", "--axi"): |
| 152 | 146 | self.axi = p + "/index" |
| 153 | 147 | self.axi_values = p + "/values" |
| ... | ... | @@ -157,6 +151,8 @@ class Config(): |
| 157 | 151 | self.popcon_dir = p |
| 158 | 152 | elif o in ("-l", "--clustersdir"): |
| 159 | 153 | self.popcon_dir = p |
| 154 | + elif o in ("-w", "--weight"): | |
| 155 | + self.weight = p | |
| 160 | 156 | elif o in ("-s", "--strategy"): |
| 161 | 157 | self.strategy = p |
| 162 | 158 | else: | ... | ... |
src/data.py
| ... | ... | @@ -35,29 +35,44 @@ from singleton import Singleton |
| 35 | 35 | import cluster |
| 36 | 36 | from dissimilarity import * |
| 37 | 37 | |
| 38 | -#class Item: | |
| 39 | -# """ | |
| 40 | -# Generic item definition. | |
| 41 | -# """ | |
| 42 | -# | |
| 43 | -#class Package(Item): | |
| 44 | -# """ | |
| 45 | -# Definition of a GNU/Linux application as a recommender item. | |
| 46 | -# """ | |
| 47 | -# def __init__(self,package_name): | |
| 48 | -# """ | |
| 49 | -# Set initial attributes. | |
| 50 | -# """ | |
| 51 | -# self.package_name = package_name | |
| 52 | -# | |
| 53 | -#def normalize_tags(string): | |
| 54 | -# """ | |
| 55 | -# Substitute string characters : by _ and - by '. | |
| 56 | -# Examples: | |
| 57 | -# admin::package-management -> admin__package'management | |
| 58 | -# implemented-in::c++ -> implemented-in__c++ | |
| 59 | -# """ | |
| 60 | -# return string.replace(':','_').replace('-','\'') | |
| 38 | +def axi_search_pkgs(axi,pkgs_list): | |
| 39 | + terms = ["XP"+item for item in pkgs_list] | |
| 40 | + query = xapian.Query(xapian.Query.OP_OR, terms) | |
| 41 | + enquire = xapian.Enquire(axi) | |
| 42 | + enquire.set_query(query) | |
| 43 | + matches = enquire.get_mset(0,axi.get_doccount()) | |
| 44 | + return matches | |
| 45 | + | |
| 46 | +def axi_search_pkg_tags(axi,pkg): | |
| 47 | + query = xapian.Query(xapian.Query.OP_OR, "XP"+pkg) | |
| 48 | + enquire = xapian.Enquire(axi) | |
| 49 | + enquire.set_query(query) | |
| 50 | + matches = enquire.get_mset(0,1) | |
| 51 | + for m in matches: | |
| 52 | + tags = [term.term for term in axi.get_document(m.docid).termlist() if | |
| 53 | + term.term.startswith("XT")] | |
| 54 | + return tags | |
| 55 | + | |
| 56 | +class SampleAptXapianIndex(xapian.WritableDatabase): | |
| 57 | + """ | |
| 58 | + Sample data source for packages information, mainly useful for tests. | |
| 59 | + """ | |
| 60 | + def __init__(self,pkgs_list,axi): | |
| 61 | + xapian.WritableDatabase.__init__(self,".sample_axi", | |
| 62 | + xapian.DB_CREATE_OR_OVERWRITE) | |
| 63 | + sample = axi_search_pkgs(axi,pkgs_list) | |
| 64 | + self.all_docs = [] | |
| 65 | + for package in sample: | |
| 66 | + doc_id = self.add_document(axi.get_document(package.docid)) | |
| 67 | + self.all_docs.append(doc_id) | |
| 68 | + | |
| 69 | + def _print(self): | |
| 70 | + print "---" | |
| 71 | + print xapian.WritableDatabase.__repr__(self) | |
| 72 | + print "---" | |
| 73 | + for doc_id in self.all_docs: | |
| 74 | + print [term.term for term in self.get_document(doc_id).termlist()] | |
| 75 | + print "---" | |
| 61 | 76 | |
| 62 | 77 | #[FIXME] get pkg tags from axi and remove load_debtags_db method |
| 63 | 78 | def load_debtags_db(db_path): |
| ... | ... | @@ -75,106 +90,6 @@ def load_debtags_db(db_path): |
| 75 | 90 | logging.error("Could not load DebtagsDB from '%s'." % self.db_path) |
| 76 | 91 | raise Error |
| 77 | 92 | |
| 78 | -#class TagsXapianIndex(xapian.WritableDatabase,Singleton): | |
| 79 | -# """ | |
| 80 | -# Data source for tags info defined as a singleton xapian database. | |
| 81 | -# """ | |
| 82 | -# def __init__(self,cfg): | |
| 83 | -# """ | |
| 84 | -# Set initial attributes. | |
| 85 | -# """ | |
| 86 | -# self.path = os.path.expanduser(cfg.tags_index) | |
| 87 | -# self.db_path = os.path.expanduser(cfg.tags_db) | |
| 88 | -# self.debtags_db = debtags.DB() | |
| 89 | -# try: | |
| 90 | -# db_file = open(self.db_path) | |
| 91 | -# except IOError: | |
| 92 | -# logging.error("Could not load DebtagsDB from '%s'." % self.db_path) | |
| 93 | -# raise Error | |
| 94 | -# md5 = hashlib.md5() | |
| 95 | -# md5.update(db_file.read()) | |
| 96 | -# self.db_md5 = md5.hexdigest() | |
| 97 | -# db_file.close() | |
| 98 | -# self.load_index(cfg.reindex) | |
| 99 | -# | |
| 100 | -## def load_db(self): | |
| 101 | -## """ | |
| 102 | -## Load debtags database from the source file. | |
| 103 | -## """ | |
| 104 | -## tag_filter = re.compile(r"^special::.+$|^.+::TODO$") | |
| 105 | -## try: | |
| 106 | -## db_file = open(self.db_path, "r") | |
| 107 | -## self.debtags_db.read(db_file,lambda x: not tag_filter.match(x)) | |
| 108 | -## db_file.close() | |
| 109 | -## except: | |
| 110 | -## logging.error("Could not load DebtagsDB from '%s'." % self.db_path) | |
| 111 | -## raise Error | |
| 112 | -# | |
| 113 | -# def relevant_tags_from_db(self,pkgs_list,qtd_of_tags): | |
| 114 | -# """ | |
| 115 | -# Return most relevant tags considering a list of packages. | |
| 116 | -# """ | |
| 117 | -# if not self.debtags_db.package_count(): | |
| 118 | -# #print "index vazio" | |
| 119 | -# self.debtags_db = load_debtags_db(self.db_path) | |
| 120 | -# relevant_db = self.debtags_db.choose_packages(pkgs_list) | |
| 121 | -# relevance_index = debtags.relevance_index_function(self.debtags_db, | |
| 122 | -# relevant_db) | |
| 123 | -# sorted_relevant_tags = sorted(relevant_db.iter_tags(), | |
| 124 | -# lambda a, b: cmp(relevance_index(a), | |
| 125 | -# relevance_index(b))) | |
| 126 | -# return normalize_tags(' '.join(sorted_relevant_tags[-qtd_of_tags:])) | |
| 127 | -# | |
| 128 | -# def load_index(self,reindex): | |
| 129 | -# """ | |
| 130 | -# Load an existing debtags index. | |
| 131 | -# """ | |
| 132 | -# if not reindex: | |
| 133 | -# try: | |
| 134 | -# logging.info("Opening existing debtags xapian index at \'%s\'" | |
| 135 | -# % self.path) | |
| 136 | -# xapian.Database.__init__(self,self.path) | |
| 137 | -# md5 = self.get_metadata("md5") | |
| 138 | -# if not md5 == self.db_md5: | |
| 139 | -# logging.info("Index must be updated.") | |
| 140 | -# reindex = 1 | |
| 141 | -# except xapian.DatabaseError: | |
| 142 | -# logging.info("Could not open debtags index.") | |
| 143 | -# reindex =1 | |
| 144 | -# | |
| 145 | -# if reindex: | |
| 146 | -# self.new_index() | |
| 147 | -# | |
| 148 | -# def new_index(self): | |
| 149 | -# """ | |
| 150 | -# Create a xapian index for debtags info based on 'debtags_db' and | |
| 151 | -# place it at 'self.path'. | |
| 152 | -# """ | |
| 153 | -# if not os.path.exists(self.path): | |
| 154 | -# os.makedirs(self.path) | |
| 155 | -# | |
| 156 | -# try: | |
| 157 | -# logging.info("Indexing debtags info from \'%s\'" % | |
| 158 | -# self.db_path) | |
| 159 | -# logging.info("Creating new xapian index at \'%s\'" % | |
| 160 | -# self.path) | |
| 161 | -# xapian.WritableDatabase.__init__(self,self.path, | |
| 162 | -# xapian.DB_CREATE_OR_OVERWRITE) | |
| 163 | -# except xapian.DatabaseError: | |
| 164 | -# logging.critical("Could not create xapian index.") | |
| 165 | -# raise Error | |
| 166 | -# | |
| 167 | -# self.debtags_db = load_debtags_db(self.db_path) | |
| 168 | -# self.set_metadata("md5",self.db_md5) | |
| 169 | -# | |
| 170 | -# for pkg,tags in self.debtags_db.iter_packages_tags(): | |
| 171 | -# doc = xapian.Document() | |
| 172 | -# doc.set_data(pkg) | |
| 173 | -# for tag in tags: | |
| 174 | -# doc.add_term(normalize_tags(tag)) | |
| 175 | -# doc_id = self.add_document(doc) | |
| 176 | -# logging.debug("Debtags Xapian: Indexing doc %d",doc_id) | |
| 177 | - | |
| 178 | 93 | class PopconXapianIndex(xapian.WritableDatabase,Singleton): |
| 179 | 94 | """ |
| 180 | 95 | Data source for popcon submissions defined as a singleton xapian database. | ... | ... |
src/recommender.py
| ... | ... | @@ -19,10 +19,10 @@ __license__ = """ |
| 19 | 19 | along with this program. If not, see <http://www.gnu.org/licenses/>. |
| 20 | 20 | """ |
| 21 | 21 | |
| 22 | -from operator import itemgetter | |
| 23 | -from data import * | |
| 24 | -from strategy import * | |
| 25 | -from error import Error | |
| 22 | +import xapian | |
| 23 | +import operator | |
| 24 | +import data | |
| 25 | +import strategy | |
| 26 | 26 | |
| 27 | 27 | class RecommendationResult: |
| 28 | 28 | """ |
| ... | ... | @@ -40,7 +40,7 @@ class RecommendationResult: |
| 40 | 40 | """ |
| 41 | 41 | result = self.get_prediction() |
| 42 | 42 | str = "\n" |
| 43 | - for i in range(len(result)): | |
| 43 | + for i in range(len((list(result)))): | |
| 44 | 44 | str += "%2d: %s\n" % (i,result[i][0]) |
| 45 | 45 | return str |
| 46 | 46 | |
| ... | ... | @@ -48,8 +48,10 @@ class RecommendationResult: |
| 48 | 48 | """ |
| 49 | 49 | Return prediction based on recommendation size (number of items). |
| 50 | 50 | """ |
| 51 | - sorted_result = sorted(self.item_score.items(), key=itemgetter(1)) | |
| 52 | - return reversed(sorted_result[-size:]) | |
| 51 | + if size > len(self.item_score): size = len(self.item_score) | |
| 52 | + sorted_result = sorted(self.item_score.items(), | |
| 53 | + key=operator.itemgetter(1)) | |
| 54 | + return list(reversed(sorted_result[-size:])) | |
| 53 | 55 | |
| 54 | 56 | class Recommender: |
| 55 | 57 | """ |
| ... | ... | @@ -59,47 +61,30 @@ class Recommender: |
| 59 | 61 | """ |
| 60 | 62 | Set initial parameters. |
| 61 | 63 | """ |
| 62 | - try: | |
| 63 | - strategy = "self."+cfg.strategy+"(cfg)" | |
| 64 | - exec(strategy) | |
| 65 | - except (NameError, AttributeError, SyntaxError) as err: | |
| 66 | - print err | |
| 67 | - logging.critical("Could not perform recommendation strategy '%s'" % | |
| 68 | - cfg.strategy) | |
| 69 | - raise Error | |
| 70 | - | |
| 71 | - def ct(self,cfg): | |
| 72 | - """ | |
| 73 | - Set recommender attributes to perform content-based recommendation | |
| 74 | - using tags index as source data. | |
| 75 | - """ | |
| 76 | - self.items_repository = TagsXapianIndex(cfg) | |
| 77 | - self.strategy = ContentBasedStrategy() | |
| 78 | - | |
| 79 | - def cta(self,cfg): | |
| 80 | - """ | |
| 81 | - Set recommender attributes to perform content-based recommendation | |
| 82 | - using apt-xapian-index as source data. | |
| 83 | - """ | |
| 84 | 64 | self.items_repository = xapian.Database(cfg.axi) |
| 85 | - self.strategy = AxiContentBasedStrategy() | |
| 86 | - | |
| 87 | - def col(self,cfg): | |
| 88 | - """ | |
| 89 | - Set recommender attributes to perform collaborative recommendation | |
| 90 | - using popcon-xapian-index as source data. | |
| 91 | - """ | |
| 92 | - self.users_repository = PopconXapianIndex(cfg) | |
| 93 | - self.strategy = CollaborativeStrategy() | |
| 65 | + self.users_repository = data.PopconXapianIndex(cfg) #[FIXME] only cfg fields | |
| 66 | + self.clustered_users_repository = data.PopconXapianIndex(cfg) #[FIXME] | |
| 67 | + self.set_strategy(cfg.strategy) | |
| 68 | + if cfg.weight == "bm25": | |
| 69 | + self.weight = xapian.BM25Weight() | |
| 70 | + else: | |
| 71 | + self.weight = xapian.TradWeight() | |
| 94 | 72 | |
| 95 | - def set_strategy(self,strategy): | |
| 73 | + def set_strategy(self,strategy_str): | |
| 96 | 74 | """ |
| 97 | 75 | Set the recommendation strategy. |
| 98 | 76 | """ |
| 99 | - self.strategy = strategy | |
| 77 | + if strategy_str == "cb": | |
| 78 | + self.strategy = strategy.ContentBasedStrategy("full") | |
| 79 | + if strategy_str == "cbt": | |
| 80 | + self.strategy = strategy.ContentBasedStrategy("tag") | |
| 81 | + if strategy_str == "cbd": | |
| 82 | + self.strategy = strategy.ContentBasedStrategy("desc") | |
| 83 | + if strategy_str == "col": | |
| 84 | + self.strategy = strategy.CollaborativeStrategy(20) | |
| 100 | 85 | |
| 101 | - def get_recommendation(self,user): | |
| 86 | + def get_recommendation(self,user,limit=20): | |
| 102 | 87 | """ |
| 103 | 88 | Produces recommendation using previously loaded strategy. |
| 104 | 89 | """ |
| 105 | - return self.strategy.run(self,user) | |
| 90 | + return self.strategy.run(self,user,limit) | ... | ... |
src/strategy.py
| ... | ... | @@ -20,54 +20,27 @@ __license__ = """ |
| 20 | 20 | along with this program. If not, see <http://www.gnu.org/licenses/>. |
| 21 | 21 | """ |
| 22 | 22 | |
| 23 | -import string | |
| 24 | -import os, re | |
| 25 | 23 | import xapian |
| 26 | -from data import * | |
| 27 | 24 | from singleton import Singleton |
| 28 | 25 | import recommender |
| 29 | - | |
| 30 | -class ReputationHeuristic(Singleton): | |
| 31 | - """ | |
| 32 | - Abstraction for diferent reputation heuristics. | |
| 33 | - """ | |
| 34 | - pass | |
| 35 | - | |
| 36 | -class BugsHeuristic(ReputationHeuristic): | |
| 37 | - """ | |
| 38 | - Reputation heuristic based on quantity of open bugs. | |
| 39 | - """ | |
| 40 | - pass | |
| 41 | - | |
| 42 | -class RCBugsHeuristic(ReputationHeuristic): | |
| 43 | - """ | |
| 44 | - Reputation heuristic based on quantity of RC bugs. | |
| 45 | - """ | |
| 46 | - pass | |
| 47 | - | |
| 48 | -class PopularityHeuristic(ReputationHeuristic): | |
| 49 | - """ | |
| 50 | - Reputation heuristic based on popularity of packages. | |
| 51 | - """ | |
| 52 | - pass | |
| 26 | +from data import * | |
| 53 | 27 | |
| 54 | 28 | class PkgMatchDecider(xapian.MatchDecider): |
| 55 | 29 | """ |
| 56 | 30 | Extend xapian.MatchDecider to not consider installed packages. |
| 57 | 31 | """ |
| 58 | - | |
| 59 | - def __init__(self, installed_pkgs): | |
| 32 | + def __init__(self, pkgs_list): | |
| 60 | 33 | """ |
| 61 | 34 | Set initial parameters. |
| 62 | 35 | """ |
| 63 | 36 | xapian.MatchDecider.__init__(self) |
| 64 | - self.installed_pkgs = installed_pkgs | |
| 37 | + self.pkgs_list = pkgs_list | |
| 65 | 38 | |
| 66 | 39 | def __call__(self, doc): |
| 67 | 40 | """ |
| 68 | 41 | True if the package is not already installed. |
| 69 | 42 | """ |
| 70 | - return doc.get_data() not in self.installed_pkgs | |
| 43 | + return doc.get_data() not in self.pkgs_list | |
| 71 | 44 | |
| 72 | 45 | class UserMatchDecider(xapian.MatchDecider): |
| 73 | 46 | """ |
| ... | ... | @@ -80,51 +53,35 @@ class UserMatchDecider(xapian.MatchDecider): |
| 80 | 53 | """ |
| 81 | 54 | xapian.MatchDecider.__init__(self) |
| 82 | 55 | self.profile = profile |
| 83 | - print "mdecider:",profile | |
| 84 | 56 | |
| 85 | 57 | def __call__(self, doc): |
| 86 | 58 | """ |
| 87 | 59 | True if the user has more the half of packages from profile. |
| 88 | 60 | """ |
| 89 | - profile_size = len(self.profile) | |
| 90 | - pkg_match=0 | |
| 61 | + match=0 | |
| 91 | 62 | for term in doc: |
| 92 | 63 | if term.term in self.profile: |
| 93 | - pkg_match = pkg_match+1 | |
| 94 | - print "id",doc.get_docid(),"match",pkg_match | |
| 95 | - return pkg_match >= profile_size/2 | |
| 64 | + match = match+1 | |
| 65 | + return (match >= len(self.profile)/2) | |
| 96 | 66 | |
| 97 | 67 | class PkgExpandDecider(xapian.ExpandDecider): |
| 98 | 68 | """ |
| 99 | 69 | Extend xapian.ExpandDecider to consider packages only. |
| 100 | 70 | """ |
| 101 | - | |
| 102 | - def __init__(self): | |
| 103 | - """ | |
| 104 | - Call base class init. | |
| 105 | - """ | |
| 106 | - xapian.ExpandDecider.__init__(self) | |
| 107 | - | |
| 108 | 71 | def __call__(self, term): |
| 109 | 72 | """ |
| 110 | 73 | True if the term is a package. |
| 111 | 74 | """ |
| 75 | + # [FIXME] return term.startswith("XP") | |
| 112 | 76 | return not term.startswith("XT") |
| 113 | 77 | |
| 114 | 78 | class TagExpandDecider(xapian.ExpandDecider): |
| 115 | 79 | """ |
| 116 | 80 | Extend xapian.ExpandDecider to consider tags only. |
| 117 | 81 | """ |
| 118 | - | |
| 119 | - def __init__(self, profile): | |
| 120 | - """ | |
| 121 | - Call base class init. | |
| 122 | - """ | |
| 123 | - xapian.ExpandDecider.__init__(self) | |
| 124 | - | |
| 125 | - def __call__(self, doc): | |
| 82 | + def __call__(self, term): | |
| 126 | 83 | """ |
| 127 | - True if the user has more the half of packages from profile. | |
| 84 | + True if the term is a tag. | |
| 128 | 85 | """ |
| 129 | 86 | return term.startswith("XT") |
| 130 | 87 | |
| ... | ... | @@ -134,65 +91,30 @@ class RecommendationStrategy: |
| 134 | 91 | """ |
| 135 | 92 | pass |
| 136 | 93 | |
| 137 | -class ItemReputationStrategy(RecommendationStrategy): | |
| 138 | - """ | |
| 139 | - Recommendation strategy based on items reputation. | |
| 140 | - """ | |
| 141 | - def run(self,items_list,heuristic): | |
| 142 | - """ | |
| 143 | - Perform recommendation strategy. | |
| 144 | - """ | |
| 145 | - logging.critical("Item reputation recommendation strategy is not yet implemented.") | |
| 146 | - raise Error | |
| 147 | - | |
| 148 | -#class ContentBasedStrategy(RecommendationStrategy): | |
| 149 | -# """ | |
| 150 | -# Content-based recommendation strategy. | |
| 151 | -# """ | |
| 152 | -# def run(self,rec,user): | |
| 153 | -# """ | |
| 154 | -# Perform recommendation strategy. | |
| 155 | -# """ | |
| 156 | -# profile = user.txi_tag_profile(rec.items_repository,50) | |
| 157 | -# qp = xapian.QueryParser() | |
| 158 | -# query = qp.parse_query(profile) | |
| 159 | -# enquire = xapian.Enquire(rec.items_repository) | |
| 160 | -# enquire.set_query(query) | |
| 161 | -# | |
| 162 | -# try: | |
| 163 | -# mset = enquire.get_mset(0, 20, None, PkgMatchDecider(user.items())) | |
| 164 | -# except xapian.DatabaseError as error: | |
| 165 | -# logging.critical(error.get_msg()) | |
| 166 | -# raise Error | |
| 167 | -# | |
| 168 | -# item_score = {} | |
| 169 | -# for m in mset: | |
| 170 | -# item_score[m.document.get_data()] = m.rank | |
| 171 | -# return recommender.RecommendationResult(item_score,20) | |
| 172 | - | |
| 173 | -class AxiContentBasedStrategy(RecommendationStrategy): | |
| 94 | +class ContentBasedStrategy(RecommendationStrategy): | |
| 174 | 95 | """ |
| 175 | 96 | Content-based recommendation strategy based on Apt-xapian-index. |
| 176 | 97 | """ |
| 177 | - def __init__(self): | |
| 98 | + def __init__(self,content): | |
| 178 | 99 | self.description = "Content-based" |
| 100 | + self.content = content | |
| 179 | 101 | |
| 180 | - def run(self,rec,user): | |
| 102 | + def run(self,rec,user,limit): | |
| 181 | 103 | """ |
| 182 | 104 | Perform recommendation strategy. |
| 183 | 105 | """ |
| 184 | - profile = user.axi_tag_profile(rec.items_repository,50) | |
| 185 | - #profile_str = string.join(list(profile),' ') | |
| 186 | - query = xapian.Query(xapian.Query.OP_OR,list(profile)) | |
| 106 | + profile = user.profile(rec.items_repository,self.content,50) | |
| 107 | + # prepair index for querying user profile | |
| 108 | + query = xapian.Query(xapian.Query.OP_OR,profile) | |
| 187 | 109 | enquire = xapian.Enquire(rec.items_repository) |
| 110 | + enquire.set_weighting_scheme(rec.weight) | |
| 188 | 111 | enquire.set_query(query) |
| 189 | - | |
| 190 | 112 | try: |
| 191 | - mset = enquire.get_mset(0, 20, None, PkgMatchDecider(user.items())) | |
| 113 | + # retrieve matching packages | |
| 114 | + mset = enquire.get_mset(0, limit, None, PkgMatchDecider(user.items())) | |
| 192 | 115 | except xapian.DatabaseError as error: |
| 193 | - logging.critical(error.get_msg()) | |
| 194 | - raise Error | |
| 195 | - | |
| 116 | + logging.critical("Content-based strategy: "+error.get_msg()) | |
| 117 | + # compose result dictionary | |
| 196 | 118 | item_score = {} |
| 197 | 119 | for m in mset: |
| 198 | 120 | item_score[m.document.get_data()] = m.weight |
| ... | ... | @@ -202,66 +124,107 @@ class CollaborativeStrategy(RecommendationStrategy): |
| 202 | 124 | """ |
| 203 | 125 | Colaborative recommendation strategy. |
| 204 | 126 | """ |
| 205 | - def __init__(self): | |
| 127 | + def __init__(self,k,clustering=1): | |
| 206 | 128 | self.description = "Collaborative" |
| 129 | + self.clustering = clustering | |
| 130 | + self.neighbours = k | |
| 207 | 131 | |
| 208 | - #def run(self,rec,user,similarity_measure): | |
| 209 | - def run(self,rec,user): | |
| 132 | + def run(self,rec,user,limit): | |
| 210 | 133 | """ |
| 211 | 134 | Perform recommendation strategy. |
| 212 | 135 | """ |
| 213 | - profile = user.maximal_pkg_profile() | |
| 214 | - #profile_str = string.join(list(profile),' ') | |
| 215 | - query = xapian.Query(xapian.Query.OP_OR,list(profile)) | |
| 216 | - enquire = xapian.Enquire(rec.users_repository) | |
| 136 | + profile = user.pkg_profile | |
| 137 | + # prepair index for querying user profile | |
| 138 | + query = xapian.Query(xapian.Query.OP_OR,profile) | |
| 139 | + if self.clustering: | |
| 140 | + enquire = xapian.Enquire(rec.clustered_users_repository) | |
| 141 | + else: | |
| 142 | + enquire = xapian.Enquire(rec.users_repository) | |
| 143 | + enquire.set_weighting_scheme(rec.weight) | |
| 217 | 144 | enquire.set_query(query) |
| 218 | - | |
| 219 | 145 | try: |
| 220 | - #mset = enquire.get_mset(0, 182, None, UserMatchDecider(profile)) | |
| 221 | - mset = enquire.get_mset(0, 20) | |
| 146 | + # retrieve matching users | |
| 147 | + mset = enquire.get_mset(0, self.neighbours) | |
| 222 | 148 | except xapian.DatabaseError as error: |
| 223 | - logging.critical(error.get_msg()) | |
| 224 | - raise Error | |
| 225 | - | |
| 149 | + logging.critical("Collaborative strategy: "+error.get_msg()) | |
| 226 | 150 | rset = xapian.RSet() |
| 151 | + logging.debug("Neighborhood composed by the following users (by hash)") | |
| 227 | 152 | for m in mset: |
| 228 | 153 | rset.add_document(m.document.get_docid()) |
| 229 | - logging.debug("Counting as relevant submission %s" % | |
| 230 | - m.document.get_data()) | |
| 231 | - | |
| 232 | - eset = enquire.get_eset(20,rset,PkgExpandDecider()) | |
| 233 | - rank = 0 | |
| 154 | + logging.debug(m.document.get_data()) | |
| 155 | + # retrieve most relevant packages | |
| 156 | + eset = enquire.get_eset(limit,rset,PkgExpandDecider()) | |
| 157 | + # compose result dictionary | |
| 234 | 158 | item_score = {} |
| 235 | - for term in eset: | |
| 236 | - item_score[term.term] = rank | |
| 237 | - rank = rank+1 | |
| 238 | - | |
| 159 | + for package in eset: | |
| 160 | + item_score[package.term.lstrip("XP")] = package.weight | |
| 239 | 161 | return recommender.RecommendationResult(item_score) |
| 240 | 162 | |
| 163 | +class DemographicStrategy(RecommendationStrategy): | |
| 164 | + """ | |
| 165 | + Recommendation strategy based on demographic data. | |
| 166 | + """ | |
| 167 | + def __init__(self): | |
| 168 | + self.description = "Demographic" | |
| 169 | + logging.debug("Demographic recommendation not yet implemented.") | |
| 170 | + raise Error | |
| 171 | + | |
| 172 | + def run(self,user,items_repository): | |
| 173 | + """ | |
| 174 | + Perform recommendation strategy. | |
| 175 | + """ | |
| 176 | + pass | |
| 177 | + | |
| 241 | 178 | class KnowledgeBasedStrategy(RecommendationStrategy): |
| 242 | 179 | """ |
| 243 | 180 | Knowledge-based recommendation strategy. |
| 244 | 181 | """ |
| 245 | 182 | def __init__(self): |
| 246 | 183 | self.description = "Knowledge-based" |
| 184 | + logging.debug("Knowledge-based recommendation not yet implemented.") | |
| 185 | + raise Error | |
| 247 | 186 | |
| 248 | 187 | def run(self,user,knowledge_repository): |
| 249 | 188 | """ |
| 250 | 189 | Perform recommendation strategy. |
| 251 | 190 | """ |
| 252 | - logging.critical("Knowledge-based recommendation strategy is not yet implemented.") | |
| 253 | - raise Error | |
| 191 | + pass | |
| 254 | 192 | |
| 255 | -class DemographicStrategy(RecommendationStrategy): | |
| 193 | +class ReputationHeuristic(Singleton): | |
| 256 | 194 | """ |
| 257 | - Recommendation strategy based on demographic data. | |
| 195 | + Abstraction for diferent reputation heuristics. | |
| 196 | + """ | |
| 197 | + pass | |
| 198 | + | |
| 199 | +class BugsHeuristic(ReputationHeuristic): | |
| 200 | + """ | |
| 201 | + Reputation heuristic based on quantity of open bugs. | |
| 202 | + """ | |
| 203 | + pass | |
| 204 | + | |
| 205 | +class RCBugsHeuristic(ReputationHeuristic): | |
| 206 | + """ | |
| 207 | + Reputation heuristic based on quantity of RC bugs. | |
| 208 | + """ | |
| 209 | + pass | |
| 210 | + | |
| 211 | +class PopularityHeuristic(ReputationHeuristic): | |
| 212 | + """ | |
| 213 | + Reputation heuristic based on popularity of packages. | |
| 214 | + """ | |
| 215 | + pass | |
| 216 | + | |
| 217 | +class ItemReputationStrategy(RecommendationStrategy): | |
| 218 | + """ | |
| 219 | + Recommendation strategy based on items reputation. | |
| 258 | 220 | """ |
| 259 | 221 | def __init__(self): |
| 260 | - self.description = "Demographic" | |
| 222 | + self.description = "Item reputation" | |
| 223 | + logging.debug("Item reputation recommendation not yet implemented.") | |
| 224 | + raise Error | |
| 261 | 225 | |
| 262 | - def run(self,user,items_repository): | |
| 226 | + def run(self,items_list,heuristic): | |
| 263 | 227 | """ |
| 264 | 228 | Perform recommendation strategy. |
| 265 | 229 | """ |
| 266 | - logging.critical("Demographic recommendation strategy is not yet implemented.") | |
| 267 | - raise Error | |
| 230 | + pass | ... | ... |
src/tests/package-xapian-index
| ... | ... | @@ -1,10 +0,0 @@ |
| 1 | -aaphoto: implemented-in::c, interface::commandline, role::program, use::editing, works-with::image | |
| 2 | -dia: implemented-in::c, interface::x11, role::program, scope::application, suite::gnu, uitoolkit::gtk, use::editing, works-with::image, works-with::image:vector, x11::application | |
| 3 | -eog: implemented-in::c, interface::x11, role::program, scope::application, suite::gnome, uitoolkit::gtk, use::viewing, works-with-format::jpg, works-with-format::png, works-with::image, works-with::image:raster, works-with::image:vector, x11::application | |
| 4 | -emacs: devel::editor, role::dummy, role::metapackage, special::meta, suite::emacs, suite::gnu, use::editing | |
| 5 | -ferret: devel::modelling, role::program, scope::application, suite::gnu, works-with::db | |
| 6 | -festival: accessibility::speech, devel::interpreter, implemented-in::scheme, interface::text-mode, network::client, network::server, role::program, sound::speech, uitoolkit::ncurses, works-with::audio | |
| 7 | -file: admin::forensics, implemented-in::c, interface::commandline, role::program, scope::utility, use::analysing, use::scanning, works-with::file | |
| 8 | -gimp: implemented-in::c, interface::x11, role::program, scope::application, suite::gimp, suite::gnu, uitoolkit::gtk, use::editing, works-with-format::gif, works-with-format::jpg, works-with-format::pdf, works-with-format::png, works-with-format::tiff, works-with::image, works-with::image:raster, works-with::text, x11::application | |
| 9 | -inkscape: implemented-in::c, implemented-in::c++, interface::x11, role::program, scope::application, uitoolkit::gtk, use::editing, works-with-format::pdf, works-with-format::postscript, works-with-format::svg, works-with-format::tex, works-with::image, works-with::image:vector, x11::application | |
| 10 | -xpdf: implemented-in::c++, interface::x11, role::program, scope::application, uitoolkit::motif, use::viewing, works-with-format::pdf, works-with::text, x11::application |
| ... | ... | @@ -0,0 +1,69 @@ |
| 1 | +#!/usr/bin/env python | |
| 2 | +""" | |
| 3 | + recommenderTests - Recommender class test case | |
| 4 | +""" | |
| 5 | +__author__ = "Tassia Camoes Araujo <tassia@gmail.com>" | |
| 6 | +__copyright__ = "Copyright (C) 2011 Tassia Camoes Araujo" | |
| 7 | +__license__ = """ | |
| 8 | + This program is free software: you can redistribute it and/or modify | |
| 9 | + it under the terms of the GNU General Public License as published by | |
| 10 | + the Free Software Foundation, either version 3 of the License, or | |
| 11 | + (at your option) any later version. | |
| 12 | + | |
| 13 | + This program is distributed in the hope that it will be useful, | |
| 14 | + but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| 15 | + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
| 16 | + GNU General Public License for more details. | |
| 17 | + | |
| 18 | + You should have received a copy of the GNU General Public License | |
| 19 | + along with this program. If not, see <http://www.gnu.org/licenses/>. | |
| 20 | +""" | |
| 21 | + | |
| 22 | +import unittest2 | |
| 23 | +import sys | |
| 24 | +sys.path.insert(0,'../') | |
| 25 | +from recommender import RecommendationResult, Recommender | |
| 26 | +from user import User | |
| 27 | +from config import Config | |
| 28 | +from strategy import ContentBasedStrategy, CollaborativeStrategy | |
| 29 | + | |
| 30 | +class RecommendationResultTests(unittest2.TestCase): | |
| 31 | + @classmethod | |
| 32 | + def setUpClass(self): | |
| 33 | + self.result = RecommendationResult({"gimp":1.5,"inkscape":3.0,"eog":1}) | |
| 34 | + | |
| 35 | + def test_str(self): | |
| 36 | + string = "\n 0: inkscape\n 1: gimp\n 2: eog\n" | |
| 37 | + self.assertEqual(self.result.__str__(),string) | |
| 38 | + | |
| 39 | + def test_get_prediction(self): | |
| 40 | + prediction = [("inkscape",3.0),("gimp",1.5),("eog",1)] | |
| 41 | + self.assertEqual(self.result.get_prediction(),prediction) | |
| 42 | + | |
| 43 | +class RecommenderTests(unittest2.TestCase): | |
| 44 | + @classmethod | |
| 45 | + def setUpClass(self): | |
| 46 | + cfg = Config() | |
| 47 | + self.rec = Recommender(cfg) | |
| 48 | + | |
| 49 | + def test_set_strategy(self): | |
| 50 | + self.rec.set_strategy("cb") | |
| 51 | + self.assertIsInstance(self.rec.strategy,ContentBasedStrategy) | |
| 52 | + self.assertEqual(self.rec.strategy.content,"full") | |
| 53 | + self.rec.set_strategy("cbt") | |
| 54 | + self.assertIsInstance(self.rec.strategy,ContentBasedStrategy) | |
| 55 | + self.assertEqual(self.rec.strategy.content,"tag") | |
| 56 | + self.rec.set_strategy("cbd") | |
| 57 | + self.assertIsInstance(self.rec.strategy,ContentBasedStrategy) | |
| 58 | + self.assertEqual(self.rec.strategy.content,"desc") | |
| 59 | + self.rec.set_strategy("col") | |
| 60 | + self.assertIsInstance(self.rec.strategy,CollaborativeStrategy) | |
| 61 | + | |
| 62 | + def test_get_recommendation(self): | |
| 63 | + user = User({"inkscape": 1, "gimp": 1, "eog":1}) | |
| 64 | + result = self.rec.get_recommendation(user) | |
| 65 | + self.assertIsInstance(result, RecommendationResult) | |
| 66 | + self.assertGreater(len(result.item_score),0) | |
| 67 | + | |
| 68 | +if __name__ == '__main__': | |
| 69 | + unittest2.main() | ... | ... |
src/tests/runner.py
| 1 | 1 | #!/usr/bin/env python |
| 2 | 2 | """ |
| 3 | - tests - execution of the whole set of tests suites. | |
| 3 | + runner - Run the whole set of test case suites. | |
| 4 | 4 | """ |
| 5 | 5 | __author__ = "Tassia Camoes Araujo <tassia@gmail.com>" |
| 6 | 6 | __copyright__ = "Copyright (C) 2011 Tassia Camoes Araujo" |
| ... | ... | @@ -20,9 +20,30 @@ __license__ = """ |
| 20 | 20 | """ |
| 21 | 21 | |
| 22 | 22 | import unittest2 |
| 23 | -import user_tests | |
| 24 | -import singleton_tests | |
| 23 | +from user_tests import UserTests, FilterTagTests, FilterDescriptionTests | |
| 24 | +from recommender_tests import RecommendationResultTests, RecommenderTests | |
| 25 | +from strategy_tests import (PkgMatchDeciderTests, UserMatchDeciderTests, | |
| 26 | + PkgExpandDeciderTests, TagExpandDeciderTests, ContentBasedStrategyTests, | |
| 27 | + CollaborativeStrategyTests, DemographicStrategyTests, | |
| 28 | + KnowledgeBasedStrategyTests, ItemReputationStrategyTests) | |
| 29 | +from singleton_tests import SingletonTests | |
| 30 | + | |
| 31 | +def load_tests(test_cases): | |
| 32 | + suite = unittest2.TestSuite() | |
| 33 | + for test_class in test_cases: | |
| 34 | + tests = unittest2.TestLoader().loadTestsFromTestCase(test_class) | |
| 35 | + suite.addTests(tests) | |
| 36 | + return suite | |
| 37 | + | |
| 38 | +test_lists = [[UserTests, FilterTagTests, FilterDescriptionTests], | |
| 39 | + [RecommendationResultTests, RecommenderTests], | |
| 40 | + [PkgMatchDeciderTests, UserMatchDeciderTests, | |
| 41 | + PkgExpandDeciderTests, TagExpandDeciderTests, | |
| 42 | + ContentBasedStrategyTests, CollaborativeStrategyTests, | |
| 43 | + DemographicStrategyTests, KnowledgeBasedStrategyTests, | |
| 44 | + ItemReputationStrategyTests], | |
| 45 | + [SingletonTests]] | |
| 25 | 46 | |
| 26 | 47 | runner = unittest2.TextTestRunner() |
| 27 | -runner.run(user_tests.suite()) | |
| 28 | -runner.run(singleton_tests.suite()) | |
| 48 | +for module in test_lists: | |
| 49 | + runner.run(load_tests(module)) | ... | ... |
src/tests/singleton_tests.py
| ... | ... | @@ -24,9 +24,6 @@ import sys |
| 24 | 24 | sys.path.insert(0,'../') |
| 25 | 25 | from singleton import Singleton |
| 26 | 26 | |
| 27 | -def suite(): | |
| 28 | - return unittest2.TestLoader().loadTestsFromTestCase(SingletonTests) | |
| 29 | - | |
| 30 | 27 | class SingletonTests(unittest2.TestCase): |
| 31 | 28 | def test_creation(self): |
| 32 | 29 | object_1 = Singleton() | ... | ... |
| ... | ... | @@ -0,0 +1,116 @@ |
| 1 | +#!/usr/bin/env python | |
| 2 | +""" | |
| 3 | + strategyTests - Recommendation strategies classes test case | |
| 4 | +""" | |
| 5 | +__author__ = "Tassia Camoes Araujo <tassia@gmail.com>" | |
| 6 | +__copyright__ = "Copyright (C) 2011 Tassia Camoes Araujo" | |
| 7 | +__license__ = """ | |
| 8 | + This program is free software: you can redistribute it and/or modify | |
| 9 | + it under the terms of the GNU General Public License as published by | |
| 10 | + the Free Software Foundation, either version 3 of the License, or | |
| 11 | + (at your option) any later version. | |
| 12 | + | |
| 13 | + This program is distributed in the hope that it will be useful, | |
| 14 | + but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| 15 | + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
| 16 | + GNU General Public License for more details. | |
| 17 | + | |
| 18 | + You should have received a copy of the GNU General Public License | |
| 19 | + along with this program. If not, see <http://www.gnu.org/licenses/>. | |
| 20 | +""" | |
| 21 | + | |
| 22 | +import unittest2 | |
| 23 | +import xapian | |
| 24 | +import sys | |
| 25 | +sys.path.insert(0,'../') | |
| 26 | +from error import Error | |
| 27 | +from user import User | |
| 28 | +from recommender import RecommendationResult | |
| 29 | +from config import * | |
| 30 | +#from data import * | |
| 31 | +from strategy import (PkgMatchDecider, UserMatchDecider, PkgExpandDecider, | |
| 32 | + TagExpandDecider, ContentBasedStrategy, | |
| 33 | + CollaborativeStrategy, DemographicStrategy, | |
| 34 | + KnowledgeBasedStrategy, ItemReputationStrategy) | |
| 35 | + | |
| 36 | +class PkgMatchDeciderTests(unittest2.TestCase): | |
| 37 | + @classmethod | |
| 38 | + def setUpClass(self): | |
| 39 | + pkgs_list = ["gimp","eog","inkscape"] | |
| 40 | + self.decider = PkgMatchDecider(pkgs_list) | |
| 41 | + self.doc = xapian.Document() | |
| 42 | + | |
| 43 | + def test_match(self): | |
| 44 | + self.doc.set_data("emacs") | |
| 45 | + self.assertTrue(self.decider(self.doc)) | |
| 46 | + | |
| 47 | + def test_no_match(self): | |
| 48 | + self.doc.set_data("gimp") | |
| 49 | + self.assertFalse(self.decider(self.doc)) | |
| 50 | + | |
| 51 | +class UserMatchDeciderTests(unittest2.TestCase): | |
| 52 | + @classmethod | |
| 53 | + def setUpClass(self): | |
| 54 | + user_profile = ["gimp","eog","inkscape", "emacs"] | |
| 55 | + self.decider = UserMatchDecider(user_profile) | |
| 56 | + | |
| 57 | + def setUp(self): | |
| 58 | + self.doc = xapian.Document() | |
| 59 | + | |
| 60 | + def test_match(self): | |
| 61 | + self.doc.add_term("emacs") | |
| 62 | + self.doc.add_term("gimp") | |
| 63 | + self.doc.add_term("eog") | |
| 64 | + self.assertTrue(self.decider(self.doc)) | |
| 65 | + | |
| 66 | + def test_no_match(self): | |
| 67 | + self.doc.add_term("gimp") | |
| 68 | + self.assertFalse(self.decider(self.doc)) | |
| 69 | + | |
| 70 | +class PkgExpandDeciderTests(unittest2.TestCase): | |
| 71 | + @classmethod | |
| 72 | + def setUpClass(self): | |
| 73 | + self.decider = PkgExpandDecider() | |
| 74 | + | |
| 75 | + def test_match(self): | |
| 76 | + self.assertTrue(self.decider("XPgimp")) | |
| 77 | + | |
| 78 | + def test_no_match(self): | |
| 79 | + self.assertFalse(self.decider("XTgimp")) | |
| 80 | + | |
| 81 | +class TagExpandDeciderTests(unittest2.TestCase): | |
| 82 | + @classmethod | |
| 83 | + def setUpClass(self): | |
| 84 | + self.decider = TagExpandDecider() | |
| 85 | + | |
| 86 | + def test_match(self): | |
| 87 | + self.assertTrue(self.decider("XTgimp")) | |
| 88 | + | |
| 89 | + def test_no_match(self): | |
| 90 | + self.assertFalse(self.decider("gimp")) | |
| 91 | + | |
| 92 | +class ContentBasedStrategyTests(unittest2.TestCase): | |
| 93 | + @classmethod | |
| 94 | + def setUpClass(self): | |
| 95 | + | |
| 96 | + pass | |
| 97 | + | |
| 98 | +class CollaborativeStrategyTests(unittest2.TestCase): | |
| 99 | + @classmethod | |
| 100 | + def setUpClass(self): | |
| 101 | + pass | |
| 102 | + | |
| 103 | +class DemographicStrategyTests(unittest2.TestCase): | |
| 104 | + def test_call(self): | |
| 105 | + self.assertRaises(Error,lambda: DemographicStrategy()) | |
| 106 | + | |
| 107 | +class KnowledgeBasedStrategyTests(unittest2.TestCase): | |
| 108 | + def test_call(self): | |
| 109 | + self.assertRaises(Error,lambda: KnowledgeBasedStrategy()) | |
| 110 | + | |
| 111 | +class ItemReputationStrategyTests(unittest2.TestCase): | |
| 112 | + def test_call(self): | |
| 113 | + self.assertRaises(Error,lambda: ItemReputationStrategy()) | |
| 114 | + | |
| 115 | +if __name__ == '__main__': | |
| 116 | + unittest2.main() | ... | ... |
src/tests/user_tests.py
| ... | ... | @@ -19,26 +19,39 @@ __license__ = """ |
| 19 | 19 | along with this program. If not, see <http://www.gnu.org/licenses/>. |
| 20 | 20 | """ |
| 21 | 21 | |
| 22 | -import operator | |
| 23 | -import math | |
| 24 | 22 | import unittest2 |
| 25 | 23 | import xapian |
| 26 | 24 | import sys |
| 27 | 25 | sys.path.insert(0,'../') |
| 28 | -from user import * | |
| 29 | -from config import * | |
| 30 | -from data import * | |
| 26 | +from user import User, FilterTag, FilterDescription | |
| 27 | +from config import Config | |
| 28 | +from data import SampleAptXapianIndex | |
| 31 | 29 | |
| 32 | -def suite(): | |
| 33 | - return unittest2.TestLoader().loadTestsFromTestCase(UserTests) | |
| 30 | +class FilterTagTests(unittest2.TestCase): | |
| 31 | + def test_call_true(self): | |
| 32 | + self.assertTrue(FilterTag()("XTrole::program")) | |
| 33 | + | |
| 34 | + def test_call_false(self): | |
| 35 | + self.assertFalse(FilterTag()("role::program")) | |
| 36 | + | |
| 37 | +class FilterDescriptionTests(unittest2.TestCase): | |
| 38 | + def test_call_true(self): | |
| 39 | + self.assertTrue(FilterDescription()("program")) | |
| 40 | + #self.assertTrue(FilterDescription()("Zprogram")) | |
| 41 | + | |
| 42 | + def test_call_false(self): | |
| 43 | + self.assertFalse(FilterDescription()("XTprogram")) | |
| 34 | 44 | |
| 35 | 45 | class UserTests(unittest2.TestCase): |
| 36 | 46 | @classmethod |
| 37 | 47 | def setUpClass(self): |
| 38 | 48 | cfg = Config() |
| 39 | - #self.axi = xapian.Database(cfg.axi) | |
| 49 | + self.axi = xapian.Database(cfg.axi) | |
| 50 | + sample_packages = ["gimp","aaphoto","eog","emacs","dia","ferret", | |
| 51 | + "festival","file","inkscape","xpdf"] | |
| 52 | + self.sample_axi = SampleAptXapianIndex(sample_packages,self.axi) | |
| 40 | 53 | self.user = User({"gimp":1,"aaphoto":1,"eog":1,"emacs":1}) |
| 41 | - self.pxi = PkgXapianIndex("package-xapian-index") | |
| 54 | + #self.sample_axi._print() | |
| 42 | 55 | |
| 43 | 56 | def test_hash(self): |
| 44 | 57 | new_user = User(dict()) |
| ... | ... | @@ -100,34 +113,34 @@ class UserTests(unittest2.TestCase): |
| 100 | 113 | self.assertEqual(self.user.demographic_profile,desktop_art_admin) |
| 101 | 114 | |
| 102 | 115 | def test_items(self): |
| 103 | - self.assertEqual(self.user.items(),set(["gimp","aaphoto","eog","emacs"])) | |
| 104 | - | |
| 105 | - def test_axi_tag_profile(self): | |
| 106 | - package_terms = ["XP"+package for package in self.user.items()] | |
| 107 | - enquire = xapian.Enquire(self.pxi) | |
| 108 | - enquire.set_query(xapian.Query(xapian.Query.OP_OR,package_terms)) | |
| 109 | - user_packages = enquire.get_mset(0, self.pxi.get_doccount(), None, None) | |
| 110 | - tag_terms = [] | |
| 111 | - for p in user_packages: | |
| 112 | - tag_terms = tag_terms + [x.term for x in p.document.termlist() \ | |
| 113 | - if x.term.startswith("XT")] | |
| 114 | - relevant_count = dict([(tag,tag_terms.count(tag)) \ | |
| 115 | - for tag in set(tag_terms)]) | |
| 116 | - #rank = {} | |
| 117 | - #non_relevant_count = dict() | |
| 118 | - #for tag,count in relevant_count.items(): | |
| 119 | - # non_relevant_count[tag] = self.pxi.get_termfreq(tag)-count | |
| 120 | - # if non_relevant_count[tag]>0: | |
| 121 | - # rank[tag] = relevant_count[tag]/float(non_relevant_count[tag]) | |
| 122 | - #print "relevant",relevant_count | |
| 123 | - #print "non_relevant",non_relevant_count | |
| 124 | - #print sorted(rank.items(), key=operator.itemgetter(1)) | |
| 125 | - #[FIXME] get ths value based on real ranking | |
| 126 | - #print set(self.user.axi_tag_profile(self.pxi,4)) | |
| 127 | - self.assertEqual(set(self.user.axi_tag_profile(self.pxi,4)), | |
| 128 | - set(["XTuse::editing", "XTworks-with::image", | |
| 129 | - "XTworks-with-format::png", | |
| 130 | - "XTworks-with-format::jpg"])) | |
| 116 | + self.assertEqual(set(self.user.items()), | |
| 117 | + set(["gimp","aaphoto","eog","emacs"])) | |
| 118 | + | |
| 119 | + def test_profile(self): | |
| 120 | + self.assertEqual(self.user.profile(self.sample_axi,"tag",10), | |
| 121 | + self.user.tag_profile(self.sample_axi,10)) | |
| 122 | + self.assertEqual(self.user.profile(self.sample_axi,"desc",10), | |
| 123 | + self.user.desc_profile(self.sample_axi,10)) | |
| 124 | + self.assertEqual(self.user.profile(self.sample_axi,"full",10), | |
| 125 | + self.user.full_profile(self.sample_axi,10)) | |
| 126 | + | |
| 127 | + def test_tag_profile(self): | |
| 128 | + self.assertEqual(self.user.tag_profile(self.sample_axi,10), | |
| 129 | + ['XTuse::editing', 'XTworks-with::image:raster', | |
| 130 | + 'XTworks-with-format::png', 'XTworks-with-format::jpg', | |
| 131 | + 'XTworks-with::image','XTimplemented-in::c', | |
| 132 | + 'XTsuite::gnome', 'XTsuite::emacs', | |
| 133 | + 'XTrole::metapackage', 'XTdevel::editor']) | |
| 134 | + | |
| 135 | + def test_desc_profile(self): | |
| 136 | + self.assertEqual(self.user.desc_profile(self.sample_axi,10), | |
| 137 | + ['image', 'the', 'which', 'manipulation', 'program', | |
| 138 | + 'input', 'a', 'gnu', 'images', 'this']) | |
| 139 | + | |
| 140 | + def test_full_profile(self): | |
| 141 | + self.assertEqual(self.user.full_profile(self.sample_axi,10), | |
| 142 | + (self.user.tag_profile(self.sample_axi,5)+ | |
| 143 | + self.user.desc_profile(self.sample_axi,5))) | |
| 131 | 144 | |
| 132 | 145 | def test_maximal_pkg_profile(self): |
| 133 | 146 | old_pkg_profile = self.user.items() | ... | ... |
src/user.py
| ... | ... | @@ -25,6 +25,7 @@ import xapian |
| 25 | 25 | import logging |
| 26 | 26 | import apt |
| 27 | 27 | from singleton import Singleton |
| 28 | +import data | |
| 28 | 29 | |
| 29 | 30 | class FilterTag(xapian.ExpandDecider): |
| 30 | 31 | """ |
| ... | ... | @@ -34,7 +35,17 @@ class FilterTag(xapian.ExpandDecider): |
| 34 | 35 | """ |
| 35 | 36 | Return true if the term is a tag, else false. |
| 36 | 37 | """ |
| 37 | - return term[:2] == "XT" | |
| 38 | + return term.startswith("XT") | |
| 39 | + | |
| 40 | +class FilterDescription(xapian.ExpandDecider): | |
| 41 | + """ | |
| 42 | + Extend xapian.ExpandDecider to consider only package description terms. | |
| 43 | + """ | |
| 44 | + def __call__(self, term): | |
| 45 | + """ | |
| 46 | + Return true if the term is a description term, else false. | |
| 47 | + """ | |
| 48 | + return term.islower() #or term.startswith("Z") | |
| 38 | 49 | |
| 39 | 50 | class DemographicProfile(Singleton): |
| 40 | 51 | def __init__(self): |
| ... | ... | @@ -63,57 +74,83 @@ class User: |
| 63 | 74 | """ |
| 64 | 75 | Define a user of a recommender. |
| 65 | 76 | """ |
| 66 | - def __init__(self,item_score,user_id=0,profiles_set=0): | |
| 77 | + def __init__(self,item_score,user_id=0,demo_profiles_set=0): | |
| 67 | 78 | """ |
| 68 | - Set initial user attributes. If no user_id was passed as parameter, a | |
| 69 | - random md5-hash is generated for that purpose. If the demographic | |
| 70 | - profile was not defined, it defaults to 'desktop' | |
| 79 | + Set initial user attributes. pkg_profile gets the whole set of items, | |
| 80 | + a random user_id is set if none was provided and the demographic | |
| 81 | + profile defaults to 'desktop'. | |
| 71 | 82 | """ |
| 72 | 83 | self.item_score = item_score |
| 84 | + self.pkg_profile = self.items() | |
| 85 | + | |
| 73 | 86 | if user_id: |
| 74 | 87 | self.id = user_id |
| 75 | 88 | else: |
| 76 | 89 | random.seed() |
| 77 | 90 | self.id = random.getrandbits(128) |
| 78 | - self.pkg_profile = self.item_score.keys() | |
| 79 | - if not profiles_set: | |
| 91 | + | |
| 92 | + if not demo_profiles_set: | |
| 80 | 93 | profiles_set = set(["desktop"]) |
| 81 | 94 | self.set_demographic_profile(profiles_set) |
| 82 | 95 | |
| 96 | + def items(self): | |
| 97 | + """ | |
| 98 | + Return the list of user items. | |
| 99 | + """ | |
| 100 | + return self.item_score.keys() | |
| 101 | + | |
| 83 | 102 | def set_demographic_profile(self,profiles_set): |
| 103 | + """ | |
| 104 | + Set demographic profile based on labels in 'profiles_set'. | |
| 105 | + """ | |
| 84 | 106 | self.demographic_profile = DemographicProfile()(profiles_set) |
| 85 | 107 | |
| 86 | - def items(self): | |
| 108 | + def profile(self,items_repository,content,size): | |
| 87 | 109 | """ |
| 88 | - Return the set of user items. | |
| 110 | + Get user profile for a specific type of content: packages tags, | |
| 111 | + description or both (full_profile) | |
| 112 | + """ | |
| 113 | + if content == "tag": return self.tag_profile(items_repository,size) | |
| 114 | + if content == "desc": return self.desc_profile(items_repository,size) | |
| 115 | + if content == "full": return self.full_profile(items_repository,size) | |
| 116 | + | |
| 117 | + def tag_profile(self,items_repository,size): | |
| 118 | + """ | |
| 119 | + Return most relevant tags for a list of packages. | |
| 89 | 120 | """ |
| 90 | - return set(self.item_score.keys()) | |
| 91 | - | |
| 92 | - def axi_tag_profile(self,apt_xapian_index,profile_size): | |
| 93 | - """ | |
| 94 | - Return most relevant tags for a list of packages based on axi. | |
| 95 | - """ | |
| 96 | - terms = ["XP"+item for item in self.pkg_profile] | |
| 97 | - query = xapian.Query(xapian.Query.OP_OR, terms) | |
| 98 | - enquire = xapian.Enquire(apt_xapian_index) | |
| 99 | - enquire.set_query(query) | |
| 100 | - rset = xapian.RSet() | |
| 101 | - for m in enquire.get_mset(0,apt_xapian_index.get_doccount()): | |
| 102 | - rset.add_document(m.docid) | |
| 103 | - # statistically good differentiators between relevant and non-relevant | |
| 104 | - eset = enquire.get_eset(profile_size, rset, FilterTag()) | |
| 105 | - profile = [] | |
| 106 | - for res in eset: | |
| 107 | - profile.append(res.term) | |
| 108 | - logging.debug("%.2f %s" % (res.weight,res.term.lstrip("XT"))) | |
| 121 | + enquire = xapian.Enquire(items_repository) | |
| 122 | + matches = data.axi_search_pkgs(items_repository,self.pkg_profile) | |
| 123 | + rset_packages = xapian.RSet() | |
| 124 | + for m in matches: | |
| 125 | + rset_packages.add_document(m.docid) | |
| 126 | + # statistically good differentiators | |
| 127 | + eset_tags = enquire.get_eset(size, rset_packages, FilterTag()) | |
| 128 | + profile = [res.term for res in eset_tags] | |
| 109 | 129 | return profile |
| 110 | 130 | |
| 111 | - #def txi_tag_profile(self,tags_xapian_index,profile_size): | |
| 112 | - # """ | |
| 113 | - # Return most relevant tags for a list of packages based on tags index. | |
| 114 | - # """ | |
| 115 | - # return tags_xapian_index.relevant_tags_from_db(self.pkg_profile, | |
| 116 | - # profile_size) | |
| 131 | + def desc_profile(self,items_repository,size): | |
| 132 | + """ | |
| 133 | + Return most relevant keywords for a list of packages based on their | |
| 134 | + text descriptions. | |
| 135 | + """ | |
| 136 | + enquire = xapian.Enquire(items_repository) | |
| 137 | + matches = data.axi_search_pkgs(items_repository,self.pkg_profile) | |
| 138 | + rset_packages = xapian.RSet() | |
| 139 | + for m in matches: | |
| 140 | + rset_packages.add_document(m.docid) | |
| 141 | + eset_keywords = enquire.get_eset(size, rset_packages, | |
| 142 | + FilterDescription()) | |
| 143 | + profile = [res.term for res in eset_keywords] | |
| 144 | + return profile | |
| 145 | + | |
| 146 | + def full_profile(self,items_repository,size): | |
| 147 | + """ | |
| 148 | + Return most relevant tags and keywords for a list of packages based | |
| 149 | + on their tags and descriptions. | |
| 150 | + """ | |
| 151 | + tag_profile = self.tag_profile(items_repository,size)[:size/2] | |
| 152 | + desc_profile = self.desc_profile(items_repository,size)[:size/2] | |
| 153 | + return tag_profile+desc_profile | |
| 117 | 154 | |
| 118 | 155 | def maximal_pkg_profile(self): |
| 119 | 156 | """ |
| ... | ... | @@ -132,12 +169,11 @@ class User: |
| 132 | 169 | if or_dep.name in self.pkg_profile: |
| 133 | 170 | self.pkg_profile.remove(or_dep.name) |
| 134 | 171 | except: |
| 135 | - logging.debug("Disconsidering package not found in cache: %s" | |
| 136 | - % p) | |
| 172 | + logging.debug("Package not found in cache: %s" % p) | |
| 137 | 173 | profile_size = len(self.pkg_profile) |
| 138 | - logging.info("Reduced packages profile size from %d to %d." % | |
| 139 | - (old_profile_size, profile_size)) | |
| 140 | - return set(self.pkg_profile) | |
| 174 | + logging.debug("Maximal package profile: reduced packages profile size \ | |
| 175 | + from %d to %d." % (old_profile_size, profile_size)) | |
| 176 | + return self.pkg_profile | |
| 141 | 177 | |
| 142 | 178 | class LocalSystem(User): |
| 143 | 179 | """ |
| ... | ... | @@ -168,8 +204,9 @@ class LocalSystem(User): |
| 168 | 204 | if pkg.is_auto_installed: |
| 169 | 205 | self.pkg_profile.remove(p) |
| 170 | 206 | except: |
| 171 | - logging.debug("Disconsidering package not found in cache: %s" | |
| 172 | - % p) | |
| 207 | + logging.debug("Package not found in cache: %s" % p) | |
| 173 | 208 | profile_size = len(self.pkg_profile) |
| 174 | - logging.info("Reduced packages profile size from %d to %d." % | |
| 175 | - (old_profile_size, profile_size)) | |
| 209 | + logging.debug("No auto-intalled package profile: reduced packages \ | |
| 210 | + profile size from %d to %d." % | |
| 211 | + (old_profile_size, profile_size)) | |
| 212 | + return self.pkg_profile | ... | ... |