diff --git a/demo_rec.py b/demo_rec.py deleted file mode 100644 index 5957197..0000000 --- a/demo_rec.py +++ /dev/null @@ -1,128 +0,0 @@ -#!/usr/bin/python - -# AppRecommender - A GNU/Linux application recommender -# -# Copyright (C) 2010 Tassia Camoes -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program. If not, see . - -import os -import sys -import commands -import re - -import xapian -from debian import debtags - -DB_PATH = "/var/lib/debtags/package-tags" -INDEX_PATH = os.path.expanduser("~/.app-recommender/debtags_index") - -def load_debtags_db(path): - """ Load debtags database. """ - debtags_db = debtags.DB() - tag_filter = re.compile(r"^special::.+$|^.+::TODO$") - try: - debtags_db.read(open(path, "r"), lambda x: not tag_filter.match(x)) - except IOError: - print >> sys.stderr, ("IOError: could not open debtags file \'%s\'" % - path) - exit(1) - return debtags_db - -def get_system_pkgs(): - """ Return set of system packages. """ - dpkg_output = commands.getoutput('/usr/bin/dpkg --get-selections') - return dpkg_output.replace('install','\t').split() - -def get_most_relevant_tags(debtags_db,pkgs_list): - """ Return most relevant tags considering a list of packages. """ - relevant_db = debtags_db.choose_packages(pkgs_list) - relevance_index = debtags.relevance_index_function(debtags_db,relevant_db) - sorted_relevant_tags = sorted(relevant_db.iter_tags(), - lambda a, b: cmp(relevance_index(a), - relevance_index(b))) - return normalize_tags(' '.join(sorted_relevant_tags[-50:])) - -def normalize_tags(string): - """ Normalize tag string so that it can be indexed and retrieved. """ - return string.replace(':','_').replace('-','\'') - -def create_debtags_index(debtags_db,index_path): - """ Create a xapian index for debtags info based on file 'debtags_db' and - place it at 'index_path'. - """ - if not os.path.exists(index_path): - os.makedirs(index_path) - print "Creating new debtags xapian index at \'%s\'" % index_path - debtags_index = xapian.WritableDatabase(index_path, - xapian.DB_CREATE_OR_OVERWRITE) - for pkg,tags in debtags_db.iter_packages_tags(): - doc = xapian.Document() - doc.set_data(pkg) - for tag in tags: - doc.add_term(normalize_tags(tag)) - print "indexing ",debtags_index.add_document(doc) - return debtags_index - -def load_debtags_index(debtags_db,reindex): - """ Load an existing or new debtags index, based on boolean reindex. """ - if not reindex: - try: - print ("Opening existing debtags xapian index at \'%s\'" % - INDEX_PATH) - debtags_index = xapian.Database(INDEX_PATH) - except DatabaseError: - print "Could not open debtags xapian index" - reindex = 1 - if reindex: - debtags_index = create_debtags_index(debtags_db,INDEX_PATH) - return debtags_index - - -class PkgMatchDecider(xapian.MatchDecider): - """ Extends xapian.MatchDecider to disconsider installed packages. """ - - def __init__(self, installed_pkgs): - xapian.MatchDecider.__init__(self) - self.installed_pkgs = installed_pkgs - - def __call__(self, doc): - return doc.get_data() not in self.installed_pkgs - - -if __name__ == '__main__': - - reindex = 0 - if len(sys.argv) == 2: - DB_PATH = sys.argv[1] - reindex = 1 - print "reindex true" - elif len(sys.argv) > 2: - print >> sys.stderr, ("Usage: %s [PATH_TO_DEBTAGS_DATABASE]" % - sys.argv[0]) - sys.exit(1) - - debtags_db = load_debtags_db(DB_PATH) - installed_pkgs = get_system_pkgs() - best_tags = get_most_relevant_tags(debtags_db,installed_pkgs) - - debtags_index = load_debtags_index(debtags_db,reindex) - qp = xapian.QueryParser() - query = qp.parse_query(best_tags) - enquire = xapian.Enquire(debtags_index) - enquire.set_query(query) - - mset = enquire.get_mset(0, 20, None, PkgMatchDecider(installed_pkgs)) - for m in mset: - print "%2d: %s" % (m.rank, m.document.get_data()) diff --git a/doc/doxy_config b/doc/doxy_config new file mode 100644 index 0000000..8a73f58 --- /dev/null +++ b/doc/doxy_config @@ -0,0 +1,270 @@ +# Doxyfile 1.7.1 + +#--------------------------------------------------------------------------- +# Project related configuration options +#--------------------------------------------------------------------------- +DOXYFILE_ENCODING = UTF-8 +PROJECT_NAME = App Recommender +PROJECT_NUMBER = 0.1 +OUTPUT_DIRECTORY = doc/ +CREATE_SUBDIRS = NO +OUTPUT_LANGUAGE = English +BRIEF_MEMBER_DESC = YES +REPEAT_BRIEF = YES +ABBREVIATE_BRIEF = +ALWAYS_DETAILED_SEC = NO +INLINE_INHERITED_MEMB = NO +FULL_PATH_NAMES = YES +STRIP_FROM_PATH = +STRIP_FROM_INC_PATH = +SHORT_NAMES = NO +JAVADOC_AUTOBRIEF = NO +QT_AUTOBRIEF = NO +MULTILINE_CPP_IS_BRIEF = NO +INHERIT_DOCS = YES +SEPARATE_MEMBER_PAGES = NO +TAB_SIZE = 4 +ALIASES = +OPTIMIZE_OUTPUT_FOR_C = NO +OPTIMIZE_OUTPUT_JAVA = NO +OPTIMIZE_FOR_FORTRAN = NO +OPTIMIZE_OUTPUT_VHDL = NO +EXTENSION_MAPPING = +BUILTIN_STL_SUPPORT = NO +CPP_CLI_SUPPORT = NO +SIP_SUPPORT = NO +IDL_PROPERTY_SUPPORT = YES +DISTRIBUTE_GROUP_DOC = NO +SUBGROUPING = YES +TYPEDEF_HIDES_STRUCT = NO +SYMBOL_CACHE_SIZE = 0 +#--------------------------------------------------------------------------- +# Build related configuration options +#--------------------------------------------------------------------------- +EXTRACT_ALL = NO +EXTRACT_PRIVATE = NO +EXTRACT_STATIC = NO +EXTRACT_LOCAL_CLASSES = YES +EXTRACT_LOCAL_METHODS = YES +EXTRACT_ANON_NSPACES = NO +HIDE_UNDOC_MEMBERS = NO +HIDE_UNDOC_CLASSES = NO +HIDE_FRIEND_COMPOUNDS = NO +HIDE_IN_BODY_DOCS = NO +INTERNAL_DOCS = NO +CASE_SENSE_NAMES = YES +HIDE_SCOPE_NAMES = NO +SHOW_INCLUDE_FILES = YES +FORCE_LOCAL_INCLUDES = NO +INLINE_INFO = YES +SORT_MEMBER_DOCS = YES +SORT_BRIEF_DOCS = NO +SORT_MEMBERS_CTORS_1ST = NO +SORT_GROUP_NAMES = NO +SORT_BY_SCOPE_NAME = NO +GENERATE_TODOLIST = YES +GENERATE_TESTLIST = YES +GENERATE_BUGLIST = YES +GENERATE_DEPRECATEDLIST= YES +ENABLED_SECTIONS = +MAX_INITIALIZER_LINES = 30 +SHOW_USED_FILES = YES +SHOW_DIRECTORIES = NO +SHOW_FILES = YES +SHOW_NAMESPACES = YES +FILE_VERSION_FILTER = +LAYOUT_FILE = +#--------------------------------------------------------------------------- +# configuration options related to warning and progress messages +#--------------------------------------------------------------------------- +QUIET = NO +WARNINGS = YES +WARN_IF_UNDOCUMENTED = YES +WARN_IF_DOC_ERROR = YES +WARN_NO_PARAMDOC = NO +WARN_FORMAT = "$file:$line: $text" +WARN_LOGFILE = +#--------------------------------------------------------------------------- +# configuration options related to the input files +#--------------------------------------------------------------------------- +INPUT = +INPUT_ENCODING = UTF-8 +FILE_PATTERNS = +RECURSIVE = YES +EXCLUDE = +EXCLUDE_SYMLINKS = NO +EXCLUDE_PATTERNS = +EXCLUDE_SYMBOLS = +EXAMPLE_PATH = +EXAMPLE_PATTERNS = +EXAMPLE_RECURSIVE = NO +IMAGE_PATH = +INPUT_FILTER = +FILTER_PATTERNS = +FILTER_SOURCE_FILES = NO +#--------------------------------------------------------------------------- +# configuration options related to source browsing +#--------------------------------------------------------------------------- +SOURCE_BROWSER = YES +INLINE_SOURCES = NO +STRIP_CODE_COMMENTS = YES +REFERENCED_BY_RELATION = NO +REFERENCES_RELATION = NO +REFERENCES_LINK_SOURCE = YES +USE_HTAGS = NO +VERBATIM_HEADERS = YES +#--------------------------------------------------------------------------- +# configuration options related to the alphabetical class index +#--------------------------------------------------------------------------- +ALPHABETICAL_INDEX = YES +COLS_IN_ALPHA_INDEX = 5 +IGNORE_PREFIX = +#--------------------------------------------------------------------------- +# configuration options related to the HTML output +#--------------------------------------------------------------------------- +GENERATE_HTML = YES +HTML_OUTPUT = html +HTML_FILE_EXTENSION = .html +HTML_HEADER = +HTML_FOOTER = +HTML_STYLESHEET = +HTML_COLORSTYLE_HUE = 220 +HTML_COLORSTYLE_SAT = 100 +HTML_COLORSTYLE_GAMMA = 80 +HTML_TIMESTAMP = YES +HTML_ALIGN_MEMBERS = YES +HTML_DYNAMIC_SECTIONS = NO +GENERATE_DOCSET = NO +DOCSET_FEEDNAME = "Doxygen generated docs" +DOCSET_BUNDLE_ID = org.doxygen.Project +DOCSET_PUBLISHER_ID = org.doxygen.Publisher +DOCSET_PUBLISHER_NAME = Publisher +GENERATE_HTMLHELP = NO +CHM_FILE = +HHC_LOCATION = +GENERATE_CHI = NO +CHM_INDEX_ENCODING = +BINARY_TOC = NO +TOC_EXPAND = NO +GENERATE_QHP = NO +QCH_FILE = +QHP_NAMESPACE = org.doxygen.Project +QHP_VIRTUAL_FOLDER = doc +QHP_CUST_FILTER_NAME = +QHP_CUST_FILTER_ATTRS = +QHP_SECT_FILTER_ATTRS = +QHG_LOCATION = +GENERATE_ECLIPSEHELP = NO +ECLIPSE_DOC_ID = org.doxygen.Project +DISABLE_INDEX = NO +ENUM_VALUES_PER_LINE = 4 +GENERATE_TREEVIEW = NO +USE_INLINE_TREES = NO +TREEVIEW_WIDTH = 250 +EXT_LINKS_IN_WINDOW = NO +FORMULA_FONTSIZE = 10 +FORMULA_TRANSPARENT = YES +SEARCHENGINE = YES +SERVER_BASED_SEARCH = NO +#--------------------------------------------------------------------------- +# configuration options related to the LaTeX output +#--------------------------------------------------------------------------- +GENERATE_LATEX = NO +LATEX_OUTPUT = latex +LATEX_CMD_NAME = latex +MAKEINDEX_CMD_NAME = makeindex +COMPACT_LATEX = NO +PAPER_TYPE = a4wide +EXTRA_PACKAGES = +LATEX_HEADER = +PDF_HYPERLINKS = YES +USE_PDFLATEX = YES +LATEX_BATCHMODE = NO +LATEX_HIDE_INDICES = NO +LATEX_SOURCE_CODE = NO +#--------------------------------------------------------------------------- +# configuration options related to the RTF output +#--------------------------------------------------------------------------- +GENERATE_RTF = NO +RTF_OUTPUT = rtf +COMPACT_RTF = NO +RTF_HYPERLINKS = NO +RTF_STYLESHEET_FILE = +RTF_EXTENSIONS_FILE = +#--------------------------------------------------------------------------- +# configuration options related to the man page output +#--------------------------------------------------------------------------- +GENERATE_MAN = NO +MAN_OUTPUT = man +MAN_EXTENSION = .3 +MAN_LINKS = NO +#--------------------------------------------------------------------------- +# configuration options related to the XML output +#--------------------------------------------------------------------------- +GENERATE_XML = NO +XML_OUTPUT = xml +XML_SCHEMA = +XML_DTD = +XML_PROGRAMLISTING = YES +#--------------------------------------------------------------------------- +# configuration options for the AutoGen Definitions output +#--------------------------------------------------------------------------- +GENERATE_AUTOGEN_DEF = NO +#--------------------------------------------------------------------------- +# configuration options related to the Perl module output +#--------------------------------------------------------------------------- +GENERATE_PERLMOD = NO +PERLMOD_LATEX = NO +PERLMOD_PRETTY = YES +PERLMOD_MAKEVAR_PREFIX = +#--------------------------------------------------------------------------- +# Configuration options related to the preprocessor +#--------------------------------------------------------------------------- +ENABLE_PREPROCESSING = YES +MACRO_EXPANSION = NO +EXPAND_ONLY_PREDEF = NO +SEARCH_INCLUDES = YES +INCLUDE_PATH = +INCLUDE_FILE_PATTERNS = +PREDEFINED = +EXPAND_AS_DEFINED = +SKIP_FUNCTION_MACROS = YES +#--------------------------------------------------------------------------- +# Configuration::additions related to external references +#--------------------------------------------------------------------------- +TAGFILES = +GENERATE_TAGFILE = +ALLEXTERNALS = NO +EXTERNAL_GROUPS = YES +PERL_PATH = /usr/bin/perl +#--------------------------------------------------------------------------- +# Configuration options related to the dot tool +#--------------------------------------------------------------------------- +CLASS_DIAGRAMS = YES +MSCGEN_PATH = +HIDE_UNDOC_RELATIONS = YES +HAVE_DOT = NO +DOT_NUM_THREADS = 0 +DOT_FONTNAME = FreeSans.ttf +DOT_FONTSIZE = 10 +DOT_FONTPATH = +CLASS_GRAPH = YES +COLLABORATION_GRAPH = YES +GROUP_GRAPHS = YES +UML_LOOK = NO +TEMPLATE_RELATIONS = NO +INCLUDE_GRAPH = YES +INCLUDED_BY_GRAPH = YES +CALL_GRAPH = NO +CALLER_GRAPH = NO +GRAPHICAL_HIERARCHY = YES +DIRECTORY_GRAPH = YES +DOT_IMAGE_FORMAT = png +DOT_PATH = +DOTFILE_DIRS = +DOT_GRAPH_MAX_NODES = 50 +MAX_DOT_GRAPH_DEPTH = 0 +DOT_TRANSPARENT = NO +DOT_MULTI_TARGETS = YES +GENERATE_LEGEND = YES +DOT_CLEANUP = YES diff --git a/doxy_config b/doxy_config deleted file mode 100644 index 8a73f58..0000000 --- a/doxy_config +++ /dev/null @@ -1,270 +0,0 @@ -# Doxyfile 1.7.1 - -#--------------------------------------------------------------------------- -# Project related configuration options -#--------------------------------------------------------------------------- -DOXYFILE_ENCODING = UTF-8 -PROJECT_NAME = App Recommender -PROJECT_NUMBER = 0.1 -OUTPUT_DIRECTORY = doc/ -CREATE_SUBDIRS = NO -OUTPUT_LANGUAGE = English -BRIEF_MEMBER_DESC = YES -REPEAT_BRIEF = YES -ABBREVIATE_BRIEF = -ALWAYS_DETAILED_SEC = NO -INLINE_INHERITED_MEMB = NO -FULL_PATH_NAMES = YES -STRIP_FROM_PATH = -STRIP_FROM_INC_PATH = -SHORT_NAMES = NO -JAVADOC_AUTOBRIEF = NO -QT_AUTOBRIEF = NO -MULTILINE_CPP_IS_BRIEF = NO -INHERIT_DOCS = YES -SEPARATE_MEMBER_PAGES = NO -TAB_SIZE = 4 -ALIASES = -OPTIMIZE_OUTPUT_FOR_C = NO -OPTIMIZE_OUTPUT_JAVA = NO -OPTIMIZE_FOR_FORTRAN = NO -OPTIMIZE_OUTPUT_VHDL = NO -EXTENSION_MAPPING = -BUILTIN_STL_SUPPORT = NO -CPP_CLI_SUPPORT = NO -SIP_SUPPORT = NO -IDL_PROPERTY_SUPPORT = YES -DISTRIBUTE_GROUP_DOC = NO -SUBGROUPING = YES -TYPEDEF_HIDES_STRUCT = NO -SYMBOL_CACHE_SIZE = 0 -#--------------------------------------------------------------------------- -# Build related configuration options -#--------------------------------------------------------------------------- -EXTRACT_ALL = NO -EXTRACT_PRIVATE = NO -EXTRACT_STATIC = NO -EXTRACT_LOCAL_CLASSES = YES -EXTRACT_LOCAL_METHODS = YES -EXTRACT_ANON_NSPACES = NO -HIDE_UNDOC_MEMBERS = NO -HIDE_UNDOC_CLASSES = NO -HIDE_FRIEND_COMPOUNDS = NO -HIDE_IN_BODY_DOCS = NO -INTERNAL_DOCS = NO -CASE_SENSE_NAMES = YES -HIDE_SCOPE_NAMES = NO -SHOW_INCLUDE_FILES = YES -FORCE_LOCAL_INCLUDES = NO -INLINE_INFO = YES -SORT_MEMBER_DOCS = YES -SORT_BRIEF_DOCS = NO -SORT_MEMBERS_CTORS_1ST = NO -SORT_GROUP_NAMES = NO -SORT_BY_SCOPE_NAME = NO -GENERATE_TODOLIST = YES -GENERATE_TESTLIST = YES -GENERATE_BUGLIST = YES -GENERATE_DEPRECATEDLIST= YES -ENABLED_SECTIONS = -MAX_INITIALIZER_LINES = 30 -SHOW_USED_FILES = YES -SHOW_DIRECTORIES = NO -SHOW_FILES = YES -SHOW_NAMESPACES = YES -FILE_VERSION_FILTER = -LAYOUT_FILE = -#--------------------------------------------------------------------------- -# configuration options related to warning and progress messages -#--------------------------------------------------------------------------- -QUIET = NO -WARNINGS = YES -WARN_IF_UNDOCUMENTED = YES -WARN_IF_DOC_ERROR = YES -WARN_NO_PARAMDOC = NO -WARN_FORMAT = "$file:$line: $text" -WARN_LOGFILE = -#--------------------------------------------------------------------------- -# configuration options related to the input files -#--------------------------------------------------------------------------- -INPUT = -INPUT_ENCODING = UTF-8 -FILE_PATTERNS = -RECURSIVE = YES -EXCLUDE = -EXCLUDE_SYMLINKS = NO -EXCLUDE_PATTERNS = -EXCLUDE_SYMBOLS = -EXAMPLE_PATH = -EXAMPLE_PATTERNS = -EXAMPLE_RECURSIVE = NO -IMAGE_PATH = -INPUT_FILTER = -FILTER_PATTERNS = -FILTER_SOURCE_FILES = NO -#--------------------------------------------------------------------------- -# configuration options related to source browsing -#--------------------------------------------------------------------------- -SOURCE_BROWSER = YES -INLINE_SOURCES = NO -STRIP_CODE_COMMENTS = YES -REFERENCED_BY_RELATION = NO -REFERENCES_RELATION = NO -REFERENCES_LINK_SOURCE = YES -USE_HTAGS = NO -VERBATIM_HEADERS = YES -#--------------------------------------------------------------------------- -# configuration options related to the alphabetical class index -#--------------------------------------------------------------------------- -ALPHABETICAL_INDEX = YES -COLS_IN_ALPHA_INDEX = 5 -IGNORE_PREFIX = -#--------------------------------------------------------------------------- -# configuration options related to the HTML output -#--------------------------------------------------------------------------- -GENERATE_HTML = YES -HTML_OUTPUT = html -HTML_FILE_EXTENSION = .html -HTML_HEADER = -HTML_FOOTER = -HTML_STYLESHEET = -HTML_COLORSTYLE_HUE = 220 -HTML_COLORSTYLE_SAT = 100 -HTML_COLORSTYLE_GAMMA = 80 -HTML_TIMESTAMP = YES -HTML_ALIGN_MEMBERS = YES -HTML_DYNAMIC_SECTIONS = NO -GENERATE_DOCSET = NO -DOCSET_FEEDNAME = "Doxygen generated docs" -DOCSET_BUNDLE_ID = org.doxygen.Project -DOCSET_PUBLISHER_ID = org.doxygen.Publisher -DOCSET_PUBLISHER_NAME = Publisher -GENERATE_HTMLHELP = NO -CHM_FILE = -HHC_LOCATION = -GENERATE_CHI = NO -CHM_INDEX_ENCODING = -BINARY_TOC = NO -TOC_EXPAND = NO -GENERATE_QHP = NO -QCH_FILE = -QHP_NAMESPACE = org.doxygen.Project -QHP_VIRTUAL_FOLDER = doc -QHP_CUST_FILTER_NAME = -QHP_CUST_FILTER_ATTRS = -QHP_SECT_FILTER_ATTRS = -QHG_LOCATION = -GENERATE_ECLIPSEHELP = NO -ECLIPSE_DOC_ID = org.doxygen.Project -DISABLE_INDEX = NO -ENUM_VALUES_PER_LINE = 4 -GENERATE_TREEVIEW = NO -USE_INLINE_TREES = NO -TREEVIEW_WIDTH = 250 -EXT_LINKS_IN_WINDOW = NO -FORMULA_FONTSIZE = 10 -FORMULA_TRANSPARENT = YES -SEARCHENGINE = YES -SERVER_BASED_SEARCH = NO -#--------------------------------------------------------------------------- -# configuration options related to the LaTeX output -#--------------------------------------------------------------------------- -GENERATE_LATEX = NO -LATEX_OUTPUT = latex -LATEX_CMD_NAME = latex -MAKEINDEX_CMD_NAME = makeindex -COMPACT_LATEX = NO -PAPER_TYPE = a4wide -EXTRA_PACKAGES = -LATEX_HEADER = -PDF_HYPERLINKS = YES -USE_PDFLATEX = YES -LATEX_BATCHMODE = NO -LATEX_HIDE_INDICES = NO -LATEX_SOURCE_CODE = NO -#--------------------------------------------------------------------------- -# configuration options related to the RTF output -#--------------------------------------------------------------------------- -GENERATE_RTF = NO -RTF_OUTPUT = rtf -COMPACT_RTF = NO -RTF_HYPERLINKS = NO -RTF_STYLESHEET_FILE = -RTF_EXTENSIONS_FILE = -#--------------------------------------------------------------------------- -# configuration options related to the man page output -#--------------------------------------------------------------------------- -GENERATE_MAN = NO -MAN_OUTPUT = man -MAN_EXTENSION = .3 -MAN_LINKS = NO -#--------------------------------------------------------------------------- -# configuration options related to the XML output -#--------------------------------------------------------------------------- -GENERATE_XML = NO -XML_OUTPUT = xml -XML_SCHEMA = -XML_DTD = -XML_PROGRAMLISTING = YES -#--------------------------------------------------------------------------- -# configuration options for the AutoGen Definitions output -#--------------------------------------------------------------------------- -GENERATE_AUTOGEN_DEF = NO -#--------------------------------------------------------------------------- -# configuration options related to the Perl module output -#--------------------------------------------------------------------------- -GENERATE_PERLMOD = NO -PERLMOD_LATEX = NO -PERLMOD_PRETTY = YES -PERLMOD_MAKEVAR_PREFIX = -#--------------------------------------------------------------------------- -# Configuration options related to the preprocessor -#--------------------------------------------------------------------------- -ENABLE_PREPROCESSING = YES -MACRO_EXPANSION = NO -EXPAND_ONLY_PREDEF = NO -SEARCH_INCLUDES = YES -INCLUDE_PATH = -INCLUDE_FILE_PATTERNS = -PREDEFINED = -EXPAND_AS_DEFINED = -SKIP_FUNCTION_MACROS = YES -#--------------------------------------------------------------------------- -# Configuration::additions related to external references -#--------------------------------------------------------------------------- -TAGFILES = -GENERATE_TAGFILE = -ALLEXTERNALS = NO -EXTERNAL_GROUPS = YES -PERL_PATH = /usr/bin/perl -#--------------------------------------------------------------------------- -# Configuration options related to the dot tool -#--------------------------------------------------------------------------- -CLASS_DIAGRAMS = YES -MSCGEN_PATH = -HIDE_UNDOC_RELATIONS = YES -HAVE_DOT = NO -DOT_NUM_THREADS = 0 -DOT_FONTNAME = FreeSans.ttf -DOT_FONTSIZE = 10 -DOT_FONTPATH = -CLASS_GRAPH = YES -COLLABORATION_GRAPH = YES -GROUP_GRAPHS = YES -UML_LOOK = NO -TEMPLATE_RELATIONS = NO -INCLUDE_GRAPH = YES -INCLUDED_BY_GRAPH = YES -CALL_GRAPH = NO -CALLER_GRAPH = NO -GRAPHICAL_HIERARCHY = YES -DIRECTORY_GRAPH = YES -DOT_IMAGE_FORMAT = png -DOT_PATH = -DOTFILE_DIRS = -DOT_GRAPH_MAX_NODES = 50 -MAX_DOT_GRAPH_DEPTH = 0 -DOT_TRANSPARENT = NO -DOT_MULTI_TARGETS = YES -GENERATE_LEGEND = YES -DOT_CLEANUP = YES diff --git a/src/data.py b/src/data.py index 82fb870..3a7395e 100644 --- a/src/data.py +++ b/src/data.py @@ -68,16 +68,6 @@ class DebtagsDB(debtags.DB): relevance_index(b))) return normalize_tags(' '.join(sorted_relevant_tags[-qtd_of_tags:])) -class PkgMatchDecider(xapian.MatchDecider): - """ Extends xapian.MatchDecider to disconsider installed packages. """ - - def __init__(self, installed_pkgs): - xapian.MatchDecider.__init__(self) - self.installed_pkgs = installed_pkgs - - def __call__(self, doc): - return doc.get_data() not in self.installed_pkgs - class DebtagsIndex: def __init__(self,path): self.path = path diff --git a/src/demo_rec.py b/src/demo_rec.py new file mode 100755 index 0000000..f69e1b8 --- /dev/null +++ b/src/demo_rec.py @@ -0,0 +1,118 @@ +#!/usr/bin/python + +# AppRecommender - A GNU/Linux application recommender +# +# Copyright (C) 2010 Tassia Camoes +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +import os +import sys +import commands +import re + +import xapian +from debian import debtags +from strategy import PkgMatchDecider + +DB_PATH = "/var/lib/debtags/package-tags" +INDEX_PATH = os.path.expanduser("~/.app-recommender/debtags_index") + +def load_debtags_db(path): + """ Load debtags database. """ + debtags_db = debtags.DB() + tag_filter = re.compile(r"^special::.+$|^.+::TODO$") + try: + debtags_db.read(open(path, "r"), lambda x: not tag_filter.match(x)) + except IOError: + print >> sys.stderr, ("IOError: could not open debtags file \'%s\'" % + path) + exit(1) + return debtags_db + +def get_system_pkgs(): + """ Return set of system packages. """ + dpkg_output = commands.getoutput('/usr/bin/dpkg --get-selections') + return dpkg_output.replace('install','\t').split() + +def get_most_relevant_tags(debtags_db,pkgs_list): + """ Return most relevant tags considering a list of packages. """ + relevant_db = debtags_db.choose_packages(pkgs_list) + relevance_index = debtags.relevance_index_function(debtags_db,relevant_db) + sorted_relevant_tags = sorted(relevant_db.iter_tags(), + lambda a, b: cmp(relevance_index(a), + relevance_index(b))) + return normalize_tags(' '.join(sorted_relevant_tags[-50:])) + +def normalize_tags(string): + """ Normalize tag string so that it can be indexed and retrieved. """ + return string.replace(':','_').replace('-','\'') + +def create_debtags_index(debtags_db,index_path): + """ Create a xapian index for debtags info based on file 'debtags_db' and + place it at 'index_path'. + """ + if not os.path.exists(index_path): + os.makedirs(index_path) + print "Creating new debtags xapian index at \'%s\'" % index_path + debtags_index = xapian.WritableDatabase(index_path, + xapian.DB_CREATE_OR_OVERWRITE) + for pkg,tags in debtags_db.iter_packages_tags(): + doc = xapian.Document() + doc.set_data(pkg) + for tag in tags: + doc.add_term(normalize_tags(tag)) + print "indexing ",debtags_index.add_document(doc) + return debtags_index + +def load_debtags_index(debtags_db,reindex): + """ Load an existing or new debtags index, based on boolean reindex. """ + if not reindex: + try: + print ("Opening existing debtags xapian index at \'%s\'" % + INDEX_PATH) + debtags_index = xapian.Database(INDEX_PATH) + except DatabaseError: + print "Could not open debtags xapian index" + reindex = 1 + if reindex: + debtags_index = create_debtags_index(debtags_db,INDEX_PATH) + return debtags_index + + +if __name__ == '__main__': + + reindex = 0 + if len(sys.argv) == 2: + DB_PATH = sys.argv[1] + reindex = 1 + print "reindex true" + elif len(sys.argv) > 2: + print >> sys.stderr, ("Usage: %s [PATH_TO_DEBTAGS_DATABASE]" % + sys.argv[0]) + sys.exit(1) + + debtags_db = load_debtags_db(DB_PATH) + installed_pkgs = get_system_pkgs() + best_tags = get_most_relevant_tags(debtags_db,installed_pkgs) + + debtags_index = load_debtags_index(debtags_db,reindex) + qp = xapian.QueryParser() + query = qp.parse_query(best_tags) + enquire = xapian.Enquire(debtags_index) + enquire.set_query(query) + + mset = enquire.get_mset(0, 20, None, PkgMatchDecider(installed_pkgs)) + for m in mset: + print "%2d: %s" % (m.rank, m.document.get_data()) -- libgit2 0.21.2