# epydoc -- Utility functions
#
# Copyright (C) 2005 Edward Loper
# Author: Edward Loper
# URL:
#
# $Id: util.py 1671 2008-01-29 02:55:49Z edloper $

"""
Miscellaneous utility functions that are used by multiple modules.

@group Python source types: is_module_file, is_package_dir, is_pyname,
    py_src_filename
@group Text processing: wordwrap, decode_with_backslashreplace,
    plaintext_to_html
"""
__docformat__ = 'epytext en'

import os, os.path, re

# Python 3 has no ``basestring``; alias it to ``str`` there so that the
# isinstance() checks below keep working on both major versions.
try:
    basestring
except NameError:
    basestring = str

# The 8-bit string type of the running interpreter: ``str`` on Python 2,
# ``bytes`` on Python 3.  (Computed rather than spelled ``bytes`` so that
# the module still loads on interpreters that predate that name.)
_BINARY_TYPE = type(''.encode('ascii'))

######################################################################
## Python Source Types
######################################################################

PY_SRC_EXTENSIONS = ['.py', '.pyw']
PY_BIN_EXTENSIONS = ['.pyc', '.so', '.pyd']

def is_module_file(path):
    """
    Return a true value if C{path} names an existing file whose base
    name is a valid Python identifier and whose extension is one of
    L{PY_SRC_EXTENSIONS} or L{PY_BIN_EXTENSIONS}.

    @param path: The candidate file name.  Non-string values are
        rejected (C{False} is returned) rather than raising.
    @return: A true value (not necessarily C{True}) for module files;
        a false value otherwise.
    """
    # Make sure it's a file name.
    if not isinstance(path, basestring):
        return False
    filename = os.path.basename(path)
    (basename, extension) = os.path.splitext(filename)
    return (os.path.isfile(path) and
            re.match(r'[a-zA-Z_]\w*$', basename) and
            extension in PY_SRC_EXTENSIONS+PY_BIN_EXTENSIONS)

def is_src_filename(filename):
    """
    Return true if C{filename} is a string naming an existing path
    whose extension is one of L{PY_SRC_EXTENSIONS}.

    @rtype: C{bool}
    """
    if not isinstance(filename, basestring):
        return False
    if not os.path.exists(filename):
        return False
    return os.path.splitext(filename)[1] in PY_SRC_EXTENSIONS

def is_package_dir(dirname):
    """
    Return true if the given directory is a valid package directory
    (i.e., it names a directory that contains a valid __init__ file).

    @param dirname: The candidate directory name.  Non-string values
        are rejected (C{False} is returned) rather than raising.
    @rtype: C{bool}
    """
    # Make sure it's a directory name.
    if not isinstance(dirname, basestring):
        return False
    if not os.path.isdir(dirname):
        return False
    dirname = os.path.abspath(dirname)
    # The directory's own name is deliberately *not* required to be a
    # valid identifier: that constraint was removed because of
    # sourceforge bug #1787028 -- in some cases (eg eggs), it's too
    # strict.
    for name in os.listdir(dirname):
        filename = os.path.join(dirname, name)
        if name.startswith('__init__.') and is_module_file(filename):
            return True
    return False

def is_pyname(name):
    """
    Return a true value if C{name} is a dotted sequence of one or more
    C{\\w+} words (e.g. C{'a.b.c'}).

    @return: The regular expression match object, or C{None} if
        C{name} does not match.
    """
    return re.match(r"\w+(\.\w+)*$", name)

def py_src_filename(filename):
    """
    Return the Python source file that corresponds to C{filename}.

    If C{filename} already has a source extension it is returned
    unchanged (without checking that it exists); otherwise its
    extension is replaced by each of L{PY_SRC_EXTENSIONS} in turn and
    the first candidate that exists as a file is returned.

    @raise ValueError: If no corresponding source file exists.
    @rtype: C{string}
    """
    basefile, extension = os.path.splitext(filename)
    if extension in PY_SRC_EXTENSIONS:
        return filename
    for ext in PY_SRC_EXTENSIONS:
        candidate = '%s%s' % (basefile, ext)
        if os.path.isfile(candidate):
            return candidate
    raise ValueError('Could not find a corresponding '
                     'Python source file for %r.' % filename)

def munge_script_name(filename):
    """
    Return a name of the form C{'script-NAME'} for the given script
    file, where C{NAME} is the file's base name with every non-word
    character replaced by an underscore.

    @rtype: C{string}
    """
    name = os.path.split(filename)[1]
    name = re.sub(r'\W', '_', name)
    return 'script-'+name

######################################################################
## Text Processing
######################################################################

def decode_with_backslashreplace(s):
    r"""
    Convert the given 8-bit string into unicode, treating any
    character c such that ord(c)<128 as an ascii character, and
    converting any c such that ord(c)>=128 into a backslashed escape
    sequence.

        >>> decode_with_backslashreplace('abc\xff\xe8')
        u'abc\\xff\\xe8'

    @param s: An 8-bit string (C{str} on Python 2, C{bytes} on
        Python 3).
    """
    # s.encode('string-escape') is not appropriate here, since it
    # also adds backslashes to some ascii chars (eg \ and ').
    assert isinstance(s, _BINARY_TYPE)
    return (s
            .decode('latin1')
            .encode('ascii', 'backslashreplace')
            .decode('ascii'))

def wordwrap(str, indent=0, right=75, startindex=0, splitchars=''):
    """
    Word-wrap the given string.  I.e., add newlines to the string such
    that any lines that are longer than C{right} are broken into
    shorter lines (at the first whitespace sequence that occurs before
    index C{right}).  If the given string contains newlines, they will
    I{not} be removed.  Any lines that begin with whitespace will not
    be wordwrapped.

    @param indent: If specified, then indent each line by this number
        of spaces.
    @type indent: C{int}
    @param right: The right margin for word wrapping.  Lines that are
        longer than C{right} will be broken at the first whitespace
        sequence before the right margin.
    @type right: C{int}
    @param startindex: If specified, then assume that the first line
        is already preceded by C{startindex} characters.
    @type startindex: C{int}
    @param splitchars: A list of non-whitespace characters which can
        be used to split a line.  (E.g., use '/\\' to allow path names
        to be split over multiple lines.)
    @rtype: C{str}
    """
    # NOTE: the parameter name ``str`` shadows the builtin; it is kept
    # because it is part of the public signature.
    if splitchars:
        chunks = re.split(r'( +|\n|[^ \n%s]*[%s])' %
                          (re.escape(splitchars), re.escape(splitchars)),
                          str.expandtabs())
    else:
        chunks = re.split(r'( +|\n)', str.expandtabs())
    result = [' '*(indent-startindex)]
    charindex = max(indent, startindex)
    for chunk in chunks:
        if (charindex+len(chunk) > right and charindex > 0) or chunk == '\n':
            # Start a new (indented) line.  The whitespace or newline
            # chunk that triggered the break is dropped, so that the
            # new line does not begin with it.
            result.append('\n' + ' '*indent)
            charindex = indent
            if chunk[:1] not in ('\n', ' '):
                result.append(chunk)
                charindex += len(chunk)
        else:
            result.append(chunk)
            charindex += len(chunk)
    return ''.join(result).rstrip()+'\n'

def plaintext_to_html(s):
    """
    @return: An HTML string that encodes the given plaintext string.
    In particular, special characters (such as C{'<'} and C{'&'})
    are escaped.
    @rtype: C{string}
    """
    # '&' must be replaced first, so that the ampersands introduced by
    # the other entities are not escaped a second time.
    s = s.replace('&', '&amp;').replace('"', '&quot;')
    s = s.replace('<', '&lt;').replace('>', '&gt;')
    return s

def plaintext_to_latex(str, nbsp=0, breakany=0):
    """
    @return: A LaTeX string that encodes the given plaintext string.
    In particular, special characters (such as C{'$'} and C{'_'})
    are escaped, and tabs are expanded.
    @rtype: C{string}
    @param breakany: Insert hyphenation marks, so that LaTeX can
    break the resulting string at any point.  This is useful for
    small boxes (e.g., the type box in the variable list table).
    @param nbsp: Replace every space with a non-breaking space
    (C{'~'}).
    """
    # NOTE: the parameter name ``str`` shadows the builtin; it is kept
    # because it is part of the public signature.

    # These get converted to hyphenation points later
    if breakany: str = re.sub('(.)', '\\1\1', str)

    # These get converted to \textbackslash later.
    str = str.replace('\\', '\0')

    # Expand tabs
    str = str.expandtabs()

    # These elements need to be backslashed.
    str = re.sub(r'([#$&%_\${}])', r'\\\1', str)

    # These elements have special names.
    str = str.replace('|', '{\\textbar}')
    str = str.replace('<', '{\\textless}')
    str = str.replace('>', '{\\textgreater}')
    str = str.replace('^', '{\\textasciicircum}')
    str = str.replace('~', '{\\textasciitilde}')
    str = str.replace('\0', r'{\textbackslash}')

    # replace spaces with non-breaking spaces
    if nbsp: str = str.replace(' ', '~')

    # Convert \1's to hyphenation points.
    if breakany: str = str.replace('\1', r'\-')

    return str

class RunSubprocessError(OSError):
    """
    The exception raised by L{run_subprocess} when the command fails.

    @ivar out: Whatever the command wrote to its standard output.
    @ivar err: Whatever the command wrote to its standard error.
    """
    def __init__(self, cmd, out, err):
        OSError.__init__(self, '%s failed' % cmd[0])
        self.out = out
        self.err = err

def run_subprocess(cmd, data=None):
    """
    Execute the command C{cmd} in a subprocess.

    @param cmd: The command to execute, specified as a list
        of string.  (A single string is split on whitespace.)
    @param data: A string containing data to send to the
        subprocess.
    @return: A tuple C{(out, err)}.
    @raise OSError: If there is any problem executing the
        command, or if its exitval is not 0.
    """
    if isinstance(cmd, basestring):
        cmd = cmd.split()

    # Under Python 2.4+, use subprocess
    try:
        from subprocess import Popen, PIPE
    except ImportError:
        pass
    else:
        pipe = Popen(cmd, stdin=PIPE, stdout=PIPE, stderr=PIPE)
        out, err = pipe.communicate(data)
        if hasattr(pipe, 'returncode'):
            if pipe.returncode == 0:
                return out, err
            else:
                raise RunSubprocessError(cmd, out, err)
        else:
            # Assume that there was an error iff anything was written
            # to the child's stderr.
            if err == '':
                return out, err
            else:
                raise RunSubprocessError(cmd, out, err)

    # Under Python 2.3 or earlier, on unix, use popen2.Popen3 so we
    # can access the return value.
    import popen2
    if hasattr(popen2, 'Popen3'):
        pipe = popen2.Popen3(' '.join(cmd), True)
        to_child = pipe.tochild
        from_child = pipe.fromchild
        child_err = pipe.childerr
        if data:
            to_child.write(data)
        to_child.close()
        out = err = ''
        while pipe.poll() is None:
            out += from_child.read()
            err += child_err.read()
        out += from_child.read()
        err += child_err.read()
        if pipe.wait() == 0:
            return out, err
        else:
            raise RunSubprocessError(cmd, out, err)

    # Under Python 2.3 or earlier, on non-unix, use os.popen3
    else:
        import sys
        to_child, from_child, child_err = os.popen3(' '.join(cmd), 'b')
        if data:
            try:
                to_child.write(data)
            # Guard for a broken pipe error
            except IOError:
                # Fetch the exception through sys.exc_info() rather
                # than "except IOError, e" / "except IOError as e":
                # neither spelling parses on every Python version,
                # and an unparsable branch breaks the whole module.
                raise OSError(sys.exc_info()[1])
        to_child.close()
        out = from_child.read()
        err = child_err.read()
        # Assume that there was an error iff anything was written
        # to the child's stderr.
        if err == '':
            return out, err
        else:
            raise RunSubprocessError(cmd, out, err)